#
# RPM spec file (make rpm-pkg)
#
-/*.spec
+/kernel.spec
/rpmbuild/
#
4.2 .BTF.ext section
--------------------
-The .BTF.ext section encodes func_info and line_info which needs loader
-manipulation before loading into the kernel.
+The .BTF.ext section encodes func_info, line_info and CO-RE relocations
+which needs loader manipulation before loading into the kernel.
The specification for .BTF.ext section is defined at ``tools/lib/bpf/btf.h``
and ``tools/lib/bpf/btf.c``.
__u32 func_info_len;
__u32 line_info_off;
__u32 line_info_len;
+
+ /* optional part of .BTF.ext header */
+ __u32 core_relo_off;
+ __u32 core_relo_len;
};
It is very similar to .BTF section. Instead of type/string section, it
-contains func_info and line_info section. See :ref:`BPF_Prog_Load` for details
-about func_info and line_info record format.
+contains func_info, line_info and core_relo sub-sections.
+See :ref:`BPF_Prog_Load` for details about func_info and line_info
+record format.
The func_info is organized as below.::
- func_info_rec_size
+ func_info_rec_size /* __u32 value */
btf_ext_info_sec for section #1 /* func_info for section #1 */
btf_ext_info_sec for section #2 /* func_info for section #2 */
...
The line_info is organized as below.::
- line_info_rec_size
+ line_info_rec_size /* __u32 value */
btf_ext_info_sec for section #1 /* line_info for section #1 */
btf_ext_info_sec for section #2 /* line_info for section #2 */
...
bpf_insn``. For ELF API, the ``insn_off`` is the byte offset from the
beginning of section (``btf_ext_info_sec->sec_name_off``).
+The core_relo is organized as below.::
+
+ core_relo_rec_size /* __u32 value */
+ btf_ext_info_sec for section #1 /* core_relo for section #1 */
+ btf_ext_info_sec for section #2 /* core_relo for section #2 */
+
+``core_relo_rec_size`` specifies the size of ``bpf_core_relo``
+structure when .BTF.ext is generated. All ``bpf_core_relo`` structures
+within a single ``btf_ext_info_sec`` describe relocations applied to
+section named by ``btf_ext_info_sec->sec_name_off``.
+
+See :ref:`Documentation/bpf/llvm_reloc.rst <btf-co-re-relocations>`
+for more information on CO-RE relocations.
+
4.2 .BTF_ids section
--------------------
bpf_licensing
test_debug
clang-notes
+ linux-notes
other
redirect
Offset Info Type Symbol's Value Symbol's Name
000000000000002c 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
0000000000000040 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text
+
+.. _btf-co-re-relocations:
+
+=================
+CO-RE Relocations
+=================
+
+From object file point of view CO-RE mechanism is implemented as a set
+of CO-RE specific relocation records. These relocation records are not
+related to ELF relocations and are encoded in .BTF.ext section.
+See :ref:`Documentation/bpf/btf.rst <BTF_Ext_Section>` for more
+information on .BTF.ext structure.
+
+CO-RE relocations are applied to BPF instructions to update immediate
+or offset fields of the instruction at load time with information
+relevant for target kernel.
+
+Field to patch is selected basing on the instruction class:
+
+* For BPF_ALU, BPF_ALU64, BPF_LD `immediate` field is patched;
+* For BPF_LDX, BPF_STX, BPF_ST `offset` field is patched;
+* BPF_JMP, BPF_JMP32 instructions **should not** be patched.
+
+Relocation kinds
+================
+
+There are several kinds of CO-RE relocations that could be split in
+three groups:
+
+* Field-based - patch instruction with field related information, e.g.
+ change offset field of the BPF_LDX instruction to reflect offset
+ of a specific structure field in the target kernel.
+
+* Type-based - patch instruction with type related information, e.g.
+ change immediate field of the BPF_ALU move instruction to 0 or 1 to
+ reflect if specific type is present in the target kernel.
+
+* Enum-based - patch instruction with enum related information, e.g.
+ change immediate field of the BPF_LD_IMM64 instruction to reflect
+ value of a specific enum literal in the target kernel.
+
+The complete list of relocation kinds is represented by the following enum:
+
+.. code-block:: c
+
+ enum bpf_core_relo_kind {
+ BPF_CORE_FIELD_BYTE_OFFSET = 0, /* field byte offset */
+ BPF_CORE_FIELD_BYTE_SIZE = 1, /* field size in bytes */
+ BPF_CORE_FIELD_EXISTS = 2, /* field existence in target kernel */
+ BPF_CORE_FIELD_SIGNED = 3, /* field signedness (0 - unsigned, 1 - signed) */
+ BPF_CORE_FIELD_LSHIFT_U64 = 4, /* bitfield-specific left bitshift */
+ BPF_CORE_FIELD_RSHIFT_U64 = 5, /* bitfield-specific right bitshift */
+ BPF_CORE_TYPE_ID_LOCAL = 6, /* type ID in local BPF object */
+ BPF_CORE_TYPE_ID_TARGET = 7, /* type ID in target kernel */
+ BPF_CORE_TYPE_EXISTS = 8, /* type existence in target kernel */
+ BPF_CORE_TYPE_SIZE = 9, /* type size in bytes */
+ BPF_CORE_ENUMVAL_EXISTS = 10, /* enum value existence in target kernel */
+ BPF_CORE_ENUMVAL_VALUE = 11, /* enum value integer value */
+ BPF_CORE_TYPE_MATCHES = 12, /* type match in target kernel */
+ };
+
+Notes:
+
+* ``BPF_CORE_FIELD_LSHIFT_U64`` and ``BPF_CORE_FIELD_RSHIFT_U64`` are
+ supposed to be used to read bitfield values using the following
+ algorithm:
+
+ .. code-block:: c
+
+ // To read bitfield ``f`` from ``struct s``
+ is_signed = relo(s->f, BPF_CORE_FIELD_SIGNED)
+ off = relo(s->f, BPF_CORE_FIELD_BYTE_OFFSET)
+ sz = relo(s->f, BPF_CORE_FIELD_BYTE_SIZE)
+ l = relo(s->f, BPF_CORE_FIELD_LSHIFT_U64)
+ r = relo(s->f, BPF_CORE_FIELD_RSHIFT_U64)
+ // define ``v`` as signed or unsigned integer of size ``sz``
+ v = *({s|u}<sz> *)((void *)s + off)
+ v <<= l
+ v >>= r
+
+* The ``BPF_CORE_TYPE_MATCHES`` queries matching relation, defined as
+ follows:
+
+ * for integers: types match if size and signedness match;
+ * for arrays & pointers: target types are recursively matched;
+ * for structs & unions:
+
+ * local members need to exist in target with the same name;
+
+ * for each member we recursively check match unless it is already behind a
+ pointer, in which case we only check matching names and compatible kind;
+
+ * for enums:
+
+ * local variants have to have a match in target by symbolic name (but not
+ numeric value);
+
+ * size has to match (but enum may match enum64 and vice versa);
+
+ * for function pointers:
+
+ * number and position of arguments in local type has to match target;
+ * for each argument and the return value we recursively check match.
+
+CO-RE Relocation Record
+=======================
+
+Relocation record is encoded as the following structure:
+
+.. code-block:: c
+
+ struct bpf_core_relo {
+ __u32 insn_off;
+ __u32 type_id;
+ __u32 access_str_off;
+ enum bpf_core_relo_kind kind;
+ };
+
+* ``insn_off`` - instruction offset (in bytes) within a code section
+ associated with this relocation;
+
+* ``type_id`` - BTF type ID of the "root" (containing) entity of a
+ relocatable type or field;
+
+* ``access_str_off`` - offset into corresponding .BTF string section.
+ String interpretation depends on specific relocation kind:
+
+ * for field-based relocations, string encodes an accessed field using
+ a sequence of field and array indices, separated by colon (:). It's
+ conceptually very close to LLVM's `getelementptr <GEP_>`_ instruction's
+ arguments for identifying offset to a field. For example, consider the
+ following C code:
+
+ .. code-block:: c
+
+ struct sample {
+ int a;
+ int b;
+ struct { int c[10]; };
+ } __attribute__((preserve_access_index));
+ struct sample *s;
+
+ * Access to ``s[0].a`` would be encoded as ``0:0``:
+
+ * ``0``: first element of ``s`` (as if ``s`` is an array);
+ * ``0``: index of field ``a`` in ``struct sample``.
+
+ * Access to ``s->a`` would be encoded as ``0:0`` as well.
+ * Access to ``s->b`` would be encoded as ``0:1``:
+
+ * ``0``: first element of ``s``;
+ * ``1``: index of field ``b`` in ``struct sample``.
+
+ * Access to ``s[1].c[5]`` would be encoded as ``1:2:0:5``:
+
+ * ``1``: second element of ``s``;
+ * ``2``: index of anonymous structure field in ``struct sample``;
+ * ``0``: index of field ``c`` in anonymous structure;
+ * ``5``: access to array element #5.
+
+ * for type-based relocations, string is expected to be just "0";
+
+ * for enum value-based relocations, string contains an index of enum
+ value within its enum type;
+
+* ``kind`` - one of ``enum bpf_core_relo_kind``.
+
+.. _GEP: https://llvm.org/docs/LangRef.html#getelementptr-instruction
+
+.. _btf_co_re_relocation_examples:
+
+CO-RE Relocation Examples
+=========================
+
+For the following C code:
+
+.. code-block:: c
+
+ struct foo {
+ int a;
+ int b;
+ unsigned c:15;
+ } __attribute__((preserve_access_index));
+
+ enum bar { U, V };
+
+With the following BTF definitions:
+
+.. code-block::
+
+ ...
+ [2] STRUCT 'foo' size=8 vlen=2
+ 'a' type_id=3 bits_offset=0
+ 'b' type_id=3 bits_offset=32
+ 'c' type_id=4 bits_offset=64 bitfield_size=15
+ [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+ [4] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+ ...
+ [16] ENUM 'bar' encoding=UNSIGNED size=4 vlen=2
+ 'U' val=0
+ 'V' val=1
+
+Field offset relocations are generated automatically when
+``__attribute__((preserve_access_index))`` is used, for example:
+
+.. code-block:: c
+
+ void alpha(struct foo *s, volatile unsigned long *g) {
+ *g = s->a;
+ s->a = 1;
+ }
+
+ 00 <alpha>:
+ 0: r3 = *(s32 *)(r1 + 0x0)
+ 00: CO-RE <byte_off> [2] struct foo::a (0:0)
+ 1: *(u64 *)(r2 + 0x0) = r3
+ 2: *(u32 *)(r1 + 0x0) = 0x1
+ 10: CO-RE <byte_off> [2] struct foo::a (0:0)
+ 3: exit
+
+
+All relocation kinds could be requested via built-in functions.
+E.g. field-based relocations:
+
+.. code-block:: c
+
+ void bravo(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_field_info(s->b, 0 /* field byte offset */);
+ *g = __builtin_preserve_field_info(s->b, 1 /* field byte size */);
+ *g = __builtin_preserve_field_info(s->b, 2 /* field existence */);
+ *g = __builtin_preserve_field_info(s->b, 3 /* field signedness */);
+ *g = __builtin_preserve_field_info(s->c, 4 /* bitfield left shift */);
+ *g = __builtin_preserve_field_info(s->c, 5 /* bitfield right shift */);
+ }
+
+ 20 <bravo>:
+ 4: r1 = 0x4
+ 20: CO-RE <byte_off> [2] struct foo::b (0:1)
+ 5: *(u64 *)(r2 + 0x0) = r1
+ 6: r1 = 0x4
+ 30: CO-RE <byte_sz> [2] struct foo::b (0:1)
+ 7: *(u64 *)(r2 + 0x0) = r1
+ 8: r1 = 0x1
+ 40: CO-RE <field_exists> [2] struct foo::b (0:1)
+ 9: *(u64 *)(r2 + 0x0) = r1
+ 10: r1 = 0x1
+ 50: CO-RE <signed> [2] struct foo::b (0:1)
+ 11: *(u64 *)(r2 + 0x0) = r1
+ 12: r1 = 0x31
+ 60: CO-RE <lshift_u64> [2] struct foo::c (0:2)
+ 13: *(u64 *)(r2 + 0x0) = r1
+ 14: r1 = 0x31
+ 70: CO-RE <rshift_u64> [2] struct foo::c (0:2)
+ 15: *(u64 *)(r2 + 0x0) = r1
+ 16: exit
+
+
+Type-based relocations:
+
+.. code-block:: c
+
+ void charlie(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_type_info(*s, 0 /* type existence */);
+ *g = __builtin_preserve_type_info(*s, 1 /* type size */);
+ *g = __builtin_preserve_type_info(*s, 2 /* type matches */);
+ *g = __builtin_btf_type_id(*s, 0 /* type id in this object file */);
+ *g = __builtin_btf_type_id(*s, 1 /* type id in target kernel */);
+ }
+
+ 88 <charlie>:
+ 17: r1 = 0x1
+ 88: CO-RE <type_exists> [2] struct foo
+ 18: *(u64 *)(r2 + 0x0) = r1
+ 19: r1 = 0xc
+ 98: CO-RE <type_size> [2] struct foo
+ 20: *(u64 *)(r2 + 0x0) = r1
+ 21: r1 = 0x1
+ a8: CO-RE <type_matches> [2] struct foo
+ 22: *(u64 *)(r2 + 0x0) = r1
+ 23: r1 = 0x2 ll
+ b8: CO-RE <local_type_id> [2] struct foo
+ 25: *(u64 *)(r2 + 0x0) = r1
+ 26: r1 = 0x2 ll
+ d0: CO-RE <target_type_id> [2] struct foo
+ 28: *(u64 *)(r2 + 0x0) = r1
+ 29: exit
+
+Enum-based relocations:
+
+.. code-block:: c
+
+ void delta(struct foo *s, volatile unsigned long *g) {
+ *g = __builtin_preserve_enum_value(*(enum bar *)U, 0 /* enum literal existence */);
+ *g = __builtin_preserve_enum_value(*(enum bar *)V, 1 /* enum literal value */);
+ }
+
+ f0 <delta>:
+ 30: r1 = 0x1 ll
+ f0: CO-RE <enumval_exists> [16] enum bar::U = 0
+ 32: *(u64 *)(r2 + 0x0) = r1
+ 33: r1 = 0x1 ll
+ 108: CO-RE <enumval_value> [16] enum bar::V = 1
+ 35: *(u64 *)(r2 + 0x0) = r1
+ 36: exit
--- /dev/null
+.. contents::
+.. sectnum::
+
+===================================================
+BPF ABI Recommended Conventions and Guidelines v1.0
+===================================================
+
+This is version 1.0 of an informational document containing recommended
+conventions and guidelines for producing portable BPF program binaries.
+
+Registers and calling convention
+================================
+
+BPF has 10 general purpose registers and a read-only frame pointer register,
+all of which are 64-bits wide.
+
+The BPF calling convention is defined as:
+
+* R0: return value from function calls, and exit value for BPF programs
+* R1 - R5: arguments for function calls
+* R6 - R9: callee saved registers that function calls will preserve
+* R10: read-only frame pointer to access stack
+
+R0 - R5 are scratch registers and BPF programs needs to spill/fill them if
+necessary across calls.
:maxdepth: 1
instruction-set
- linux-notes
+ abi
.. Links:
.. _IETF BPF Working Group: https://datatracker.ietf.org/wg/bpf/about/
.. contents::
.. sectnum::
-========================================
-eBPF Instruction Set Specification, v1.0
-========================================
+=======================================
+BPF Instruction Set Specification, v1.0
+=======================================
-This document specifies version 1.0 of the eBPF instruction set.
+This document specifies version 1.0 of the BPF instruction set.
Documentation conventions
=========================
A: 10000110
B: 11111111 10000110
-Registers and calling convention
-================================
-
-eBPF has 10 general purpose registers and a read-only frame pointer register,
-all of which are 64-bits wide.
-
-The eBPF calling convention is defined as:
-
-* R0: return value from function calls, and exit value for eBPF programs
-* R1 - R5: arguments for function calls
-* R6 - R9: callee saved registers that function calls will preserve
-* R10: read-only frame pointer to access stack
-
-R0 - R5 are scratch registers and eBPF programs needs to spill/fill them if
-necessary across calls.
-
Instruction encoding
====================
-eBPF has two instruction encodings:
+BPF has two instruction encodings:
* the basic instruction encoding, which uses 64 bits to encode an instruction
* the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
========= ===== ======= ==========================================================
Underflow and overflow are allowed during arithmetic operations, meaning
-the 64-bit or 32-bit value will wrap. If eBPF program execution would
+the 64-bit or 32-bit value will wrap. If BPF program execution would
result in division by zero, the destination register is instead set to zero.
If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
the destination register is unchanged whereas for ``BPF_ALU`` the upper
BPF_JSGT 0x6 any PC += offset if dst > src signed
BPF_JSGE 0x7 any PC += offset if dst >= src signed
BPF_CALL 0x8 0x0 call helper function by address see `Helper functions`_
-BPF_CALL 0x8 0x1 call PC += offset see `Program-local functions`_
+BPF_CALL 0x8 0x1 call PC += imm see `Program-local functions`_
BPF_CALL 0x8 0x2 call helper function by BTF ID see `Helper functions`_
BPF_EXIT 0x9 0x0 return BPF_JMP only
BPF_JLT 0xa any PC += offset if dst < src unsigned
BPF_JSLE 0xd any PC += offset if dst <= src signed
======== ===== === =========================================== =========================================
-The eBPF program needs to store the return value into register R0 before doing a
+The BPF program needs to store the return value into register R0 before doing a
``BPF_EXIT``.
Example:
~~~~~~~~~~~~~~~~~~~~~~~
Program-local functions are functions exposed by the same BPF program as the
caller, and are referenced by offset from the call instruction, similar to
-``BPF_JA``. A ``BPF_EXIT`` within the program-local function will return to
-the caller.
+``BPF_JA``. The offset is encoded in the imm field of the call instruction.
+A ``BPF_EXIT`` within the program-local function will return to the caller.
Load and store instructions
===========================
Atomic operations are operations that operate on memory and can not be
interrupted or corrupted by other access to the same memory region
-by other eBPF programs or means outside of this specification.
+by other BPF programs or means outside of this specification.
-All atomic operations supported by eBPF are encoded as store operations
+All atomic operations supported by BPF are encoded as store operations
that use the ``BPF_ATOMIC`` mode modifier as follows:
* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
Maps
~~~~
-Maps are shared memory regions accessible by eBPF programs on some platforms.
+Maps are shared memory regions accessible by BPF programs on some platforms.
A map can have various semantics as defined in a separate document, and may or
may not have a single contiguous memory region, but the 'map_val(map)' is
currently only defined for maps that do have a single contiguous memory region.
Legacy BPF Packet access instructions
-------------------------------------
-eBPF previously introduced special instructions for access to packet data that were
+BPF previously introduced special instructions for access to packet data that were
carried over from classic BPF. However, these instructions are
deprecated and should no longer be used.
Architectures
~~~~~~~~~~~~~
-Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, and
-xtensa, and the tag-based KASAN modes are supported only on arm64.
+Generic KASAN is supported on x86_64, arm, arm64, powerpc, riscv, s390, xtensa,
+and loongarch, and the tag-based KASAN modes are supported only on arm64.
Compilers
~~~~~~~~~
port {
ov7251_ep: endpoint {
data-lanes = <0 1>;
+ link-frequencies = /bits/ 64 <240000000 319200000>;
remote-endpoint = <&csiphy3_ep>;
};
};
minimum: 0x1
maximum: 0xff
description: |
- Dynamic address to be assigned to this device. This property is only
- valid if the I3C device has a static address (first cell of the reg
- property != 0).
+ Dynamic address to be assigned to this device. In case static address is
+ present (first cell of the reg property != 0), this address is assigned
+ through SETDASA. If static address is not present, this address is assigned
+ through SETNEWDA after assigning a temporary address via ENTDAA.
required:
- reg
pagesize = <0x8>;
};
- /* I3C device with a static I2C address. */
+ /* I3C device with a static I2C address and assigned address. */
thermal_sensor: sensor@68,39200144004 {
reg = <0x68 0x392 0x144004>;
assigned-address = <0xa>;
};
+ /* I3C device with only assigned address. */
+ pressure_sensor: sensor@0,39200124004 {
+ reg = <0x0 0x392 0x124000>;
+ assigned-address = <0xc>;
+ };
+
/*
* I3C device without a static I2C address but requiring
* resources described in the DT.
$id: http://devicetree.org/schemas/input/azoteq,iqs7222.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Azoteq IQS7222A/B/C Capacitive Touch Controller
+title: Azoteq IQS7222A/B/C/D Capacitive Touch Controller
maintainers:
- Jeff LaBundy <jeff@labundy.com>
description: |
- The Azoteq IQS7222A, IQS7222B and IQS7222C are multichannel capacitive touch
- controllers that feature additional sensing capabilities.
+ The Azoteq IQS7222A, IQS7222B, IQS7222C and IQS7222D are multichannel
+ capacitive touch controllers that feature additional sensing capabilities.
Link to datasheets: https://www.azoteq.com/
- azoteq,iqs7222a
- azoteq,iqs7222b
- azoteq,iqs7222c
+ - azoteq,iqs7222d
reg:
maxItems: 1
maximum: 3000
description: Specifies the report rate (in ms) during ultra-low-power mode.
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+ trackpad:
+ type: object
+ description: Represents all channels associated with the trackpad.
+
+ properties:
+ azoteq,channel-select:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 12
+ items:
+ minimum: 0
+ maximum: 13
+ description:
+ Specifies the order of the channels that participate in the trackpad.
+ Specify 255 to omit a given channel for the purpose of mapping a non-
+ rectangular trackpad.
+
+ azoteq,num-rows:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 12
+ description: Specifies the number of rows that comprise the trackpad.
+
+ azoteq,num-cols:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 1
+ maximum: 12
+ description: Specifies the number of columns that comprise the trackpad.
+
+ azoteq,top-speed:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ multipleOf: 4
+ minimum: 0
+ maximum: 1020
+ description:
+ Specifies the speed (in coordinates traveled per conversion) after
+ which coordinate filtering is no longer applied.
+
+ azoteq,bottom-speed:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description:
+ Specifies the speed (in coordinates traveled per conversion) after
+ which coordinate filtering is linearly reduced.
+
+ azoteq,use-prox:
+ type: boolean
+ description:
+ Directs the trackpad to respond to the proximity states of the
+ selected channels instead of their corresponding touch states.
+ Note the trackpad cannot report granular coordinates during a
+ state of proximity.
+
+ patternProperties:
+ "^azoteq,lower-cal-(x|y)$":
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's lower starting points.
+
+ "^azoteq,upper-cal-(x|y)$":
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's upper starting points.
+
+ "^event-(press|tap|(swipe|flick)-(x|y)-(pos|neg))$":
+ type: object
+ $ref: input.yaml#
+ description:
+ Represents a press or gesture event reported by the trackpad. Specify
+ 'linux,code' under the press event to report absolute coordinates.
+
+ properties:
+ linux,code: true
+
+ azoteq,gesture-angle-tighten:
+ type: boolean
+ description:
+ Limits the tangent of the gesture angle to 0.5 (axial gestures
+ only). If specified in one direction, the effect is applied in
+ either direction.
+
+ azoteq,gesture-max-ms:
+ multipleOf: 16
+ minimum: 0
+ maximum: 4080
+ description:
+ Specifies the length of time (in ms) within which a tap, swipe
+ or flick gesture must be completed in order to be acknowledged
+ by the device. The number specified for any one swipe or flick
+ gesture applies to all other swipe or flick gestures.
+
+ azoteq,gesture-min-ms:
+ multipleOf: 16
+ minimum: 0
+ maximum: 4080
+ description:
+ Specifies the length of time (in ms) for which a tap gesture must
+ be held in order to be acknowledged by the device.
+
+ azoteq,gesture-dist:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description:
+ Specifies the distance (in coordinates) across which a swipe or
+ flick gesture must travel in order to be acknowledged by the
+ device. The number specified for any one swipe or flick gesture
+ applies to all remaining swipe or flick gestures.
+
+ For tap gestures, this property specifies the distance from the
+ original point of contact across which the contact is permitted
+ to travel before the gesture is rejected by the device.
+
+ azoteq,gpio-select:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 3
+ items:
+ minimum: 0
+ maximum: 2
+ description: |
+ Specifies one or more GPIO mapped to the event as follows:
+ 0: GPIO0
+ 1: GPIO3
+ 2: GPIO4
+
+ Note that although multiple events can be mapped to a single
+ GPIO, they must all be of the same type (proximity, touch or
+ trackpad gesture).
+
+ additionalProperties: false
+
+ required:
+ - azoteq,channel-select
+
+ additionalProperties: false
+
patternProperties:
"^cycle-[0-9]$":
type: object
Activates the reference channel in response to proximity events
instead of touch events.
+ azoteq,counts-filt-enable:
+ type: boolean
+ description: Applies counts filtering to the channel.
+
azoteq,ati-band:
$ref: /schemas/types.yaml#/definitions/uint32
enum: [0, 1, 2, 3]
description: |
Specifies one or more GPIO mapped to the event as follows:
0: GPIO0
- 1: GPIO3 (IQS7222C only)
- 2: GPIO4 (IQS7222C only)
+ 1: GPIO3
+ 2: GPIO4
Note that although multiple events can be mapped to a single
GPIO, they must all be of the same type (proximity, touch or
- slider gesture).
+ slider/trackpad gesture).
azoteq,thresh:
$ref: /schemas/types.yaml#/definitions/uint32
minimum: 0
maximum: 65535
description:
- Specifies the speed of movement after which coordinate filtering is
- no longer applied.
+ Specifies the speed (in coordinates traveled per conversion) after
+ which coordinate filtering is no longer applied.
azoteq,bottom-speed:
$ref: /schemas/types.yaml#/definitions/uint32
minimum: 0
maximum: 255
description:
- Specifies the speed of movement after which coordinate filtering is
- linearly reduced.
+ Specifies the speed (in coordinates traveled per conversion) after
+ which coordinate filtering is linearly reduced.
azoteq,bottom-beta:
$ref: /schemas/types.yaml#/definitions/uint32
minimum: 0
maximum: 4080
description:
- Specifies the distance across which a swipe or flick gesture must
- travel in order to be acknowledged by the device. The number spec-
- ified for any one swipe or flick gesture applies to all remaining
- swipe or flick gestures.
+ Specifies the distance (in coordinates) across which a swipe or
+ flick gesture must travel in order to be acknowledged by the
+ device. The number specified for any one swipe or flick gesture
+ applies to all remaining swipe or flick gestures.
azoteq,gpio-select:
$ref: /schemas/types.yaml#/definitions/uint32-array
description: |
Specifies one or more GPIO mapped to the event as follows:
0: GPIO0
- 1: GPIO3 (IQS7222C only)
- 2: GPIO4 (IQS7222C only)
+ 1: GPIO3
+ 2: GPIO4
Note that although multiple events can be mapped to a single
GPIO, they must all be of the same type (proximity, touch or
description: |
Represents a GPIO mapped to one or more events as follows:
gpio-0: GPIO0
- gpio-1: GPIO3 (IQS7222C only)
- gpio-2: GPIO4 (IQS7222C only)
+ gpio-1: GPIO3
+ gpio-2: GPIO4
allOf:
- $ref: ../pinctrl/pincfg-node.yaml#
additionalProperties: false
allOf:
+ - $ref: touchscreen/touchscreen.yaml#
+
- if:
properties:
compatible:
contains:
- const: azoteq,iqs7222b
+ enum:
+ - azoteq,iqs7222a
+ - azoteq,iqs7222b
+ - azoteq,iqs7222c
+
+ then:
+ properties:
+ touchscreen-size-x: false
+ touchscreen-size-y: false
+ touchscreen-inverted-x: false
+ touchscreen-inverted-y: false
+ touchscreen-swapped-x-y: false
+
+ trackpad: false
+
+ patternProperties:
+ "^channel-([0-9]|1[0-9])$":
+ properties:
+ azoteq,counts-filt-enable: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - azoteq,iqs7222b
+ - azoteq,iqs7222c
+
+ then:
+ patternProperties:
+ "^channel-([0-9]|1[0-9])$":
+ properties:
+ azoteq,ulp-allow: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - azoteq,iqs7222b
+ - azoteq,iqs7222d
then:
patternProperties:
properties:
azoteq,ref-select: false
+ "^slider-[0-1]$": false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: azoteq,iqs7222b
+
+ then:
+ patternProperties:
+ "^channel-([0-9]|1[0-9])$":
patternProperties:
"^event-(prox|touch)$":
properties:
azoteq,gpio-select: false
- "^slider-[0-1]$": false
-
"^gpio-[0-2]$": false
- if:
else:
patternProperties:
- "^channel-([0-9]|1[0-9])$":
- properties:
- azoteq,ulp-allow: false
-
"^slider-[0-1]$":
patternProperties:
"^event-(press|tap|(swipe|flick)-(pos|neg))$":
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/azoteq,iqs7211.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+
+maintainers:
+ - Jeff LaBundy <jeff@labundy.com>
+
+description: |
+ The Azoteq IQS7210A, IQS7211A and IQS7211E trackpad and touchscreen control-
+ lers employ projected-capacitance sensing and can track two contacts.
+
+ Link to datasheets: https://www.azoteq.com/
+
+properties:
+ compatible:
+ enum:
+ - azoteq,iqs7210a
+ - azoteq,iqs7211a
+ - azoteq,iqs7211e
+
+ reg:
+ maxItems: 1
+
+ irq-gpios:
+ maxItems: 1
+ description:
+ Specifies the GPIO connected to the device's active-low RDY output. The
+ pin doubles as the IQS7211E's active-low MCLR input, in which case this
+ GPIO must be configured as open-drain.
+
+ reset-gpios:
+ maxItems: 1
+ description:
+ Specifies the GPIO connected to the device's active-low MCLR input. The
+ device is temporarily held in hardware reset prior to initialization if
+ this property is present.
+
+ azoteq,forced-comms:
+ type: boolean
+ description:
+ Enables forced communication; to be used with host adapters that cannot
+ tolerate clock stretching.
+
+ azoteq,forced-comms-default:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description:
+ Indicates if the device's OTP memory enables (1) or disables (0) forced
+ communication by default. Specifying this property can expedite startup
+ time if the default value is known.
+
+ If this property is not specified, communication is not initiated until
+ the device asserts its RDY pin shortly after exiting hardware reset. At
+ that point, forced communication is either enabled or disabled based on
+ the presence or absence of the 'azoteq,forced-comms' property.
+
+ azoteq,rate-active-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the report rate (in ms) during active mode.
+
+ azoteq,rate-touch-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the report rate (in ms) during idle-touch mode.
+
+ azoteq,rate-idle-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the report rate (in ms) during idle mode.
+
+ azoteq,rate-lp1-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the report rate (in ms) during low-power mode 1.
+
+ azoteq,rate-lp2-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the report rate (in ms) during low-power mode 2.
+
+ azoteq,timeout-active-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 65535000
+ description:
+ Specifies the length of time (in ms) to wait for an event before moving
+ from active mode to idle or idle-touch modes.
+
+ azoteq,timeout-touch-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 65535000
+ description:
+ Specifies the length of time (in ms) to wait for an event before moving
+ from idle-touch mode to idle mode.
+
+ azoteq,timeout-idle-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 65535000
+ description:
+ Specifies the length of time (in ms) to wait for an event before moving
+ from idle mode to low-power mode 1.
+
+ azoteq,timeout-lp1-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 65535000
+ description:
+ Specifies the length of time (in ms) to wait for an event before moving
+ from low-power mode 1 to low-power mode 2.
+
+ azoteq,timeout-lp2-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 60000
+ description:
+ Specifies the rate (in ms) at which the trackpad reference values
+ are updated during low-power modes 1 and 2.
+
+ azoteq,timeout-ati-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 60000
+ description:
+ Specifies the delay (in ms) before the automatic tuning implementation
+ (ATI) is retried in the event it fails to complete.
+
+ azoteq,timeout-comms-ms:
+ minimum: 0
+ maximum: 65535
+ description:
+ Specifies the delay (in ms) before a communication window is closed.
+
+ azoteq,timeout-press-ms:
+ multipleOf: 1000
+ minimum: 0
+ maximum: 60000
+ description:
+ Specifies the length of time (in ms) to wait before automatically
+ releasing a press event. Specify zero to allow the press state to
+ persist indefinitely.
+
+ azoteq,fosc-freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ Specifies the device's core clock frequency as follows:
+ 0: 14 MHz
+ 1: 18 MHz
+
+ azoteq,fosc-trim:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ description: Specifies the device's core clock frequency trim.
+
+ azoteq,num-contacts:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 2
+ default: 0
+ description: Specifies the number of contacts reported by the device.
+
+ azoteq,contact-split:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the contact (finger) split factor.
+
+ azoteq,trim-x:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the horizontal trim width.
+
+ azoteq,trim-y:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the vertical trim height.
+
+ trackpad:
+ type: object
+ description: Represents all channels associated with the trackpad.
+
+ properties:
+ azoteq,rx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+ items:
+ minimum: 0
+ maximum: 7
+ description:
+ Specifies the order of the CRx pin(s) associated with the trackpad.
+
+ azoteq,tx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 12
+ items:
+ minimum: 0
+ maximum: 11
+ description:
+ Specifies the order of the CTx pin(s) associated with the trackpad.
+
+ azoteq,channel-select:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 36
+ items:
+ minimum: 0
+ maximum: 255
+ description: |
+ Specifies the channels mapped to each cycle in the following order:
+ Cycle 0, slot 0
+ Cycle 0, slot 1
+ Cycle 1, slot 0
+ Cycle 1, slot 1
+ ...and so on. Specify 255 to disable a given slot.
+
+ azoteq,ati-frac-div-fine:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the trackpad's ATI fine fractional divider.
+
+ azoteq,ati-frac-mult-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ description: Specifies the trackpad's ATI coarse fractional multiplier.
+
+ azoteq,ati-frac-div-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the trackpad's ATI coarse fractional divider.
+
+ azoteq,ati-comp-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the trackpad's ATI compensation divider.
+
+ azoteq,ati-target:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description: Specifies the trackpad's ATI target.
+
+ azoteq,touch-enter:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's touch entrance factor.
+
+ azoteq,touch-exit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's touch exit factor.
+
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's stationary touch threshold.
+
+ azoteq,conv-period:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's conversion period.
+
+ azoteq,conv-frac:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the trackpad's conversion frequency fraction.
+
+ patternProperties:
+ "^event-(tap(-double|-triple)?|hold|palm|swipe-(x|y)-(pos|neg)(-hold)?)$":
+ type: object
+ $ref: ../input.yaml#
+ description:
+ Represents a gesture event reported by the trackpad. In the case of
+ axial gestures, the duration or distance specified in one direction
+ applies to both directions along the same axis.
+
+ properties:
+ linux,code: true
+
+ azoteq,gesture-max-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the maximum duration of tap/swipe gestures.
+
+ azoteq,gesture-mid-ms:
+ minimum: 0
+ maximum: 65535
+ description:
+ Specifies the maximum duration between subsequent tap gestures
+ (IQS7211E only).
+
+ azoteq,gesture-min-ms:
+ minimum: 0
+ maximum: 65535
+ description: Specifies the minimum duration of hold gestures.
+
+ azoteq,gesture-dist:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description:
+ Specifies the minimum (swipe) or maximum (tap and hold) distance
+ a finger may travel to be considered a gesture.
+
+ azoteq,gesture-dist-rep:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description:
+ Specifies the minimum distance a finger must travel to elicit a
+ repeated swipe gesture (IQS7211E only).
+
+ azoteq,gesture-angle:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 75
+ description:
+ Specifies the maximum angle (in degrees) a finger may travel to
+ be considered a swipe gesture.
+
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 42
+ description: Specifies the palm gesture threshold (IQS7211E only).
+
+ additionalProperties: false
+
+ dependencies:
+ azoteq,rx-enable: ["azoteq,tx-enable"]
+ azoteq,tx-enable: ["azoteq,rx-enable"]
+ azoteq,channel-select: ["azoteq,rx-enable"]
+
+ additionalProperties: false
+
+ alp:
+ type: object
+ $ref: ../input.yaml#
+ description: Represents the alternate low-power channel (ALP).
+
+ properties:
+ azoteq,rx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+ items:
+ minimum: 0
+ maximum: 7
+ description:
+ Specifies the CRx pin(s) associated with the ALP in no particular
+ order.
+
+ azoteq,tx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 12
+ items:
+ minimum: 0
+ maximum: 11
+ description:
+ Specifies the CTx pin(s) associated with the ALP in no particular
+ order.
+
+ azoteq,ati-frac-div-fine:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the ALP's ATI fine fractional divider.
+
+ azoteq,ati-frac-mult-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ description: Specifies the ALP's ATI coarse fractional multiplier.
+
+ azoteq,ati-frac-div-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the ALP's ATI coarse fractional divider.
+
+ azoteq,ati-comp-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the ALP's ATI compensation divider.
+
+ azoteq,ati-target:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description: Specifies the ALP's ATI target.
+
+ azoteq,ati-base:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ multipleOf: 8
+ minimum: 0
+ maximum: 255
+ description: Specifies the ALP's ATI base.
+
+ azoteq,ati-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ Specifies the ALP's ATI mode as follows:
+ 0: Partial
+ 1: Full
+
+ azoteq,sense-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ Specifies the ALP's sensing mode as follows:
+ 0: Self capacitive
+ 1: Mutual capacitive
+
+ azoteq,debounce-enter:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the ALP's debounce entrance factor.
+
+ azoteq,debounce-exit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the ALP's debounce exit factor.
+
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description: Specifies the ALP's proximity or touch threshold.
+
+ azoteq,conv-period:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the ALP's conversion period.
+
+ azoteq,conv-frac:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the ALP's conversion frequency fraction.
+
+ linux,code: true
+
+ additionalProperties: false
+
+ button:
+ type: object
+ description: Represents the inductive or capacitive button.
+
+ properties:
+ azoteq,ati-frac-div-fine:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the button's ATI fine fractional divider.
+
+ azoteq,ati-frac-mult-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ description: Specifies the button's ATI coarse fractional multiplier.
+
+ azoteq,ati-frac-div-coarse:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the button's ATI coarse fractional divider.
+
+ azoteq,ati-comp-div:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 31
+ description: Specifies the button's ATI compensation divider.
+
+ azoteq,ati-target:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description: Specifies the button's ATI target.
+
+ azoteq,ati-base:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ multipleOf: 8
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's ATI base.
+
+ azoteq,ati-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ description: |
+ Specifies the button's ATI mode as follows:
+ 0: Partial
+ 1: Full
+
+ azoteq,sense-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+ description: |
+ Specifies the button's sensing mode as follows:
+ 0: Self capacitive
+ 1: Mutual capacitive
+ 2: Inductive
+
+ azoteq,touch-enter:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's touch entrance factor.
+
+ azoteq,touch-exit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's touch exit factor.
+
+ azoteq,debounce-enter:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's debounce entrance factor.
+
+ azoteq,debounce-exit:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's debounce exit factor.
+
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 65535
+ description: Specifies the button's proximity threshold.
+
+ azoteq,conv-period:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's conversion period.
+
+ azoteq,conv-frac:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the button's conversion frequency fraction.
+
+ patternProperties:
+ "^event-(prox|touch)$":
+ type: object
+ $ref: ../input.yaml#
+ description:
+ Represents a proximity or touch event reported by the button.
+
+ properties:
+ linux,code: true
+
+ additionalProperties: false
+
+ additionalProperties: false
+
+ wakeup-source: true
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+dependencies:
+ touchscreen-size-x: ["azoteq,num-contacts"]
+ touchscreen-size-y: ["azoteq,num-contacts"]
+ touchscreen-inverted-x: ["azoteq,num-contacts"]
+ touchscreen-inverted-y: ["azoteq,num-contacts"]
+ touchscreen-swapped-x-y: ["azoteq,num-contacts"]
+
+required:
+ - compatible
+ - reg
+ - irq-gpios
+
+additionalProperties: false
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: azoteq,iqs7210a
+
+ then:
+ properties:
+ alp:
+ properties:
+ azoteq,rx-enable:
+ maxItems: 4
+ items:
+ minimum: 4
+
+ else:
+ properties:
+ azoteq,timeout-press-ms: false
+
+ alp:
+ properties:
+ azoteq,ati-mode: false
+
+ button: false
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: azoteq,iqs7211e
+
+ then:
+ properties:
+ reset-gpios: false
+
+ trackpad:
+ properties:
+ azoteq,tx-enable:
+ maxItems: 13
+ items:
+ maximum: 12
+
+ alp:
+ properties:
+ azoteq,tx-enable:
+ maxItems: 13
+ items:
+ maximum: 12
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/input/input.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touch@56 {
+ compatible = "azoteq,iqs7210a";
+ reg = <0x56>;
+ irq-gpios = <&gpio 4 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&gpio 17 (GPIO_ACTIVE_LOW |
+ GPIO_PUSH_PULL)>;
+ azoteq,num-contacts = <2>;
+
+ trackpad {
+ azoteq,rx-enable = <6>, <5>, <4>, <3>, <2>;
+ azoteq,tx-enable = <1>, <7>, <8>, <9>, <10>;
+ };
+
+ button {
+ azoteq,sense-mode = <2>;
+ azoteq,touch-enter = <40>;
+ azoteq,touch-exit = <36>;
+
+ event-touch {
+ linux,code = <KEY_HOME>;
+ };
+ };
+
+ alp {
+ azoteq,sense-mode = <1>;
+ linux,code = <KEY_POWER>;
+ };
+ };
+ };
+
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/input/input.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touch@56 {
+ compatible = "azoteq,iqs7211e";
+ reg = <0x56>;
+ irq-gpios = <&gpio 4 (GPIO_ACTIVE_LOW |
+ GPIO_OPEN_DRAIN)>;
+
+ trackpad {
+ event-tap {
+ linux,code = <KEY_PLAYPAUSE>;
+ };
+
+ event-tap-double {
+ linux,code = <KEY_SHUFFLE>;
+ };
+
+ event-tap-triple {
+ linux,code = <KEY_AGAIN>;
+ };
+
+ event-hold {
+ linux,code = <KEY_STOP>;
+ };
+
+ event-palm {
+ linux,code = <KEY_EXIT>;
+ };
+
+ event-swipe-x-pos {
+ linux,code = <KEY_REWIND>;
+ };
+
+ event-swipe-x-pos-hold {
+ linux,code = <KEY_PREVIOUS>;
+ };
+
+ event-swipe-x-neg {
+ linux,code = <KEY_FASTFORWARD>;
+ };
+
+ event-swipe-x-neg-hold {
+ linux,code = <KEY_NEXT>;
+ };
+
+ event-swipe-y-pos {
+ linux,code = <KEY_VOLUMEUP>;
+ };
+
+ event-swipe-y-pos-hold {
+ linux,code = <KEY_MUTE>;
+ };
+
+ event-swipe-y-neg {
+ linux,code = <KEY_VOLUMEDOWN>;
+ };
+
+ event-swipe-y-neg-hold {
+ linux,code = <KEY_MUTE>;
+ };
+ };
+ };
+ };
+
+...
minimum: 1
maximum: 255
+ threshold:
+ description: Allows setting the "click"-threshold in the range from 0 to 255.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+
touchscreen-size-x: true
touchscreen-size-y: true
touchscreen-fuzz-x: true
maxItems: 1
reset-gpios:
maxItems: 1
+ vdd-supply:
+ description: Power supply regulator for the chip
touchscreen-size-x: true
touchscreen-size-y: true
touchscreen-inverted-x: true
touchscreen-swapped-x-y: true
touchscreen-max-pressure: true
+ linux,keycodes:
+ description: Keycodes for the touch keys
+ minItems: 1
+ maxItems: 15
+
additionalProperties: false
required:
+++ /dev/null
-STMicroelectronics STi System Configuration Controlled IRQs
------------------------------------------------------------
-
-On STi based systems; External, CTI (Core Sight), PMU (Performance Management),
-and PL310 L2 Cache IRQs are controlled using System Configuration registers.
-This driver is used to unmask them prior to use.
-
-Required properties:
-- compatible : Should be "st,stih407-irq-syscfg"
-- st,syscfg : Phandle to Cortex-A9 IRQ system config registers
-- st,irq-device : Array of IRQs to enable - should be 2 in length
-- st,fiq-device : Array of FIQs to enable - should be 2 in length
-
-Optional properties:
-- st,invert-ext : External IRQs can be inverted at will. This property inverts
- these IRQs using bitwise logic. A number of defines have been
- provided for convenience:
- ST_IRQ_SYSCFG_EXT_1_INV
- ST_IRQ_SYSCFG_EXT_2_INV
- ST_IRQ_SYSCFG_EXT_3_INV
-Example:
-
-irq-syscfg {
- compatible = "st,stih407-irq-syscfg";
- st,syscfg = <&syscfg_cpu>;
- st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
- <ST_IRQ_SYSCFG_PMU_1>;
- st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
- <ST_IRQ_SYSCFG_DISABLED>;
-};
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/interrupt-controller/st,stih407-irq-syscfg.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics STi System Configuration Controlled IRQs
+
+maintainers:
+ - Patrice Chotard <patrice.chotard@foss.st.com>
+
+description:
+ On STi based systems; External, CTI (Core Sight), PMU (Performance
+ Management), and PL310 L2 Cache IRQs are controlled using System
+ Configuration registers. This device is used to unmask them prior to use.
+
+properties:
+ compatible:
+ const: st,stih407-irq-syscfg
+
+ st,syscfg:
+ description: Phandle to Cortex-A9 IRQ system config registers
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ st,irq-device:
+ description: Array of IRQs to enable.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: Enable the IRQ of the channel one.
+ - description: Enable the IRQ of the channel two.
+
+ st,fiq-device:
+ description: Array of FIQs to enable.
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ items:
+ - description: Enable the IRQ of the channel one.
+ - description: Enable the IRQ of the channel two.
+
+ st,invert-ext:
+ description: External IRQs can be inverted at will. This property inverts
+ these three IRQs using bitwise logic, each one being encoded respectively
+ on the first, second and fourth bit.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [ 1, 2, 3, 4, 5, 6 ]
+
+required:
+ - compatible
+ - st,syscfg
+ - st,irq-device
+ - st,fiq-device
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq-st.h>
+ irq-syscfg {
+ compatible = "st,stih407-irq-syscfg";
+ st,syscfg = <&syscfg_cpu>;
+ st,irq-device = <ST_IRQ_SYSCFG_PMU_0>,
+ <ST_IRQ_SYSCFG_PMU_1>;
+ st,fiq-device = <ST_IRQ_SYSCFG_DISABLED>,
+ <ST_IRQ_SYSCFG_DISABLED>;
+ };
+...
+++ /dev/null
-* Omnivision OV5695 MIPI CSI-2 sensor
-
-Required Properties:
-- compatible: shall be "ovti,ov5695"
-- clocks: reference to the xvclk input clock
-- clock-names: shall be "xvclk"
-- avdd-supply: Analog voltage supply, 2.8 volts
-- dovdd-supply: Digital I/O voltage supply, 1.8 volts
-- dvdd-supply: Digital core voltage supply, 1.2 volts
-- reset-gpios: Low active reset gpio
-
-The device node shall contain one 'port' child node with an
-'endpoint' subnode for its digital output video port,
-in accordance with the video interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-The endpoint optional property 'data-lanes' shall be "<1 2>".
-
-Example:
-&i2c7 {
- ov5695: camera-sensor@36 {
- compatible = "ovti,ov5695";
- reg = <0x36>;
- pinctrl-names = "default";
- pinctrl-0 = <&clk_24m_cam>;
-
- clocks = <&cru SCLK_TESTCLKOUT1>;
- clock-names = "xvclk";
-
- avdd-supply = <&pp2800_cam>;
- dovdd-supply = <&pp1800>;
- dvdd-supply = <&pp1250_cam>;
- reset-gpios = <&gpio2 5 GPIO_ACTIVE_LOW>;
-
- port {
- wcam_out: endpoint {
- remote-endpoint = <&mipi_in_wcam>;
- data-lanes = <1 2>;
- };
- };
- };
-};
+++ /dev/null
-* Omnivision 1/7.5-Inch B&W VGA CMOS Digital Image Sensor
-
-The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
-with an active array size of 640H x 480V. It is programmable through a serial
-I2C interface.
-
-Required Properties:
-- compatible: Value should be "ovti,ov7251".
-- clocks: Reference to the xclk clock.
-- clock-names: Should be "xclk".
-- clock-frequency: Frequency of the xclk clock.
-- enable-gpios: Chip enable GPIO. Polarity is GPIO_ACTIVE_HIGH. This corresponds
- to the hardware pin XSHUTDOWN which is physically active low.
-- vdddo-supply: Chip digital IO regulator.
-- vdda-supply: Chip analog regulator.
-- vddd-supply: Chip digital core regulator.
-
-The device node shall contain one 'port' child node with a single 'endpoint'
-subnode for its digital output video port, in accordance with the video
-interface bindings defined in
-Documentation/devicetree/bindings/media/video-interfaces.txt.
-
-Example:
-
- &i2c1 {
- ...
-
- ov7251: camera-sensor@60 {
- compatible = "ovti,ov7251";
- reg = <0x60>;
-
- enable-gpios = <&gpio1 6 GPIO_ACTIVE_HIGH>;
- pinctrl-names = "default";
- pinctrl-0 = <&camera_bw_default>;
-
- clocks = <&clks 200>;
- clock-names = "xclk";
- clock-frequency = <24000000>;
-
- vdddo-supply = <&camera_dovdd_1v8>;
- vdda-supply = <&camera_avdd_2v8>;
- vddd-supply = <&camera_dvdd_1v2>;
-
- port {
- ov7251_ep: endpoint {
- clock-lanes = <1>;
- data-lanes = <0>;
- remote-endpoint = <&csi0_ep>;
- };
- };
- };
- };
$id: http://devicetree.org/schemas/media/i2c/ovti,ov5693.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
-title: Omnivision OV5693 CMOS Sensor
+title: Omnivision OV5693/OV5695 CMOS Sensors
maintainers:
- Tommaso Merciai <tommaso.merciai@amarulasolutions.com>
description: |
- The Omnivision OV5693 is a high performance, 1/4-inch, 5 megapixel, CMOS
- image sensor that delivers 2592x1944 at 30fps. It provides full-frame,
+ The Omnivision OV5693/OV5695 are high performance, 1/4-inch, 5 megapixel, CMOS
+ image sensors that deliver 2592x1944 at 30fps. It provides full-frame,
sub-sampled, and windowed 10-bit MIPI images in various formats via the
Serial Camera Control Bus (SCCB) interface.
- OV5693 is controlled via I2C and two-wire Serial Camera Control Bus (SCCB).
- The sensor output is available via CSI-2 serial data output (up to 2-lane).
+ OV5693/OV5695 are controlled via I2C and two-wire Serial Camera Control Bus
+ (SCCB). The sensor output is available via CSI-2 serial data output (up to
+ 2-lane).
allOf:
- $ref: /schemas/media/video-interface-devices.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ovti,ov5693
+ then:
+ properties:
+ port:
+ properties:
+ endpoint:
+ required:
+ - link-frequencies
properties:
compatible:
- const: ovti,ov5693
+ enum:
+ - ovti,ov5693
+ - ovti,ov5695
reg:
maxItems: 1
System input clock (aka XVCLK). From 6 to 27 MHz.
maxItems: 1
+ clock-names:
+ const: xvclk
+
dovdd-supply:
description:
Digital I/O voltage supply, 1.8V.
required:
- data-lanes
- - link-frequencies
required:
- compatible
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/media/i2c/ovti,ov7251.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: OmniVision OV7251 Image Sensor
+
+description:
+ The Omnivision OV7251 is a 1/7.5-Inch CMOS active pixel digital image sensor
+ with an active array size of 640H x 480V. It is programmable through a serial
+ I2C interface.
+
+maintainers:
+ - Todor Tomov <todor.too@gmail.com>
+
+properties:
+ compatible:
+ const: ovti,ov7251
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ description: XCLK Input Clock
+
+ clock-names:
+ const: xclk
+
+ clock-frequency:
+ description: Frequency of the xclk clock in Hz.
+
+ vdda-supply:
+ description: Analog voltage supply, 2.8 volts
+
+ vddd-supply:
+ description: Digital core voltage supply, 1.2 volts
+
+ vdddo-supply:
+ description: Digital I/O voltage supply, 1.8 volts
+
+ enable-gpios:
+ maxItems: 1
+ description:
+ Reference to the GPIO connected to the XSHUTDOWN pin, if any. Polarity
+ is GPIO_ACTIVE_HIGH.
+
+ port:
+ description: Digital Output Port
+ $ref: /schemas/graph.yaml#/$defs/port-base
+ additionalProperties: false
+
+ properties:
+ endpoint:
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
+ properties:
+ clock-lanes:
+ maximum: 1
+
+ data-lanes:
+ maxItems: 1
+
+ link-frequencies: true
+
+ required:
+ - data-lanes
+ - link-frequencies
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - vdddo-supply
+ - vdda-supply
+ - port
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ camera@3c {
+ compatible = "ovti,ov7251";
+ reg = <0x3c>;
+ clocks = <&clks 1>;
+ clock-frequency = <24000000>;
+ vdddo-supply = <&ov7251_vdddo_1v8>;
+ vdda-supply = <&ov7251_vdda_2v8>;
+ vddd-supply = <&ov7251_vddd_1v5>;
+ enable-gpios = <&gpio1 19 GPIO_ACTIVE_HIGH>;
+
+ port {
+ ov7251_ep: endpoint {
+ remote-endpoint = <&csi0_ep>;
+ clock-lanes = <1>;
+ data-lanes = <0>;
+ link-frequencies = /bits/ 64 <240000000 319200000>;
+ };
+ };
+ };
+ };
+...
wcam: camera@36 {
compatible = "ovti,ov5695";
reg = <0x36>;
+ clocks = <&cru SCLK_TESTCLKOUT1>;
port {
wcam_out: endpoint {
+++ /dev/null
-Broadcom Kona PWM controller device tree bindings
-
-This controller has 6 channels.
-
-Required Properties :
-- compatible: should contain "brcm,kona-pwm"
-- reg: physical base address and length of the controller's registers
-- clocks: phandle + clock specifier pair for the external clock
-- #pwm-cells: Should be 3. See pwm.yaml in this directory for a
- description of the cells format.
-
-Refer to clocks/clock-bindings.txt for generic clock consumer properties.
-
-Example:
-
-pwm: pwm@3e01a000 {
- compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
- reg = <0x3e01a000 0xc4>;
- clocks = <&pwm_clk>;
- #pwm-cells = <3>;
-};
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/brcm,kona-pwm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Kona family PWM controller
+
+description:
+ This controller has 6 channels.
+
+maintainers:
+ - Florian Fainelli <f.fainelli@gmail.com>
+
+allOf:
+ - $ref: pwm.yaml#
+
+properties:
+ compatible:
+ items:
+ - enum:
+ - brcm,bcm11351-pwm
+ - const: brcm,kona-pwm
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ '#pwm-cells':
+ const: 3
+
+required:
+ - compatible
+ - reg
+ - clocks
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/bcm281xx.h>
+
+ pwm@3e01a000 {
+ compatible = "brcm,bcm11351-pwm", "brcm,kona-pwm";
+ reg = <0x3e01a000 0xcc>;
+ clocks = <&slave_ccu BCM281XX_SLAVE_CCU_PWM>;
+ #pwm-cells = <3>;
+ };
+...
properties:
compatible:
- enum:
- - atmel,at91rm9200-rtc
- - atmel,at91sam9x5-rtc
- - atmel,sama5d4-rtc
- - atmel,sama5d2-rtc
- - microchip,sam9x60-rtc
- - microchip,sama7g5-rtc
+ oneOf:
+ - enum:
+ - atmel,at91rm9200-rtc
+ - atmel,at91sam9x5-rtc
+ - atmel,sama5d4-rtc
+ - atmel,sama5d2-rtc
+ - microchip,sam9x60-rtc
+ - microchip,sama7g5-rtc
+ - items:
+ - const: microchip,sam9x7-rtc
+ - const: microchip,sam9x60-rtc
reg:
maxItems: 1
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/intersil,isl12022.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intersil ISL12022 Real-time Clock
+
+maintainers:
+ - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+ compatible:
+ const: isil,isl12022
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ '#clock-cells':
+ const: 0
+
+ isil,battery-trip-levels-microvolt:
+ description:
+ The battery voltages at which the first alarm and second alarm
+ should trigger (normally ~85% and ~75% of nominal V_BAT).
+ items:
+ - enum: [2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000]
+ - enum: [1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000]
+
+required:
+ - compatible
+ - reg
+
+allOf:
+ - $ref: rtc.yaml#
+ # If #clock-cells is present, interrupts must not be present
+ - if:
+ required:
+ - '#clock-cells'
+ then:
+ properties:
+ interrupts: false
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ rtc@6f {
+ compatible = "isil,isl12022";
+ reg = <0x6f>;
+ interrupts-extended = <&gpio1 5 IRQ_TYPE_LEVEL_LOW>;
+ isil,battery-trip-levels-microvolt = <2550000>, <2250000>;
+ };
+ };
+
+...
+++ /dev/null
-* Maxim DS3231 Real Time Clock
-
-Required properties:
-- compatible: Should contain "maxim,ds3231".
-- reg: I2C address for chip.
-
-Optional property:
-- #clock-cells: Should be 1.
-- clock-output-names:
- overwrite the default clock names "ds3231_clk_sqw" and "ds3231_clk_32khz".
-
-Each clock is assigned an identifier and client nodes can use this identifier
-to specify the clock which they consume. Following indices are allowed:
- - 0: square-wave output on the SQW pin
- - 1: square-wave output on the 32kHz pin
-
-- interrupts: rtc alarm/event interrupt. When this property is selected,
- clock on the SQW pin cannot be used.
-
-Example:
-
-ds3231: ds3231@51 {
- compatible = "maxim,ds3231";
- reg = <0x68>;
- #clock-cells = <1>;
-};
-
-device1 {
-...
- clocks = <&ds3231 0>;
-...
-};
-
-device2 {
-...
- clocks = <&ds3231 1>;
-...
-};
- nxp,pca2129
- nxp,pcf2127
- nxp,pcf2129
+ - nxp,pcf2131
reg:
maxItems: 1
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/st,m48t86.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST M48T86 / Dallas DS12887 RTC with SRAM
+
+maintainers:
+ - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+allOf:
+ - $ref: rtc.yaml
+
+properties:
+ compatible:
+ enum:
+ - st,m48t86
+
+ reg:
+ items:
+ - description: index register
+ - description: data register
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ rtc@10800000 {
+ compatible = "st,m48t86";
+ reg = <0x10800000 0x1>, <0x11700000 0x1>;
+ };
+
+...
- isil,isl1208
# Intersil ISL1218 Low Power RTC with Battery Backed SRAM
- isil,isl1218
- # Intersil ISL12022 Real-time Clock
- - isil,isl12022
# Real Time Clock Module with I2C-Bus
- microcrystal,rv3029
# Real Time Clock
pattern: "^easrc@.*"
compatible:
- const: fsl,imx8mn-easrc
+ oneOf:
+ - enum:
+ - fsl,imx8mn-easrc
+ - items:
+ - enum:
+ - fsl,imx8mp-easrc
+ - const: fsl,imx8mn-easrc
reg:
maxItems: 1
compatible:
enum:
- amlogic,meson-gxbb-wdt
+ - amlogic,t7-wdt
reg:
maxItems: 1
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/marvell,cn10624-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Marvell Global Timer (GTI) system watchdog
+
+maintainers:
+ - Bharat Bhushan <bbhushan2@marvell.com>
+
+allOf:
+ - $ref: watchdog.yaml#
+
+properties:
+ compatible:
+ oneOf:
+ - enum:
+ - marvell,cn9670-wdt
+ - marvell,cn10624-wdt
+
+ - items:
+ - enum:
+ - marvell,cn9880-wdt
+ - marvell,cnf9535-wdt
+ - const: marvell,cn9670-wdt
+
+ - items:
+ - enum:
+ - marvell,cn10308-wdt
+ - marvell,cnf10518-wdt
+ - const: marvell,cn10624-wdt
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ clock-names:
+ items:
+ - const: refclk
+
+ marvell,wdt-timer-index:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 63
+ description:
+ An SoC have many timers (up to 64), firmware can reserve one or more timer
+ for some other use case and configures one of the global timer as watchdog
+ timer. Firmware will update this field with the timer number configured
+ as watchdog timer.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - clock-names
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ watchdog@802000040000 {
+ compatible = "marvell,cn9670-wdt";
+ reg = <0x00008020 0x00040000 0x00000000 0x00020000>;
+ interrupts = <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>;
+ clocks = <&sclk>;
+ clock-names = "refclk";
+ marvell,wdt-timer-index = <63>;
+ };
+ };
+
+...
- items:
- enum:
- qcom,kpss-wdt-ipq4019
+ - qcom,apss-wdt-ipq5018
- qcom,apss-wdt-ipq5332
- qcom,apss-wdt-ipq9574
- qcom,apss-wdt-msm8994
power-domains:
maxItems: 1
+ memory-region:
+ maxItems: 1
+ description:
+ Contains the watchdog reserved memory. It is optional.
+ In the reserved memory, the specified values, which are
+ PON_REASON_SOF_NUM(0xBBBBCCCC), PON_REASON_MAGIC_NUM(0xDDDDDDDD),
+ and PON_REASON_EOF_NUM(0xCCCCBBBB), are pre-stored at the first
+ 3 * 4 bytes to tell that last boot was caused by watchdog reset.
+ Once the PON reason is captured by driver(rti_wdt.c), the driver
+ is supposed to wipe the whole memory region. Surely, if this
+ property is set, at least 12 bytes reserved memory starting from
+ specific memory address(0xa220000) should be set. More please
+ refer to example.
+
required:
- compatible
- reg
/*
* RTI WDT in main domain on J721e SoC. Assigned clocks are used to
* select the source clock for the watchdog, forcing it to tick with
- * a 32kHz clock in this case.
+ * a 32kHz clock in this case. Add a reserved memory(optional) to keep
+ * the watchdog reset cause persistent, which was be written in 12 bytes
+ * starting from 0xa2200000 by RTI Watchdog Firmware, then make it
+ * possible to get watchdog reset cause in driver.
+ *
+ * Reserved memory should be defined as follows:
+ * reserved-memory {
+ * wdt_reset_memory_region: wdt-memory@a2200000 {
+ * reg = <0x00 0xa2200000 0x00 0x1000>;
+ * no-map;
+ * };
+ * }
*/
#include <dt-bindings/soc/ti,sci_pm_domain.h>
power-domains = <&k3_pds 252 TI_SCI_PD_EXCLUSIVE>;
assigned-clocks = <&k3_clks 252 1>;
assigned-clock-parents = <&k3_clks 252 5>;
+ memory-region = <&wdt_reset_memory_region>;
};
:c:type:`struct ata_port_operations <ata_port_operations>`
----------------------------------------------------------
-Disable ATA port
-~~~~~~~~~~~~~~~~
-
-::
-
- void (*port_disable) (struct ata_port *);
-
-
-Called from :c:func:`ata_bus_probe` error path, as well as when unregistering
-from the SCSI module (rmmod, hot unplug). This function should do
-whatever needs to be done to take the port out of use. In most cases,
-:c:func:`ata_port_disable` can be used as this hook.
-
-Called from :c:func:`ata_bus_probe` on a failed probe. Called from
-:c:func:`ata_scsi_release`.
-
Post-IDENTIFY device configuration
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
::
- void (*eng_timeout) (struct ata_port *ap);
- void (*phy_reset) (struct ata_port *ap);
-
-
-Deprecated. Use ``->error_handler()`` instead.
-
-::
-
void (*freeze) (struct ata_port *ap);
void (*thaw) (struct ata_port *ap);
u32 val);
-Read and write standard SATA phy registers. Currently only used if
-``->phy_reset`` hook called the :c:func:`sata_phy_reset` helper function.
+Read and write standard SATA phy registers.
sc_reg is one of SCR_STATUS, SCR_CONTROL, SCR_ERROR, or SCR_ACTIVE.
Init and shutdown
:c:func:`ata_scsi_error` is the current ``transportt->eh_strategy_handler()``
for libata. As discussed above, this will be entered in two cases -
-timeout and ATAPI error completion. This function calls low level libata
-driver's :c:func:`eng_timeout` callback, the standard callback for which is
-:c:func:`ata_eng_timeout`. It checks if a qc is active and calls
-:c:func:`ata_qc_timeout` on the qc if so. Actual error handling occurs in
-:c:func:`ata_qc_timeout`.
+timeout and ATAPI error completion. This function will check if a qc is active
+and has not failed yet. Such a qc will be marked with AC_ERR_TIMEOUT such that
+EH will know to handle it later. Then it calls low level libata driver's
+:c:func:`error_handler` callback.
-If EH is invoked for timeout, :c:func:`ata_qc_timeout` stops BMDMA and
+When the :c:func:`error_handler` callback is invoked it stops BMDMA and
completes the qc. Note that as we're currently in EH, we cannot call
scsi_done. As described in SCSI EH doc, a recovered scmd should be
either retried with :c:func:`scsi_queue_insert` or finished with
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | ok |
| ia64: | TODO |
- | loongarch: | TODO |
+ | loongarch: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
system. Snapshot creation and deletion are as simple as 'mkdir
.snap/foo' and 'rmdir .snap/foo'.
+Snapshot names have two limitations:
+
+* They can not start with an underscore ('_'), as these names are reserved
+ for internal usage by the MDS.
+* They can not exceed 240 characters in size. This is because the MDS makes
+ use of long snapshot names internally, which follow the format:
+ `_<SNAPSHOT-NAME>_<INODE-NUMBER>`. Since filenames in general can't have
+ more than 255 characters, and `<node-id>` takes 13 characters, the long
+ snapshot names can take as much as 255 - 1 - 1 - 13 = 240.
+
Ceph also provides some recursive accounting on directories for nested
files and bytes. That is, a 'getfattr -d foo' on any directory in the
system will reveal the total number of nested regular files and
just the holders) associated with the glock. If there are any
held locks, then they will be contiguous entries at the head
of the list. Locks are granted in strictly the order that they
-are queued, except for those marked LM_FLAG_PRIORITY which are
-used only during recovery, and even then only for journal locks.
+are queued.
There are three lock states that users of the glock layer can request,
namely shared (SH), deferred (DF) and exclusive (EX). Those translate
Private_Dirty: 0 kB
Referenced: 892 kB
Anonymous: 0 kB
+ KSM: 0 kB
LazyFree: 0 kB
AnonHugePages: 0 kB
ShmemPmdMapped: 0 kB
a mapping associated with a file may contain anonymous pages: when MAP_PRIVATE
and a page is modified, the file page is replaced by a private anonymous copy.
+"KSM" reports how many of the pages are KSM pages. Note that KSM-placed zeropages
+are not included, only actual KSM pages.
+
"LazyFree" shows the amount of memory which is marked by madvise(MADV_FREE).
The memory isn't freed immediately with madvise(). It's freed in memory
pressure if the memory is clean. Please note that the printed value might
product_name
------------
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: product_name
product_number
--------------
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
- :doc: product_name
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+ :doc: product_number
serial_number
-------------
-.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
:doc: serial_number
unique_id
If you set KCONFIG_OVERWRITECONFIG in the environment, Kconfig will not
break symlinks when .config is a symlink to somewhere else.
+KCONFIG_WARN_UNKNOWN_SYMBOLS
+----------------------------
+This environment variable makes Kconfig warn about all unrecognized
+symbols in the config input.
+
+KCONFIG_WERROR
+--------------
+If set, Kconfig treats warnings as errors.
+
`CONFIG_`
---------
If you set `CONFIG_` in the environment, Kconfig will prefix all symbols
first (and in alphabetical order), then come all other symbols,
sorted in alphabetical order.
+ In this menu, pressing the key in the (#) prefix will jump
+ directly to that location. You will be returned to the current
+ search results after exiting this new menu.
+
----------------------------------------------------------------------
User interface options for 'menuconfig'
F8 (SymSearch) searches the configuration symbols for the
given string or regular expression (regex).
+ In the SymSearch, pressing the key in the (#) prefix will
+ jump directly to that location. You will be returned to the
+ current search results after exiting this new menu.
+
NCONFIG_MODE
------------
This mode shows all sub-menus in one large tree.
that supports C and the GNU C extensions required by the kernel, and is
pronounced "klang," not "see-lang."
-Clang
------
-
-The compiler used can be swapped out via ``CC=`` command line argument to ``make``.
-``CC=`` should be set when selecting a config and during a build. ::
-
- make CC=clang defconfig
-
- make CC=clang
+Building with LLVM
+------------------
-Cross Compiling
----------------
+Invoke ``make`` via::
-A single Clang compiler binary will typically contain all supported backends,
-which can help simplify cross compiling. ::
-
- make ARCH=arm64 CC=clang CROSS_COMPILE=aarch64-linux-gnu-
+ make LLVM=1
-``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
-``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
-example: ::
+to compile for the host target. For cross compiling::
- clang --target=aarch64-linux-gnu foo.c
+ make LLVM=1 ARCH=arm64
-LLVM Utilities
---------------
+The LLVM= argument
+------------------
-LLVM has substitutes for GNU binutils utilities. They can be enabled individually.
-The full list of supported make variables::
+LLVM has substitutes for GNU binutils utilities. They can be enabled
+individually. The full list of supported make variables::
make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \
OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
-To simplify the above command, Kbuild supports the ``LLVM`` variable::
-
- make LLVM=1
+``LLVM=1`` expands to the above.
If your LLVM tools are not available in your PATH, you can supply their
location using the LLVM variable with a trailing slash::
make LLVM=/path/to/llvm/
-which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc.
+which will use ``/path/to/llvm/clang``, ``/path/to/llvm/ld.lld``, etc. The
+following may also be used::
+
+ PATH=/path/to/llvm:$PATH make LLVM=1
If your LLVM tools have a version suffix and you want to test with that
explicit version rather than the unsuffixed executables like ``LLVM=1``, you
which will use ``clang-14``, ``ld.lld-14``, etc.
+To support combinations of out of tree paths with version suffixes, we
+recommend::
+
+ PATH=/path/to/llvm/:$PATH make LLVM=-14
+
``LLVM=0`` is not the same as omitting ``LLVM`` altogether, it will behave like
-``LLVM=1``. If you only wish to use certain LLVM utilities, use their respective
-make variables.
+``LLVM=1``. If you only wish to use certain LLVM utilities, use their
+respective make variables.
+
+The same value used for ``LLVM=`` should be set for each invocation of ``make``
+if configuring and building via distinct commands. ``LLVM=`` should also be set
+as an environment variable when running scripts that will eventually run
+``make``.
-The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
-disable it.
+Cross Compiling
+---------------
-Omitting CROSS_COMPILE
+A single Clang compiler binary (and corresponding LLVM utilities) will
+typically contain all supported back ends, which can help simplify cross
+compiling especially when ``LLVM=1`` is used. If you use only LLVM tools,
+``CROSS_COMPILE`` or target-triple-prefixes become unnecessary. Example::
+
+ make LLVM=1 ARCH=arm64
+
+As an example of mixing LLVM and GNU utilities, for a target like ``ARCH=s390``
+which does not yet have ``ld.lld`` or ``llvm-objcopy`` support, you could
+invoke ``make`` via::
+
+ make LLVM=1 ARCH=s390 LD=s390x-linux-gnu-ld.bfd \
+ OBJCOPY=s390x-linux-gnu-objcopy
+
+This example will invoke ``s390x-linux-gnu-ld.bfd`` as the linker and
+``s390x-linux-gnu-objcopy``, so ensure those are reachable in your ``$PATH``.
+
+``CROSS_COMPILE`` is not used to prefix the Clang compiler binary (or
+corresponding LLVM utilities) as is the case for GNU utilities when ``LLVM=1``
+is not set.
+
+The LLVM_IAS= argument
----------------------
-As explained above, ``CROSS_COMPILE`` is used to set ``--target=<triple>``.
+Clang can assemble assembler code. You can pass ``LLVM_IAS=0`` to disable this
+behavior and have Clang invoke the corresponding non-integrated assembler
+instead. Example::
+
+ make LLVM=1 LLVM_IAS=0
+
+``CROSS_COMPILE`` is necessary when cross compiling and ``LLVM_IAS=0``
+is used in order to set ``--prefix=`` for the compiler to find the
+corresponding non-integrated assembler (typically, you don't want to use the
+system assembler when targeting another architecture). Example::
-If ``CROSS_COMPILE`` is not specified, the ``--target=<triple>`` is inferred
-from ``ARCH``.
+ make LLVM=1 ARCH=arm LLVM_IAS=0 CROSS_COMPILE=arm-linux-gnueabi-
-That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary.
-For example, to cross-compile the arm64 kernel::
+Ccache
+------
- make ARCH=arm64 LLVM=1
+``ccache`` can be used with ``clang`` to improve subsequent builds, (though
+KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds
+in order to avoid 100% cache misses, see Reproducible_builds_ for more info):
-If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive
-``--prefix=<path>`` to search for the GNU assembler and linker. ::
+ KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang"
- make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-
+.. _KBUILD_BUILD_TIMESTAMP: kbuild.html#kbuild-build-timestamp
+.. _Reproducible_builds: reproducible-builds.html#timestamps
Supported Architectures
-----------------------
* - hexagon
- Maintained
- ``LLVM=1``
+ * - loongarch
+ - Maintained
+ - ``LLVM=1``
* - mips
- Maintained
- ``LLVM=1``
* - powerpc
- Maintained
- - ``CC=clang``
+ - ``LLVM=1``
* - riscv
- - Maintained
+ - Supported
- ``LLVM=1``
* - s390
- Maintained
Getting LLVM
-------------
-We provide prebuilt stable versions of LLVM on `kernel.org <https://kernel.org/pub/tools/llvm/>`_.
+We provide prebuilt stable versions of LLVM on `kernel.org
+<https://kernel.org/pub/tools/llvm/>`_. These have been optimized with profile
+data for building Linux kernels, which should improve kernel build times
+relative to other distributions of LLVM.
+
Below are links that may be useful for building LLVM from source or procuring
it through a distribution's package manager.
repository link above for any new networking-related commits. You may
also check the following website for the current status:
- https://patchwork.hopto.org/net-next.html
+ https://netdev.bots.linux.dev/net-next.html
The ``net`` tree continues to collect fixes for the vX.Y content, and is
fed back to Linus at regular (~weekly) intervals. Meaning that the
https://patchwork.kernel.org/project/netdevbpf/list/
The "State" field will tell you exactly where things are at with your
-patch. Patches are indexed by the ``Message-ID`` header of the emails
+patch:
+
+================== =============================================================
+Patch state Description
+================== =============================================================
+New, Under review pending review, patch is in the maintainer’s queue for
+ review; the two states are used interchangeably (depending on
+ the exact co-maintainer handling patchwork at the time)
+Accepted patch was applied to the appropriate networking tree, this is
+ usually set automatically by the pw-bot
+Needs ACK waiting for an ack from an area expert or testing
+Changes requested patch has not passed the review, new revision is expected
+ with appropriate code and commit message changes
+Rejected patch has been rejected and new revision is not expected
+Not applicable patch is expected to be applied outside of the networking
+ subsystem
+Awaiting upstream patch should be reviewed and handled by appropriate
+ sub-maintainer, who will send it on to the networking trees;
+ patches set to ``Awaiting upstream`` in netdev's patchwork
+ will usually remain in this state, whether the sub-maintainer
+ requested changes, accepted or rejected the patch
+Deferred patch needs to be reposted later, usually due to dependency
+ or because it was posted for a closed tree
+Superseded new version of the patch was posted, usually set by the
+ pw-bot
+RFC not to be applied, usually not in maintainer’s review queue,
+ pw-bot can automatically set patches to this state based
+ on subject tags
+================== =============================================================
+
+Patches are indexed by the ``Message-ID`` header of the emails
which carried them so if you have trouble finding your patch append
the value of ``Message-ID`` to the URL above.
Bot records its activity here:
- https://patchwork.hopto.org/pw-bot.html
+ https://netdev.bots.linux.dev/pw-bot.html
Review timelines
~~~~~~~~~~~~~~~~
体系架构
~~~~~~~~
-在x86_64、arm、arm64、powerpc、riscv、s390和xtensa上支持通用KASAN,
+在x86_64、arm、arm64、powerpc、riscv、s390、xtensa和loongarch上支持通用KASAN,
而基于标签的KASAN模式只在arm64上支持。
编译器
for most efficient handling of dumps (larger buffer fits more dumped
objects and therefore fewer recvmsg() calls are needed).
+.. _classic_netlink:
+
Classic Netlink
===============
EINVAL invalid register ID, or no such register or used with VMs in
protected virtualization mode on s390
EPERM (arm64) register access not allowed before vcpu finalization
+ EBUSY (riscv) changing register value not allowed after the vcpu
+ has run at least once
====== ============================================================
(These error codes are indicative only: do not rely on a specific error
---------------------
:Capability: basic
-:Architectures: arm64, mips
+:Architectures: arm64, mips, riscv
:Type: vcpu ioctl
:Parameters: struct kvm_reg_list (in/out)
:Returns: 0 on success; -1 on error
DISTRIBUTED LOCK MANAGER (DLM)
M: Christine Caulfield <ccaulfie@redhat.com>
M: David Teigland <teigland@redhat.com>
-L: cluster-devel@redhat.com
+L: gfs2@lists.linux.dev
S: Supported
W: http://sources.redhat.com/cluster/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm.git
GFS2 FILE SYSTEM
M: Bob Peterson <rpeterso@redhat.com>
M: Andreas Gruenbacher <agruenba@redhat.com>
-L: cluster-devel@redhat.com
+L: gfs2@lists.linux.dev
S: Supported
B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=gfs2
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gfs2/linux-gfs2.git
F: scripts/mk*
F: scripts/mod/
F: scripts/package/
+F: usr/
KERNEL HARDENING (not covered by other areas)
M: Kees Cook <keescook@chromium.org>
F: arch/x86/include/uapi/asm/vmx.h
F: arch/x86/kvm/
F: arch/x86/kvm/*/
+F: tools/testing/selftests/kvm/*/x86_64/
+F: tools/testing/selftests/kvm/x86_64/
KERNFS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
F: include/linux/mm.h
F: include/linux/mmzone.h
F: include/linux/pagewalk.h
+F: include/linux/rmap.h
F: include/trace/events/ksm.h
F: mm/
F: tools/mm/
F: Documentation/admin-guide/rtc.rst
F: Documentation/devicetree/bindings/rtc/
F: drivers/rtc/
-F: include/linux/platform_data/rtc-*
F: include/linux/rtc.h
F: include/linux/rtc/
F: include/uapi/linux/rtc.h
TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
M: Shenghao Ding <shenghao-ding@ti.com>
M: Kevin Lu <kevin-lu@ti.com>
-M: Baojun Xu <x1077012@ti.com>
+M: Baojun Xu <baojun.xu@ti.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
F: Documentation/devicetree/bindings/sound/tas2552.txt
# Installation targets should not require compiler. Unfortunately, vdso_install
# is an exception where build artifacts may be updated. This must be fixed.
no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
- headers_install modules_install kernelrelease image_name
-no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \
+ headers_install modules_install modules_sign kernelrelease image_name
+no-sync-config-targets := $(no-dot-config-targets) %install modules_sign kernelrelease \
image_name
single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.rsi %.s %.symtypes %/
YACC = bison
AWK = awk
INSTALLKERNEL := installkernel
-DEPMOD = depmod
PERL = perl
PYTHON3 = python3
CHECK = sparse
KBUILD_CFLAGS += -fno-common
KBUILD_CFLAGS += -fno-PIE
KBUILD_CFLAGS += -fno-strict-aliasing
-KBUILD_CFLAGS += -Wall
-KBUILD_CFLAGS += -Wundef
-KBUILD_CFLAGS += -Werror=implicit-function-declaration
-KBUILD_CFLAGS += -Werror=implicit-int
-KBUILD_CFLAGS += -Werror=return-type
-KBUILD_CFLAGS += -Werror=strict-prototypes
-KBUILD_CFLAGS += -Wno-format-security
-KBUILD_CFLAGS += -Wno-trigraphs
KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_RUSTFLAGS := $(rust_common_flags) \
endif # need-config
KBUILD_CFLAGS += -fno-delete-null-pointer-checks
-KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
-KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
KBUILD_CFLAGS += -O2
KBUILD_CFLAGS += -fno-reorder-blocks -fno-ipa-cp-clone -fno-partial-inlining
endif
-ifneq ($(CONFIG_FRAME_WARN),0)
-KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
-endif
-
stackp-flags-y := -fno-stack-protector
stackp-flags-$(CONFIG_STACKPROTECTOR) := -fstack-protector
stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG) := -fstack-protector-strong
KBUILD_CFLAGS += $(stackp-flags-y)
-KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
-KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
-KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
-
KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings
KBUILD_RUSTFLAGS += $(KBUILD_RUSTFLAGS-y)
-ifdef CONFIG_CC_IS_CLANG
-# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
-KBUILD_CFLAGS += -Wno-gnu
-else
-
-# gcc inanely warns about local variables called 'main'
-KBUILD_CFLAGS += -Wno-main
-endif
-
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
-KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
-
-# These result in bogus false positives
-KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
-
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
KBUILD_RUSTFLAGS += -Cforce-frame-pointers=y
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc
-# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
-KBUILD_CFLAGS += -Wvla
-
-# disable pointer signed / unsigned warnings in gcc 4.0
-KBUILD_CFLAGS += -Wno-pointer-sign
-
-# In order to make sure new function cast mismatches are not introduced
-# in the kernel (to avoid tripping CFI checking), the kernel should be
-# globally built with -Wcast-function-type.
-KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
-
# To gain proper coverage for CONFIG_UBSAN_BOUNDS and CONFIG_FORTIFY_SOURCE,
# the kernel uses only C99 flexible arrays for dynamically sized trailing
# arrays. Enforce this for everything that may examine structure sizes and
# perform bounds checking.
KBUILD_CFLAGS += $(call cc-option, -fstrict-flex-arrays=3)
-# disable stringop warnings in gcc 8+
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
-
-# We'll want to enable this eventually, but it's not going away for 5.7 at least
-KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
-
-# Another good warning that we'll want to enable eventually
-KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
-
-# Enabled with W=2, disabled by default as noisy
-ifdef CONFIG_CC_IS_GCC
-KBUILD_CFLAGS += -Wno-maybe-uninitialized
-endif
-
-# The allocators already balk at large sizes, so silence the compiler
-# warnings for bounds checks involving those possible values. While
-# -Wno-alloc-size-larger-than would normally be used here, earlier versions
-# of gcc (<9.1) weirdly don't handle the option correctly when _other_
-# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
-# doesn't work (as it is documented to), silently resolving to "0" prior to
-# version 9.1 (and producing an error more recently). Numeric values larger
-# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
-# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
-# choice, we must perform a versioned check to disable this warning.
-# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
-KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
-KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
-
# disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += -fno-strict-overflow
KBUILD_CFLAGS += -fconserve-stack
endif
-# Prohibit date/time macros, which would make the build non-deterministic
-KBUILD_CFLAGS += -Werror=date-time
-
-# enforce correct pointer usage
-KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
-
-# Require designated initializers for all marked structures
-KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
-
# change __FILE__ to the relative path from the srctree
KBUILD_CPPFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
modules_prepare: prepare
$(Q)$(MAKE) $(build)=scripts scripts/module.lds
-export modules_sign_only :=
-
-ifeq ($(CONFIG_MODULE_SIG),y)
-PHONY += modules_sign
-modules_sign: modules_install
- @:
-
-# modules_sign is a subset of modules_install.
-# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
-ifeq ($(filter modules_install,$(MAKECMDGOALS)),)
-modules_sign_only := y
-endif
-endif
-
endif # CONFIG_MODULES
-modinst_pre :=
-ifneq ($(filter modules_install,$(MAKECMDGOALS)),)
-modinst_pre := __modinst_pre
-endif
-
-modules_install: $(modinst_pre)
-PHONY += __modinst_pre
-__modinst_pre:
- @rm -rf $(MODLIB)/kernel
- @rm -f $(MODLIB)/source
- @mkdir -p $(MODLIB)
-ifdef CONFIG_MODULES
- @ln -s $(abspath $(srctree)) $(MODLIB)/source
- @if [ ! $(objtree) -ef $(MODLIB)/build ]; then \
- rm -f $(MODLIB)/build ; \
- ln -s $(CURDIR) $(MODLIB)/build ; \
- fi
- @sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order
-endif
- @cp -f modules.builtin $(MODLIB)/
- @cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
-
###
# Cleaning is done on three levels.
# make clean Delete most generated files
# make distclean Remove editor backup files, patch leftover files and the like
# Directories & files removed with 'make clean'
-CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \
+CLEAN_FILES += vmlinux.symvers modules-only.symvers \
modules.builtin modules.builtin.modinfo modules.nsdeps \
compile_commands.json .thinlto-cache rust/test rust/doc \
rust-project.json .vmlinux.objs .vmlinux.export.c
certs/signing_key.pem \
certs/x509.genkey \
vmlinux-gdb.py \
- *.spec rpmbuild \
+ kernel.spec rpmbuild \
rust/libmacros.so
# clean - Delete most, but leave enough to build external modules
@echo ' mrproper - Remove all generated files + config + various backup files'
@echo ' distclean - mrproper + remove editor backup and patch files'
@echo ''
- @echo 'Configuration targets:'
@$(MAKE) -f $(srctree)/scripts/kconfig/Makefile help
@echo ''
@echo 'Other generic targets:'
@echo ' rust-analyzer - generate rust-project.json rust-analyzer support file'
@echo ''
+ifndef CONFIG_MODULES
+modules modules_install: __external_modules_error
__external_modules_error:
@echo >&2 '***'
@echo >&2 '*** The present kernel disabled CONFIG_MODULES.'
@echo >&2 '*** You cannot build or install external modules.'
@echo >&2 '***'
@false
+endif
endif # KBUILD_EXTMOD
# ---------------------------------------------------------------------------
# Modules
-PHONY += modules modules_install modules_prepare
+PHONY += modules modules_install modules_sign modules_prepare
+
+modules_install:
+ $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst \
+ sign-only=$(if $(filter modules_install,$(MAKECMDGOALS)),,y)
+
+ifeq ($(CONFIG_MODULE_SIG),y)
+# modules_sign is a subset of modules_install.
+# 'make modules_install modules_sign' is equivalent to 'make modules_install'.
+modules_sign: modules_install
+ @:
+else
+modules_sign:
+ @echo >&2 '***'
+ @echo >&2 '*** CONFIG_MODULE_SIG is disabled. You cannot sign modules.'
+ @echo >&2 '***'
+ @false
+endif
ifdef CONFIG_MODULES
modules_check: $(MODORDER)
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/modules-check.sh $<
-quiet_cmd_depmod = DEPMOD $(MODLIB)
- cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
- $(KERNELRELEASE)
-
-modules_install:
- $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.modinst
- $(call cmd,depmod)
-
else # CONFIG_MODULES
-# Modules not configured
-# ---------------------------------------------------------------------------
-
-PHONY += __external_modules_error
-
-modules modules_install: __external_modules_error
+modules:
@:
KBUILD_MODULES :=
image_name:
@echo $(KBUILD_IMAGE)
+PHONY += run-command
+run-command:
+ $(Q)$(KBUILD_RUN_COMMAND)
+
quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)))
cmd_rmfiles = rm -rf $(rm-files)
generated-y += syscall_table.h
generic-y += agp.h
generic-y += asm-offsets.h
-generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
* arch/alpha/lib/callback_srm.S
*/
+#include <linux/export.h>
#include <asm/console.h>
-#include <asm/export.h>
.text
#define HWRPB_CRB_OFFSET 0xc0
*
* Zero an entire page.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.global clear_page
* a successful copy). There is also some rather minor exception setup
* stuff.
*/
-#include <asm/export.h>
+#include <linux/export.h>
/* Allow an exception for an insn; exit if we get one. */
#define EX(x,y...) \
*
* Copy an entire page.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.global copy_page
* exception setup stuff..
*/
-#include <asm/export.h>
+#include <linux/export.h>
/* Allow an exception for an insn; exit if we get one. */
#define EXI(x,y...) \
* added by Ivan Kokshaysky <ink@jurassic.park.msu.ru>
*/
-#include <asm/export.h>
+#include <linux/export.h>
.globl csum_ipv6_magic
.align 4
.ent csum_ipv6_magic
* $28 - compare status
*/
-#include <asm/export.h>
+#include <linux/export.h>
#define halt .long 0
/*
*
* Zero an entire page.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.global clear_page
* want to leave a hole (and we also want to avoid repeating lots of work)
*/
-#include <asm/export.h>
+#include <linux/export.h>
/* Allow an exception for an insn; exit if we get one. */
#define EX(x,y...) \
99: x,##y; \
destination pages are in the dcache, but it is my guess that this is
less important than the dcache miss case. */
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.global copy_page
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
*/
-#include <asm/export.h>
+#include <linux/export.h>
/* Allow an exception for an insn; exit if we get one. */
#define EXI(x,y...) \
99: x,##y; \
* may cause additional delay in rare cases (load-load replay traps).
*/
-#include <asm/export.h>
+#include <linux/export.h>
.globl csum_ipv6_magic
.align 4
.ent csum_ipv6_magic
* Try not to change the actual algorithm if possible for consistency.
*/
-#include <asm/export.h>
+#include <linux/export.h>
#define halt .long 0
/*
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
* Try not to change the actual algorithm if possible for consistency.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
* Temp usage notes:
* $1,$2, - scratch
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
* as fixes will need to be made in multiple places. The performance gain
* is worth it.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noat
.set noreorder
.text
* string once.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
* Try not to change the actual algorithm if possible for consistency.
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/regdef.h>
.set noreorder
* U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
* Try not to change the actual algorithm if possible for consistency.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
* L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/regdef.h>
.set noreorder
- only minimum number of quadwords may be accessed
- the third argument is an unsigned long
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
* This is hand-massaged output from the original memcpy.c. We defer to
* memcpy whenever possible; the backwards copy loops are not unrolled.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noat
.set noreorder
.text
* The scheduling comments are according to the EV5 documentation (and done by
* hand, so they might well be incorrect, please do tell me about it..)
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noat
.set noreorder
.text
*
* Append a null-terminated string from SRC to DST.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
* Return the address of a given character within a null-terminated
* string, or null if it is not found.
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/regdef.h>
.set noreorder
* Copy a null-terminated string from SRC to DST. Return a pointer
* to the null-terminator in the source.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 3
* do this instead of the 9 instructions that
* binary search needs).
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
* past count, whereas libc may write to count+1. This follows the generic
* implementation in lib/string.c and is, IMHO, more sensible.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 3
* version has cropped that bit o' nastiness as well as assuming that
* __stxncpy is in range of a branch.
*/
-#include <asm/export.h>
+#include <linux/export.h>
.set noat
.set noreorder
* Return the address of the last occurrence of a given character
* within a null-terminated string, or null if it is not found.
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/regdef.h>
.set noreorder
# along with GCC; see the file COPYING. If not, write to the
# Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA.
-#include <asm/export.h>
+#include <linux/export.h>
.set noreorder
.set noat
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select GENERIC_IOREMAP
+ select GENERIC_STRNCPY_FROM_USER if MMU
+ select GENERIC_STRNLEN_USER if MMU
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
kernel-user gutter)
config ARC_CURR_IN_REG
- bool "Dedicate Register r25 for current_task pointer"
+ bool "cache current task pointer in gp"
default y
help
- This reserved Register R25 to point to Current Task in
- kernel mode. This saves memory access for each such access
+ This reserves gp register to point to Current Task in
+ kernel mode eliding memory access for each access
config ARC_EMUL_UNALIGNED
endif
endif
-
ifdef CONFIG_ARC_CURR_IN_REG
# For a global register definition, make sure it gets passed to every file
# We had a customer reported bug where some code built in kernel was NOT using
-# any kernel headers, and missing the r25 global register
+# any kernel headers, and missing the global register
# Can't do unconditionally because of recursive include issues
# due to <linux/thread_info.h>
LINUXINCLUDE += -include $(srctree)/arch/arc/include/asm/current.h
+cflags-y += -ffixed-gp
endif
cflags-y += -fsection-anchors
# small data is default for elf32 tool-chain. If not usable, disable it
# This also allows repurposing GP as scratch reg to gcc reg allocator
disable_small_data := y
-cflags-$(disable_small_data) += -mno-sdata -fcall-used-gp
+cflags-$(disable_small_data) += -mno-sdata
cflags-$(CONFIG_CPU_BIG_ENDIAN) += -mbig-endian
ldflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB
#define ARC_REG_ICCM_BUILD 0x78 /* ICCM size (common) */
#define ARC_REG_XY_MEM_BCR 0x79
#define ARC_REG_MAC_BCR 0x7a
-#define ARC_REG_MUL_BCR 0x7b
+#define ARC_REG_MPY_BCR 0x7b
#define ARC_REG_SWAP_BCR 0x7c
#define ARC_REG_NORM_BCR 0x7d
#define ARC_REG_MIXMAX_BCR 0x7e
#endif
};
-struct bcr_uarch_build_arcv2 {
+struct bcr_uarch_build {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:8, prod:8, maj:8, min:8;
#else
#endif
};
+struct bcr_mmu_3 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
+ u_itlb:4, u_dtlb:4;
+#else
+ unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
+ ways:4, ver:8;
+#endif
+};
+
+struct bcr_mmu_4 {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
+ n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
+#else
+ /* DTLB ITLB JES JE JA */
+ unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
+ pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
+#endif
+};
+
+struct bcr_cache {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
+#else
+ unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
+#endif
+};
+
+struct bcr_slc_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:24, way:2, lsz:2, sz:4;
+#else
+ unsigned int sz:4, lsz:2, way:2, pad:24;
+#endif
+};
+
+struct bcr_clust_cfg {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
+#else
+ unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
+#endif
+};
+
+struct bcr_volatile {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned int start:4, limit:4, pad:22, order:1, disable:1;
+#else
+ unsigned int disable:1, order:1, pad:22, limit:4, start:4;
+#endif
+};
+
struct bcr_mpy {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int pad:8, x1616:8, dsp:4, cycles:2, type:2, ver:8;
#endif
};
-/*
- *******************************************************************
- * Generic structures to hold build configuration used at runtime
- */
-
-struct cpuinfo_arc_mmu {
- unsigned int ver:4, pg_sz_k:8, s_pg_sz_m:8, pad:10, sasid:1, pae:1;
- unsigned int sets:12, ways:4, u_dtlb:8, u_itlb:8;
-};
-
-struct cpuinfo_arc_cache {
- unsigned int sz_k:14, line_len:8, assoc:4, alias:1, vipt:1, pad:4;
-};
-
-struct cpuinfo_arc_bpu {
- unsigned int ver, full, num_cache, num_pred, ret_stk;
-};
-
-struct cpuinfo_arc_ccm {
- unsigned int base_addr, sz;
-};
-
-struct cpuinfo_arc {
- struct cpuinfo_arc_cache icache, dcache, slc;
- struct cpuinfo_arc_mmu mmu;
- struct cpuinfo_arc_bpu bpu;
- struct bcr_identity core;
- struct bcr_isa_arcv2 isa;
- const char *release, *name;
- unsigned int vec_base;
- struct cpuinfo_arc_ccm iccm, dccm;
- struct {
- unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
- fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
- ap_num:4, ap_full:1, smart:1, rtt:1, pad3:1,
- timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
- } extn;
- struct bcr_mpy extn_mpy;
-};
-
-extern struct cpuinfo_arc cpuinfo_arc700[];
-
static inline int is_isa_arcv2(void)
{
return IS_ENABLED(CONFIG_ISA_ARCV2);
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
- : "cc"); \
+ : "cc", "memory"); \
} \
#define ATOMIC_OP_RETURN(op, asm_op) \
: [val] "=&r" (val) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
- : "cc"); \
+ : "cc", "memory"); \
\
return val; \
}
[orig] "=&r" (orig) \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
- : "cc"); \
+ : "cc", "memory"); \
\
return orig; \
}
" bnz 1b \n" \
: "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
- : "cc"); \
+ : "cc", "memory"); \
} \
#define ATOMIC64_OP_RETURN(op, op1, op2) \
" bnz 1b \n" \
: [val] "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
+ : "cc", "memory"); \
\
return val; \
}
" bnz 1b \n" \
: "=&r"(orig), "=&r"(val) \
: "r"(&v->counter), "ir"(a) \
- : "cc"); /* memory clobber comes from smp_mb() */ \
+ : "cc", "memory"); \
\
return orig; \
}
#ifdef CONFIG_ARC_CURR_IN_REG
-register struct task_struct *curr_arc asm("r25");
+register struct task_struct *curr_arc asm("gp");
#define current (curr_arc)
#else
#ifdef ARC_DW2_UNWIND_AS_CFI
-#define CFI_STARTPROC .cfi_startproc
-#define CFI_ENDPROC .cfi_endproc
-#define CFI_DEF_CFA .cfi_def_cfa
-#define CFI_REGISTER .cfi_register
-#define CFI_REL_OFFSET .cfi_rel_offset
-#define CFI_UNDEFINED .cfi_undefined
+#define CFI_STARTPROC .cfi_startproc
+#define CFI_ENDPROC .cfi_endproc
+#define CFI_DEF_CFA .cfi_def_cfa
+#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset
+#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register
+#define CFI_OFFSET .cfi_offset
+#define CFI_REL_OFFSET .cfi_rel_offset
+#define CFI_REGISTER .cfi_register
+#define CFI_RESTORE .cfi_restore
+#define CFI_UNDEFINED .cfi_undefined
#else
#define CFI_IGNORE #
-#define CFI_STARTPROC CFI_IGNORE
-#define CFI_ENDPROC CFI_IGNORE
-#define CFI_DEF_CFA CFI_IGNORE
-#define CFI_REGISTER CFI_IGNORE
-#define CFI_REL_OFFSET CFI_IGNORE
-#define CFI_UNDEFINED CFI_IGNORE
+#define CFI_STARTPROC CFI_IGNORE
+#define CFI_ENDPROC CFI_IGNORE
+#define CFI_DEF_CFA CFI_IGNORE
+#define CFI_DEF_CFA_OFFSET CFI_IGNORE
+#define CFI_DEF_CFA_REGISTER CFI_IGNORE
+#define CFI_OFFSET CFI_IGNORE
+#define CFI_REL_OFFSET CFI_IGNORE
+#define CFI_REGISTER CFI_IGNORE
+#define CFI_RESTORE CFI_IGNORE
+#define CFI_UNDEFINED CFI_IGNORE
#endif /* !ARC_DW2_UNWIND_AS_CFI */
* | orig_r0 |
* | event/ECR |
* | bta |
- * | user_r25 |
* | gp |
* | fp |
* | sp |
/*------------------------------------------------------------------------*/
.macro INTERRUPT_PROLOGUE
- ; (A) Before jumping to Interrupt Vector, hardware micro-ops did following:
+ ; Before jumping to Interrupt Vector, hardware micro-ops did following:
; 1. SP auto-switched to kernel mode stack
; 2. STATUS32.Z flag set if in U mode at time of interrupt (U:1,K:0)
; 3. Auto save: (mandatory) Push PC and STAT32 on stack
; hardware does even if CONFIG_ARC_IRQ_NO_AUTOSAVE
- ; 4. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
+ ; 4a. Auto save: (optional) r0-r11, blink, LPE,LPS,LPC, JLI,LDI,EI
;
- ; (B) Manually saved some regs: r12,r25,r30, sp,fp,gp, ACCL pair
+ ; Now
+ ; 4b. If Auto-save (optional) not enabled in hw, manually save them
+ ; 5. Manually save: r12,r30, sp,fp,gp, ACCL pair
+ ;
+ ; At the end, SP points to pt_regs
#ifdef CONFIG_ARC_IRQ_NO_AUTOSAVE
; carve pt_regs on stack (case #3), PC/STAT32 already on stack
.endm
/*------------------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
- ; (A) Before jumping to Exception Vector, hardware micro-ops did following:
+ ; Before jumping to Exception Vector, hardware micro-ops did following:
; 1. SP auto-switched to kernel mode stack
; 2. STATUS32.Z flag set if in U mode at time of exception (U:1,K:0)
;
- ; (B) Manually save the complete reg file below
+ ; Now manually save rest of reg file
+ ; At the end, SP points to pt_regs
- sub sp, sp, SZ_PT_REGS ; carve pt_regs
+ sub sp, sp, SZ_PT_REGS ; carve space for pt_regs
; _HARD saves r10 clobbered by _SOFT as scratch hence comes first
; OUTPUT: r10 has ECR expected by EV_Trap
.endm
+.macro EXCEPTION_PROLOGUE
+
+ EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10
+
+ lr r0, [efa]
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN ; clobbers r9
+.endm
+
/*------------------------------------------------------------------------
* This macro saves the registers manually which would normally be autosaved
* by hardware on taken interrupts. It is used by
*/
.macro __SAVE_REGFILE_SOFT
- ST2 gp, fp, PT_r26 ; gp (r26), fp (r27)
-
- st r12, [sp, PT_sp + 4]
- st r30, [sp, PT_sp + 8]
+ st fp, [sp, PT_fp] ; r27
+ st r30, [sp, PT_r30]
+ st r12, [sp, PT_r12]
+ st r26, [sp, PT_r26] ; gp
; Saving pt_regs->sp correctly requires some extra work due to the way
; Auto stack switch works
; ISA requires ADD.nz to have same dest and src reg operands
mov.nz r10, sp
- add.nz r10, r10, SZ_PT_REGS ; K mode SP
+ add2.nz r10, r10, SZ_PT_REGS/4 ; K mode SP
st r10, [sp, PT_sp] ; SP (pt_regs->sp)
-#ifdef CONFIG_ARC_CURR_IN_REG
- st r25, [sp, PT_user_r25]
- GET_CURR_TASK_ON_CPU r25
-#endif
-
#ifdef CONFIG_ARC_HAS_ACCL_REGS
ST2 r58, r59, PT_r58
#endif
/* clobbers r10, r11 registers pair */
DSP_SAVE_REGFILE_IRQ
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ GET_CURR_TASK_ON_CPU gp
+#endif
+
.endm
/*------------------------------------------------------------------------*/
.macro __RESTORE_REGFILE_SOFT
- LD2 gp, fp, PT_r26 ; gp (r26), fp (r27)
-
- ld r12, [sp, PT_r12]
+ ld fp, [sp, PT_fp]
ld r30, [sp, PT_r30]
+ ld r12, [sp, PT_r12]
+ ld r26, [sp, PT_r26]
; Restore SP (into AUX_USER_SP) only if returning to U mode
; - for K mode, it will be implicitly restored as stack is unwound
sr r10, [AUX_USER_SP]
1:
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, PT_user_r25]
-#endif
-
/* clobbers r10, r11 registers pair */
DSP_RESTORE_REGFILE_IRQ
btst r0, STATUS_U_BIT ; Z flag set if K, used in restoring SP
- ld r10, [sp, PT_event + 4]
+ ld r10, [sp, PT_bta]
sr r10, [erbta]
LD2 r10, r11, PT_ret
.macro FAKE_RET_FROM_EXCPN
lr r9, [status32]
- bic r9, r9, STATUS_AE_MASK
- or r9, r9, STATUS_IE_MASK
+ bclr r9, r9, STATUS_AE_BIT
+ bset r9, r9, STATUS_IE_BIT
kflag r9
.endm
*
* After this it is safe to call the "C" handlers
*-------------------------------------------------------------*/
-.macro EXCEPTION_PROLOGUE
+.macro EXCEPTION_PROLOGUE_KEEP_AE
/* Need at least 1 reg to code the early exception prologue */
PROLOG_FREEUP_REG r9, @ex_saved_reg1
/* ARC700 doesn't provide auto-stack switching */
SWITCH_TO_KERNEL_STK
-#ifdef CONFIG_ARC_CURR_IN_REG
- /* Treat r25 as scratch reg (save on stack) and load with "current" */
- PUSH r25
- GET_CURR_TASK_ON_CPU r25
-#else
- sub sp, sp, 4
-#endif
-
st.a r0, [sp, -8] /* orig_r0 needed for syscall (skip ECR slot) */
sub sp, sp, 4 /* skip pt_regs->sp, already saved above */
PUSHAX erbta
lr r10, [ecr]
- st r10, [sp, PT_event] /* EV_Trap expects r10 to have ECR */
+ st r10, [sp, PT_event]
+
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* gp already saved on stack: now load with "current" */
+ GET_CURR_TASK_ON_CPU gp
+#endif
+ ; OUTPUT: r10 has ECR expected by EV_Trap
+.endm
+
+.macro EXCEPTION_PROLOGUE
+
+ EXCEPTION_PROLOGUE_KEEP_AE ; return ECR in r10
+
+ lr r0, [efa]
+ mov r1, sp
+
+ FAKE_RET_FROM_EXCPN ; clobbers r9
.endm
/*--------------------------------------------------------------
POP gp
RESTORE_R12_TO_R0
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, 12]
-#endif
ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
+ /* orig_r0, ECR skipped automatically */
.endm
/* Dummy ECR values for Interrupts */
SWITCH_TO_KERNEL_STK
-#ifdef CONFIG_ARC_CURR_IN_REG
- /* Treat r25 as scratch reg (save on stack) and load with "current" */
- PUSH r25
- GET_CURR_TASK_ON_CPU r25
-#else
- sub sp, sp, 4
-#endif
PUSH 0x003\LVL\()abcd /* Dummy ECR */
sub sp, sp, 8 /* skip orig_r0 (not needed)
PUSHAX lp_start
PUSHAX bta_l\LVL\()
+#ifdef CONFIG_ARC_CURR_IN_REG
+ /* gp already saved on stack: now load with "current" */
+ GET_CURR_TASK_ON_CPU gp
+#endif
.endm
/*--------------------------------------------------------------
POP gp
RESTORE_R12_TO_R0
-#ifdef CONFIG_ARC_CURR_IN_REG
- ld r25, [sp, 12]
-#endif
- ld sp, [sp] /* restore original sp */
- /* orig_r0, ECR, user_r25 skipped automatically */
+ ld sp, [sp] /* restore original sp; orig_r0, ECR skipped implicitly */
.endm
/* Get thread_info of "current" tsk */
#include <asm/processor.h> /* For VMALLOC_START */
#include <asm/mmu.h>
+#ifdef __ASSEMBLY__
+
#ifdef CONFIG_ISA_ARCOMPACT
#include <asm/entry-compact.h> /* ISA specific bits */
#else
* Helpers to save/restore callee-saved regs:
* used by several macros below
*-------------------------------------------------------------*/
-.macro SAVE_R13_TO_R24
+.macro SAVE_R13_TO_R25
PUSH r13
PUSH r14
PUSH r15
PUSH r22
PUSH r23
PUSH r24
+ PUSH r25
.endm
-.macro RESTORE_R24_TO_R13
+.macro RESTORE_R25_TO_R13
+ POP r25
POP r24
POP r23
POP r22
POP r13
.endm
-/*--------------------------------------------------------------
- * Collect User Mode callee regs as struct callee_regs - needed by
- * fork/do_signal/unaligned-access-emulation.
- * (By default only scratch regs are saved on entry to kernel)
- *
- * Special handling for r25 if used for caching Task Pointer.
- * It would have been saved in task->thread.user_r25 already, but to keep
- * the interface same it is copied into regular r25 placeholder in
- * struct callee_regs.
- *-------------------------------------------------------------*/
+/*
+ * save user mode callee regs as struct callee_regs
+ * - needed by fork/do_signal/unaligned-access-emulation.
+ */
.macro SAVE_CALLEE_SAVED_USER
+ SAVE_R13_TO_R25
+.endm
- mov r12, sp ; save SP as ref to pt_regs
- SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- ; Retrieve orig r25 and save it with rest of callee_regs
- ld r12, [r12, PT_user_r25]
- PUSH r12
-#else
- PUSH r25
-#endif
-
+/*
+ * restore user mode callee regs as struct callee_regs
+ * - could have been changed by ptrace tracer or unaligned-access fixup
+ */
+.macro RESTORE_CALLEE_SAVED_USER
+ RESTORE_R25_TO_R13
.endm
-/*--------------------------------------------------------------
- * Save kernel Mode callee regs at the time of Contect Switch.
- *
- * Special handling for r25 if used for caching Task Pointer.
- * Kernel simply skips saving it since it will be loaded with
- * incoming task pointer anyways
- *-------------------------------------------------------------*/
+/*
+ * save/restore kernel mode callee regs at the time of context switch
+ */
.macro SAVE_CALLEE_SAVED_KERNEL
-
- SAVE_R13_TO_R24
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- sub sp, sp, 4
-#else
- PUSH r25
-#endif
+ SAVE_R13_TO_R25
.endm
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_KERNEL
- *-------------------------------------------------------------*/
.macro RESTORE_CALLEE_SAVED_KERNEL
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- add sp, sp, 4 /* skip usual r25 placeholder */
-#else
- POP r25
-#endif
- RESTORE_R24_TO_R13
-.endm
-
-/*--------------------------------------------------------------
- * Opposite of SAVE_CALLEE_SAVED_USER
- *
- * ptrace tracer or unaligned-access fixup might have changed a user mode
- * callee reg which is saved back to usual r25 storage location
- *-------------------------------------------------------------*/
-.macro RESTORE_CALLEE_SAVED_USER
-
-#ifdef CONFIG_ARC_CURR_IN_REG
- POP r12
-#else
- POP r25
-#endif
- RESTORE_R24_TO_R13
-
- ; SP is back to start of pt_regs
-#ifdef CONFIG_ARC_CURR_IN_REG
- st r12, [sp, PT_user_r25]
-#endif
+ RESTORE_R25_TO_R13
.endm
/*--------------------------------------------------------------
#ifdef CONFIG_SMP
-/*-------------------------------------------------
+/*
* Retrieve the current running task on this CPU
- * 1. Determine curr CPU id.
- * 2. Use it to index into _current_task[ ]
+ * - loads it from backing _current_task[] (and can't use the
+ * caching reg for current task
*/
.macro GET_CURR_TASK_ON_CPU reg
GET_CPU_ID \reg
add2 \tmp, @_current_task, \tmp
st \tsk, [\tmp]
#ifdef CONFIG_ARC_CURR_IN_REG
- mov r25, \tsk
+ mov gp, \tsk
#endif
.endm
.macro SET_CURR_TASK_ON_CPU tsk, tmp
st \tsk, [@_current_task]
#ifdef CONFIG_ARC_CURR_IN_REG
- mov r25, \tsk
+ mov gp, \tsk
#endif
.endm
#endif /* SMP / UNI */
-/* ------------------------------------------------------------------
+/*
* Get the ptr to some field of Current Task at @off in task struct
- * -Uses r25 for Current task ptr if that is enabled
+ * - Uses current task cached in reg if enabled
*/
-
#ifdef CONFIG_ARC_CURR_IN_REG
.macro GET_CURR_TASK_FIELD_PTR off, reg
- add \reg, r25, \off
+ add \reg, gp, \off
.endm
#else
#endif /* CONFIG_ARC_CURR_IN_REG */
+#else /* !__ASSEMBLY__ */
+
+extern void do_signal(struct pt_regs *);
+extern void do_notify_resume(struct pt_regs *);
+extern int do_privilege_fault(unsigned long, struct pt_regs *);
+extern int do_extension_fault(unsigned long, struct pt_regs *);
+extern int insterror_is_error(unsigned long, struct pt_regs *);
+extern int do_memory_error(unsigned long, struct pt_regs *);
+extern int trap_is_brkpt(unsigned long, struct pt_regs *);
+extern int do_misaligned_error(unsigned long, struct pt_regs *);
+extern int do_trap5_error(unsigned long, struct pt_regs *);
+extern int do_misaligned_access(unsigned long, struct pt_regs *, struct callee_regs *);
+extern void do_machine_check_fault(unsigned long, struct pt_regs *);
+extern void do_non_swi_trap(unsigned long, struct pt_regs *);
+extern void do_insterror_or_kprobe(unsigned long, struct pt_regs *);
+extern void do_page_fault(unsigned long, struct pt_regs *);
+
+#endif
+
#endif /* __ASM_ARC_ENTRY_H */
#include <asm-generic/irq.h>
extern void arc_init_IRQ(void);
+extern void arch_do_IRQ(unsigned int, struct pt_regs *);
#endif
unsigned long asid[NR_CPUS]; /* 8 bit MMU PID + Generation cycle */
} mm_context_t;
+extern void do_tlb_overlap_fault(unsigned long, unsigned long, struct pt_regs *);
+
#endif
#include <asm/mmu-arcv2.h>
* struct thread_info
*/
struct thread_struct {
- unsigned long ksp; /* kernel mode stack pointer */
unsigned long callee_reg; /* pointer to callee regs */
unsigned long fault_address; /* dbls as brkpt holder as well */
#ifdef CONFIG_ARC_DSP_SAVE_RESTORE_REGS
#endif
};
-#define INIT_THREAD { \
- .ksp = sizeof(init_stack) + (unsigned long) init_stack, \
-}
+#define INIT_THREAD { }
/* Forward declaration, a strange C thing */
struct task_struct;
* Where about of Task's sp, fp, blink when it was last seen in kernel mode.
* Look in process.c for details of kernel stack layout
*/
-#define TSK_K_ESP(tsk) (tsk->thread.ksp)
+#define TSK_K_ESP(tsk) (task_thread_info(tsk)->ksp)
#define TSK_K_REG(tsk, off) (*((unsigned long *)(TSK_K_ESP(tsk) + \
sizeof(struct callee_regs) + off)))
#ifndef __ASSEMBLY__
+typedef union {
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ unsigned long state:8, vec:8, cause:8, param:8;
+#else
+ unsigned long param:8, cause:8, vec:8, state:8;
+#endif
+ };
+ unsigned long full;
+} ecr_reg;
+
/* THE pt_regs: Defines how regs are saved during entry into kernel */
#ifdef CONFIG_ISA_ARCOMPACT
* Last word used by Linux for extra state mgmt (syscall-restart)
* For interrupts, use artificial ECR values to note current prio-level
*/
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned long state:8, ecr_vec:8,
- ecr_cause:8, ecr_param:8;
-#else
- unsigned long ecr_param:8, ecr_cause:8,
- ecr_vec:8, state:8;
-#endif
- };
- unsigned long event;
- };
-
- unsigned long user_r25;
+ ecr_reg ecr;
};
-#define MAX_REG_OFFSET offsetof(struct pt_regs, user_r25)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, ecr)
#else
unsigned long orig_r0;
- union {
- struct {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned long state:8, ecr_vec:8,
- ecr_cause:8, ecr_param:8;
-#else
- unsigned long ecr_param:8, ecr_cause:8,
- ecr_vec:8, state:8;
-#endif
- };
- unsigned long event;
- };
-
- unsigned long bta; /* bta_l1, bta_l2, erbta */
+ ecr_reg ecr; /* Exception Cause Reg */
- unsigned long user_r25;
+ unsigned long bta; /* erbta */
- unsigned long r26; /* gp */
unsigned long fp;
- unsigned long sp; /* user/kernel sp depending on where we came from */
-
- unsigned long r12, r30;
+ unsigned long r30;
+ unsigned long r12;
+ unsigned long r26; /* gp */
#ifdef CONFIG_ARC_HAS_ACCL_REGS
unsigned long r58, r59; /* ACCL/ACCH used by FPU / DSP MPY */
unsigned long DSP_CTRL;
#endif
+ unsigned long sp; /* user/kernel sp depending on entry */
+
/*------- Below list auto saved by h/w -----------*/
unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
/* return 1 if PC in delay slot */
#define delay_mode(regs) ((regs->status32 & STATUS_DE_MASK) == STATUS_DE_MASK)
-#define in_syscall(regs) ((regs->ecr_vec == ECR_V_TRAP) && !regs->ecr_param)
-#define in_brkpt_trap(regs) ((regs->ecr_vec == ECR_V_TRAP) && regs->ecr_param)
+#define in_syscall(regs) ((regs->ecr.vec == ECR_V_TRAP) && !regs->ecr.param)
+#define in_brkpt_trap(regs) ((regs->ecr.vec == ECR_V_TRAP) && regs->ecr.param)
#define STATE_SCALL_RESTARTED 0x01
-#define syscall_wont_restart(reg) (reg->state |= STATE_SCALL_RESTARTED)
-#define syscall_restartable(reg) !(reg->state & STATE_SCALL_RESTARTED)
+#define syscall_wont_restart(regs) (regs->ecr.state |= STATE_SCALL_RESTARTED)
+#define syscall_restartable(regs) !(regs->ecr.state & STATE_SCALL_RESTARTED)
#define current_pt_regs() \
({ \
return *(unsigned long *)((unsigned long)regs + offset);
}
+extern int syscall_trace_entry(struct pt_regs *);
+extern void syscall_trace_exit(struct pt_regs *);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_PTRACE_H */
#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2))
extern void arc_mmu_init(void);
-extern char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_mmu_bcr(void);
+extern int arc_mmu_mumbojumbo(int cpu_id, char *buf, int len);
extern void arc_cache_init(void);
-extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
-extern void read_decode_cache_bcr(void);
+extern int arc_cache_mumbojumbo(int cpu_id, char *buf, int len);
+
+extern void __init handle_uboot_args(void);
#endif /* __ASMARC_SETUP_H */
extern void __init smp_init_cpus(void);
extern void first_lines_of_secondary(void);
extern const char *arc_platform_smp_cpuinfo(void);
+extern void arc_platform_smp_wait_to_boot(int);
+extern void start_kernel_secondary(void);
/*
* API expected BY platform smp code (FROM arch smp code)
*/
struct thread_info {
unsigned long flags; /* low level flags */
+ unsigned long ksp; /* kernel mode stack top in __switch_to */
int preempt_count; /* 0 => preemptable, <0 => BUG */
- struct task_struct *task; /* main task structure */
- __u32 cpu; /* current CPU */
+ int cpu; /* current CPU */
unsigned long thr_ptr; /* TLS ptr */
+ struct task_struct *task; /* main task structure */
};
/*
- * macros/functions for gaining access to the thread information structure
- *
- * preempt_count needs to be 1 initially, until the scheduler is functional.
+ * initilaize thread_info for any @tsk
+ * - this is not related to init_task per se
*/
#define INIT_THREAD_INFO(tsk) \
{ \
if (n == 0)
return 0;
- /* unaligned */
- if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+ /* fallback for unaligned access when hardware doesn't support */
+ if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+ (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
unsigned char tmp;
if (n == 0)
return 0;
- /* unaligned */
- if (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3)) {
+ /* fallback for unaligned access when hardware doesn't support */
+ if (!IS_ENABLED(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) &&
+ (((unsigned long)to & 0x3) || ((unsigned long)from & 0x3))) {
unsigned char tmp;
return res;
}
-static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
+static inline unsigned long __clear_user(void __user *to, unsigned long n)
{
long res = n;
unsigned char *d_char = to;
return res;
}
-#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
#define INLINE_COPY_TO_USER
#define INLINE_COPY_FROM_USER
-#define __clear_user(d, n) __arc_clear_user(d, n)
-#else
-extern unsigned long arc_clear_user_noinline(void __user *to,
- unsigned long n);
-#define __clear_user(d, n) arc_clear_user_noinline(d, n)
-#endif
+#define __clear_user __clear_user
#include <asm-generic/uaccess.h>
obj-y := head.o arcksyms.o setup.o irq.o reset.o ptrace.o process.o devtree.o
obj-y += signal.o traps.o sys.o troubleshoot.o stacktrace.o disasm.o
+obj-y += ctx_sw_asm.o
+
obj-$(CONFIG_ISA_ARCOMPACT) += entry-compact.o intc-compact.o
obj-$(CONFIG_ISA_ARCV2) += entry-arcv2.o intc-arcv2.o
CFLAGS_fpu.o += -mdpfp
endif
-ifdef CONFIG_ARC_DW2_UNWIND
-CFLAGS_ctx_sw.o += -fno-omit-frame-pointer
-obj-y += ctx_sw.o
-else
-obj-y += ctx_sw_asm.o
-endif
-
extra-y := vmlinux.lds
BLANK();
- DEFINE(THREAD_KSP, offsetof(struct thread_struct, ksp));
DEFINE(THREAD_CALLEE_REG, offsetof(struct thread_struct, callee_reg));
DEFINE(THREAD_FAULT_ADDR,
offsetof(struct thread_struct, fault_address));
BLANK();
+ DEFINE(THREAD_INFO_KSP, offsetof(struct thread_info, ksp));
DEFINE(THREAD_INFO_FLAGS, offsetof(struct thread_info, flags));
DEFINE(THREAD_INFO_PREEMPT_COUNT,
offsetof(struct thread_info, preempt_count));
BLANK();
DEFINE(PT_status32, offsetof(struct pt_regs, status32));
- DEFINE(PT_event, offsetof(struct pt_regs, event));
+ DEFINE(PT_event, offsetof(struct pt_regs, ecr));
+ DEFINE(PT_bta, offsetof(struct pt_regs, bta));
DEFINE(PT_sp, offsetof(struct pt_regs, sp));
DEFINE(PT_r0, offsetof(struct pt_regs, r0));
DEFINE(PT_r1, offsetof(struct pt_regs, r1));
DEFINE(PT_r26, offsetof(struct pt_regs, r26));
DEFINE(PT_ret, offsetof(struct pt_regs, ret));
DEFINE(PT_blink, offsetof(struct pt_regs, blink));
+ OFFSET(PT_fp, pt_regs, fp);
DEFINE(PT_lpe, offsetof(struct pt_regs, lp_end));
DEFINE(PT_lpc, offsetof(struct pt_regs, lp_count));
- DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
-
- DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
- DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
-
#ifdef CONFIG_ISA_ARCV2
OFFSET(PT_r12, pt_regs, r12);
OFFSET(PT_r30, pt_regs, r30);
OFFSET(PT_DSP_CTRL, pt_regs, DSP_CTRL);
#endif
+ DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
+ DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
+
return 0;
}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * Vineetg: Aug 2009
- * -"C" version of lowest level context switch asm macro called by schedular
- * gcc doesn't generate the dward CFI info for hand written asm, hence can't
- * backtrace out of it (e.g. tasks sleeping in kernel).
- * So we cheat a bit by writing almost similar code in inline-asm.
- * -This is a hacky way of doing things, but there is no other simple way.
- * I don't want/intend to extend unwinding code to understand raw asm
- */
-
-#include <asm/asm-offsets.h>
-#include <linux/sched.h>
-#include <linux/sched/debug.h>
-
-#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4)
-
-struct task_struct *__sched
-__switch_to(struct task_struct *prev_task, struct task_struct *next_task)
-{
- unsigned int tmp;
- unsigned int prev = (unsigned int)prev_task;
- unsigned int next = (unsigned int)next_task;
-
- __asm__ __volatile__(
- /* FP/BLINK save generated by gcc (standard function prologue */
- "st.a r13, [sp, -4] \n\t"
- "st.a r14, [sp, -4] \n\t"
- "st.a r15, [sp, -4] \n\t"
- "st.a r16, [sp, -4] \n\t"
- "st.a r17, [sp, -4] \n\t"
- "st.a r18, [sp, -4] \n\t"
- "st.a r19, [sp, -4] \n\t"
- "st.a r20, [sp, -4] \n\t"
- "st.a r21, [sp, -4] \n\t"
- "st.a r22, [sp, -4] \n\t"
- "st.a r23, [sp, -4] \n\t"
- "st.a r24, [sp, -4] \n\t"
-#ifndef CONFIG_ARC_CURR_IN_REG
- "st.a r25, [sp, -4] \n\t"
-#else
- "sub sp, sp, 4 \n\t" /* usual r25 placeholder */
-#endif
-
- /* set ksp of outgoing task in tsk->thread.ksp */
-#if KSP_WORD_OFF <= 255
- "st.as sp, [%3, %1] \n\t"
-#else
- /*
- * Workaround for NR_CPUS=4k
- * %1 is bigger than 255 (S9 offset for st.as)
- */
- "add2 r24, %3, %1 \n\t"
- "st sp, [r24] \n\t"
-#endif
-
- /*
- * setup _current_task with incoming tsk.
- * optionally, set r25 to that as well
- * For SMP extra work to get to &_current_task[cpu]
- * (open coded SET_CURR_TASK_ON_CPU)
- */
-#ifndef CONFIG_SMP
- "st %2, [@_current_task] \n\t"
-#else
- "lr r24, [identity] \n\t"
- "lsr r24, r24, 8 \n\t"
- "bmsk r24, r24, 7 \n\t"
- "add2 r24, @_current_task, r24 \n\t"
- "st %2, [r24] \n\t"
-#endif
-#ifdef CONFIG_ARC_CURR_IN_REG
- "mov r25, %2 \n\t"
-#endif
-
- /* get ksp of incoming task from tsk->thread.ksp */
- "ld.as sp, [%2, %1] \n\t"
-
- /* start loading it's CALLEE reg file */
-
-#ifndef CONFIG_ARC_CURR_IN_REG
- "ld.ab r25, [sp, 4] \n\t"
-#else
- "add sp, sp, 4 \n\t"
-#endif
- "ld.ab r24, [sp, 4] \n\t"
- "ld.ab r23, [sp, 4] \n\t"
- "ld.ab r22, [sp, 4] \n\t"
- "ld.ab r21, [sp, 4] \n\t"
- "ld.ab r20, [sp, 4] \n\t"
- "ld.ab r19, [sp, 4] \n\t"
- "ld.ab r18, [sp, 4] \n\t"
- "ld.ab r17, [sp, 4] \n\t"
- "ld.ab r16, [sp, 4] \n\t"
- "ld.ab r15, [sp, 4] \n\t"
- "ld.ab r14, [sp, 4] \n\t"
- "ld.ab r13, [sp, 4] \n\t"
-
- /* last (ret value) = prev : although for ARC it mov r0, r0 */
- "mov %0, %3 \n\t"
-
- /* FP/BLINK restore generated by gcc (standard func epilogue */
-
- : "=r"(tmp)
- : "n"(KSP_WORD_OFF), "r"(next), "r"(prev)
- : "blink"
- );
-
- return (struct task_struct *)tmp;
-}
#include <asm/entry.h> /* For the SAVE_* macros */
#include <asm/asm-offsets.h>
-#define KSP_WORD_OFF ((TASK_THREAD + THREAD_KSP) / 4)
-
-;################### Low Level Context Switch ##########################
+; IN
+; - r0: prev task (also current)
+; - r1: next task
+; OUT
+; - r0: prev task (so r0 not touched)
.section .sched.text,"ax",@progbits
- .align 4
- .global __switch_to
- .type __switch_to, @function
-__switch_to:
- CFI_STARTPROC
-
- /* Save regs on kernel mode stack of task */
- st.a blink, [sp, -4]
- st.a fp, [sp, -4]
- SAVE_CALLEE_SAVED_KERNEL
+ENTRY_CFI(__switch_to)
- /* Save the now KSP in task->thread.ksp */
-#if KSP_WORD_OFF <= 255
- st.as sp, [r0, KSP_WORD_OFF]
-#else
- /* Workaround for NR_CPUS=4k as ST.as can only take s9 offset */
- add2 r24, r0, KSP_WORD_OFF
- st sp, [r24]
-#endif
- /*
- * Return last task in r0 (return reg)
- * On ARC, Return reg = First Arg reg = r0.
- * Since we already have last task in r0,
- * don't need to do anything special to return it
- */
+ /* save kernel stack frame regs of @prev task */
+ push blink
+ CFI_DEF_CFA_OFFSET 4
+ CFI_OFFSET r31, -4
+
+ push fp
+ CFI_DEF_CFA_OFFSET 8
+ CFI_OFFSET r27, -8
+
+ mov fp, sp
+ CFI_DEF_CFA_REGISTER r27
+
+ /* kernel mode callee regs of @prev */
+ SAVE_CALLEE_SAVED_KERNEL
/*
- * switch to new task, contained in r1
- * Temp reg r3 is required to get the ptr to store val
+ * save final SP to @prev->thread_info.ksp
+ * @prev is "current" so thread_info derived from SP
*/
- SET_CURR_TASK_ON_CPU r1, r3
+ GET_CURR_THR_INFO_FROM_SP r10
+ st sp, [r10, THREAD_INFO_KSP]
+
+ /* update @next in _current_task[] and GP register caching it */
+ SET_CURR_TASK_ON_CPU r1, r10
- /* reload SP with kernel mode stack pointer in task->thread.ksp */
- ld.as sp, [r1, (TASK_THREAD + THREAD_KSP)/4]
+ /* load SP from @next->thread_info.ksp */
+ ld r10, [r1, TASK_THREAD_INFO]
+ ld sp, [r10, THREAD_INFO_KSP]
- /* restore the registers */
+ /* restore callee regs, stack frame regs of @next */
RESTORE_CALLEE_SAVED_KERNEL
- ld.ab fp, [sp, 4]
- ld.ab blink, [sp, 4]
- j [blink]
+ pop fp
+ CFI_RESTORE r27
+ CFI_DEF_CFA r28, 4
+
+ pop blink
+ CFI_RESTORE r31
+ CFI_DEF_CFA_OFFSET 0
+
+ j [blink]
END_CFI(__switch_to)
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <asm/mach_desc.h>
+#include <asm/serial.h>
#ifdef CONFIG_SERIAL_EARLYCON
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_memory_error
b ret_from_exception
END(mem_service)
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
SAVE_CALLEE_SAVED_USER
mov r2, sp ; callee_regs
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp ; pt_regs
-
- FAKE_RET_FROM_EXCPN
-
mov blink, ret_from_exception
b do_page_fault
ENTRY(EV_TLBProtV)
- EXCEPTION_PROLOGUE
-
- mov r2, r10 ; ECR set into r10 already
- lr r0, [efa] ; Faulting Data address (not part of pt_regs saved above)
-
- ; Exception auto-disables further Intr/exceptions.
- ; Re-enable them by pretending to return from exception
- ; (so rest of handler executes in pure K mode)
-
- FAKE_RET_FROM_EXCPN
-
- mov r1, sp ; Handle to pt_regs
+ EXCEPTION_PROLOGUE ; ECR returned in r10
;------ (5) Type of Protection Violation? ----------
;
; -Access Violation : 00_23_(00|01|02|03)_00
; x r w r+w
; -Unaligned Access : 00_23_04_00
- ;
- bbit1 r2, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
+ bbit1 r10, ECR_C_BIT_PROTV_MISALIG_DATA, 4f
;========= (6a) Access Violation Processing ========
bl do_page_fault
ENTRY(call_do_page_fault)
EXCEPTION_PROLOGUE
- lr r0, [efa] ; Faulting Data address
- mov r1, sp
- FAKE_RET_FROM_EXCPN
mov blink, ret_from_exception
b do_page_fault
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_insterror_or_kprobe
b ret_from_exception
END(instr_service)
ENTRY(EV_MachineCheck)
- EXCEPTION_PROLOGUE
+ EXCEPTION_PROLOGUE_KEEP_AE ; ECR returned in r10
- lr r2, [ecr]
lr r0, [efa]
mov r1, sp
; MC excpetions disable MMU
ARC_MMU_REENABLE r3
- lsr r3, r2, 8
+ lsr r3, r10, 8
bmsk r3, r3, 7
brne r3, ECR_C_MCHK_DUP_TLB, 1f
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_privilege_fault
b ret_from_exception
END(EV_PrivilegeV)
EXCEPTION_PROLOGUE
- lr r0, [efa]
- mov r1, sp
-
- FAKE_RET_FROM_EXCPN
-
bl do_extension_fault
b ret_from_exception
END(EV_Extension)
; syscall Tracing
; ---------------------------------------------
tracesys:
- ; save EFA in case tracer wants the PC of traced task
- ; using ERET won't work since next-PC has already committed
+ ; safekeep EFA (r12) if syscall tracer wanted PC
+ ; for traps, ERET is pre-commit so points to next-PC
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r11
st r12, [r11, THREAD_FAULT_ADDR] ; thread.fault_address
- ; PRE Sys Call Ptrace hook
- mov r0, sp ; pt_regs needed
- bl @syscall_trace_entry
+ ; PRE syscall trace hook
+ mov r0, sp ; pt_regs
+ bl @syscall_trace_enter
; Tracing code now returns the syscall num (orig or modif)
mov r8, r0
; Do the Sys Call as we normally would.
- ; Validate the Sys Call number
cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi tracesys_exit
ld r6, [sp, PT_r6]
ld r7, [sp, PT_r7]
ld.as r9, [sys_call_table, r8]
- jl [r9] ; Entry into Sys Call Handler
+ jl [r9]
tracesys_exit:
- st r0, [sp, PT_r0] ; sys call return value in pt_regs
+ st r0, [sp, PT_r0]
- ;POST Sys Call Ptrace Hook
+ ; POST syscall trace hook
mov r0, sp ; pt_regs needed
bl @syscall_trace_exit
- b ret_from_exception ; NOT ret_from_system_call at is saves r0 which
- ; we'd done before calling post hook above
+
+ ; don't call ret_from_system_call as it saves r0, already done above
+ b ret_from_exception
; ---------------------------------------------
; Breakpoint TRAP
; ---------------------------------------------
trap_with_param:
mov r0, r12 ; EFA in case ptracer/gdb wants stop_pc
- mov r1, sp
+ mov r1, sp ; pt_regs
- ; Save callee regs in case gdb wants to have a look
- ; SP will grow up by size of CALLEE Reg-File
- ; NOTE: clobbers r12
+ ; save callee regs in case tracer/gdb wants to peek
SAVE_CALLEE_SAVED_USER
- ; save location of saved Callee Regs @ thread_struct->pc
+ ; safekeep ref to callee regs
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10
st sp, [r10, THREAD_CALLEE_REG]
- ; Call the trap handler
+ ; call the non syscall trap handler
bl do_non_swi_trap
- ; unwind stack to discard Callee saved Regs
+ ; unwind stack to discard callee regs
DISCARD_CALLEE_SAVED_USER
b ret_from_exception
ENTRY(EV_Trap)
- EXCEPTION_PROLOGUE
+ EXCEPTION_PROLOGUE_KEEP_AE
lr r12, [efa]
FAKE_RET_FROM_EXCPN
- ;============ TRAP 1 :breakpoints
- ; Check ECR for trap with arg (PROLOGUE ensures r10 has ECR)
+ ;============ TRAP N : breakpoints, kprobes etc
bmsk.f 0, r10, 7
bnz trap_with_param
- ;============ TRAP (no param): syscall top level
+ ;============ TRAP 0 (no param): syscall
- ; If syscall tracing ongoing, invoke pre-post-hooks
+ ; syscall tracing ongoing, invoke pre-post-hooks around syscall
GET_CURR_THR_INFO_FLAGS r10
and.f 0, r10, _TIF_SYSCALL_WORK
bnz tracesys ; this never comes back
;============ Normal syscall case
- ; syscall num shd not exceed the total system calls avail
cmp r8, NR_syscalls - 1
mov.hi r0, -ENOSYS
bhi .Lret_from_system_call
- ; Offset into the syscall_table and call handler
ld.as r9,[sys_call_table, r8]
- jl [r9] ; Entry into Sys Call Handler
+ jl [r9]
.Lret_from_system_call:
-
st r0, [sp, PT_r0] ; sys call return value in pt_regs
; fall through to ret_from_exception
; tracer might call PEEKUSR(CALLEE reg)
;
; NOTE: SP will grow up by size of CALLEE Reg-File
- SAVE_CALLEE_SAVED_USER ; clobbers r12
+ SAVE_CALLEE_SAVED_USER
; save location of saved Callee Regs @ thread_struct->callee
GET_CURR_TASK_FIELD_PTR TASK_THREAD, r10
write_aux_reg(AUX_IRQ_ENABLE, 1);
}
-void arcv2_irq_enable(struct irq_data *data)
+static void arcv2_irq_enable(struct irq_data *data)
{
/* set default priority */
write_aux_reg(AUX_IRQ_SELECT, data->hwirq);
* with trap_s 4 (compiled) breakpoints, continuation needs to
* start after the breakpoint.
*/
- if (regs->ecr_param == 3)
+ if (regs->ecr.param == 3)
instruction_pointer(regs) -= BREAK_INSTR_SIZE;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
IS_AVAIL1(mp.idu, "IDU "),
IS_AVAIL1(mp.dbg, "DEBUG "),
IS_AVAIL1(mp.gfrc, "GFRC"));
-
- cpuinfo_arc700[0].extn.gfrc = mp.gfrc;
}
struct plat_smp_ops plat_smp_ops = {
* | unused |
* | |
* ------------------
- * | r25 | <==== top of Stack (thread.ksp)
+ * | r25 | <==== top of Stack (thread_info.ksp)
* ~ ~
* | --to-- | (CALLEE Regs of kernel mode)
* | r13 |
* | SP |
* | orig_r0 |
* | event/ECR |
- * | user_r25 |
* ------------------ <===== END of PAGE
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
c_callee = ((struct callee_regs *)childksp) - 1;
/*
- * __switch_to() uses thread.ksp to start unwinding stack
+ * __switch_to() uses thread_info.ksp to start unwinding stack
* For kernel threads we don't need to create callee regs, the
* stack layout nevertheless needs to remain the same.
* Also, since __switch_to anyways unwinds callee regs, we use
* this to populate kernel thread entry-pt/args into callee regs,
* so that ret_from_kernel_thread() becomes simpler.
*/
- p->thread.ksp = (unsigned long)c_callee; /* THREAD_KSP */
+ task_thread_info(p)->ksp = (unsigned long)c_callee; /* THREAD_INFO_KSP */
/* __switch_to expects FP(0), BLINK(return addr) at top */
childksp[0] = 0; /* fp */
*/
c_callee->r25 = task_thread_info(p)->thr_ptr;
-#ifdef CONFIG_ARC_CURR_IN_REG
- /*
- * setup usermode thread pointer #2:
- * however for this special use of r25 in kernel, __switch_to() sets
- * r25 for kernel needs and only in the final return path is usermode
- * r25 setup, from pt_regs->user_r25. So set that up as well
- */
- c_regs->user_r25 = c_callee->r25;
-#endif
-
return 0;
}
REG_OFFSET_NAME(r0),
REG_OFFSET_NAME(sp),
REG_OFFSET_NAME(orig_r0),
- REG_OFFSET_NAME(event),
- REG_OFFSET_NAME(user_r25),
+ REG_OFFSET_NAME(ecr),
REG_OFFSET_END,
};
static const struct pt_regs_offset regoffset_table[] = {
REG_OFFSET_NAME(orig_r0),
- REG_OFFSET_NAME(event),
+ REG_OFFSET_NAME(ecr),
REG_OFFSET_NAME(bta),
- REG_OFFSET_NAME(user_r25),
REG_OFFSET_NAME(r26),
REG_OFFSET_NAME(fp),
REG_OFFSET_NAME(sp),
return ret;
}
-asmlinkage int syscall_trace_entry(struct pt_regs *regs)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
{
if (test_thread_flag(TIF_SYSCALL_TRACE))
if (ptrace_report_syscall_entry(regs))
#include <asm/mach_desc.h>
#include <asm/smp.h>
#include <asm/dsp-impl.h>
+#include <soc/arc/mcip.h>
#define FIX_PTR(x) __asm__ __volatile__(";" : "+r"(x))
struct task_struct *_current_task[NR_CPUS]; /* For stack switching */
-struct cpuinfo_arc cpuinfo_arc700[NR_CPUS];
+struct cpuinfo_arc {
+ int arcver;
+ unsigned int t0:1, t1:1;
+ struct {
+ unsigned long base;
+ unsigned int sz;
+ } iccm, dccm;
+};
+
+#ifdef CONFIG_ISA_ARCV2
-static const struct id_to_str arc_legacy_rel[] = {
+static const struct id_to_str arc_hs_rel[] = {
/* ID.ARCVER, Release */
-#ifdef CONFIG_ISA_ARCOMPACT
- { 0x34, "R4.10"},
- { 0x35, "R4.11"},
-#else
{ 0x51, "R2.0" },
{ 0x52, "R2.1" },
{ 0x53, "R3.0" },
-#endif
- { 0x00, NULL }
};
static const struct id_to_str arc_hs_ver54_rel[] = {
{ 3, "R4.00a"},
{ 0xFF, NULL }
};
+#endif
-static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu)
+static int
+arcompact_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
- if (is_isa_arcompact()) {
- struct bcr_iccm_arcompact iccm;
- struct bcr_dccm_arcompact dccm;
+ int n = 0;
+#ifdef CONFIG_ISA_ARCOMPACT
+ char *cpu_nm, *isa_nm = "ARCompact";
+ struct bcr_fp_arcompact fpu_sp, fpu_dp;
+ int atomic = 0, be, present;
+ int bpu_full, bpu_cache, bpu_pred;
+ struct bcr_bpu_arcompact bpu;
+ struct bcr_iccm_arcompact iccm;
+ struct bcr_dccm_arcompact dccm;
+ struct bcr_generic isa;
- READ_BCR(ARC_REG_ICCM_BUILD, iccm);
- if (iccm.ver) {
- cpu->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
- cpu->iccm.base_addr = iccm.base << 16;
- }
+ READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
- READ_BCR(ARC_REG_DCCM_BUILD, dccm);
- if (dccm.ver) {
- unsigned long base;
- cpu->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
+ if (!isa.ver) /* ISA BCR absent, use Kconfig info */
+ atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
+ else {
+ /* ARC700_BUILD only has 2 bits of isa info */
+ atomic = isa.info & 1;
+ }
- base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
- cpu->dccm.base_addr = base & ~0xF;
- }
- } else {
- struct bcr_iccm_arcv2 iccm;
- struct bcr_dccm_arcv2 dccm;
- unsigned long region;
-
- READ_BCR(ARC_REG_ICCM_BUILD, iccm);
- if (iccm.ver) {
- cpu->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */
- if (iccm.sz00 == 0xF && iccm.sz01 > 0)
- cpu->iccm.sz <<= iccm.sz01;
-
- region = read_aux_reg(ARC_REG_AUX_ICCM);
- cpu->iccm.base_addr = region & 0xF0000000;
- }
+ be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
- READ_BCR(ARC_REG_DCCM_BUILD, dccm);
- if (dccm.ver) {
- cpu->dccm.sz = 256 << dccm.sz0;
- if (dccm.sz0 == 0xF && dccm.sz1 > 0)
- cpu->dccm.sz <<= dccm.sz1;
+ if (info->arcver < 0x34)
+ cpu_nm = "ARC750";
+ else
+ cpu_nm = "ARC770";
- region = read_aux_reg(ARC_REG_AUX_DCCM);
- cpu->dccm.base_addr = region & 0xF0000000;
- }
- }
-}
+ n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s%s%s\n",
+ c, cpu_nm, isa_nm,
+ IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL1(be, "[Big-Endian]"));
-static void decode_arc_core(struct cpuinfo_arc *cpu)
-{
- struct bcr_uarch_build_arcv2 uarch;
- const struct id_to_str *tbl;
-
- if (cpu->core.family < 0x54) { /* includes arc700 */
+ READ_BCR(ARC_REG_FP_BCR, fpu_sp);
+ READ_BCR(ARC_REG_DPFP_BCR, fpu_dp);
- for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) {
- if (cpu->core.family == tbl->id) {
- cpu->release = tbl->str;
- break;
- }
- }
+ if (fpu_sp.ver | fpu_dp.ver)
+ n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
+ IS_AVAIL1(fpu_sp.ver, "SP "),
+ IS_AVAIL1(fpu_dp.ver, "DP "));
- if (is_isa_arcompact())
- cpu->name = "ARC700";
- else if (tbl->str)
- cpu->name = "HS38";
- else
- cpu->name = cpu->release = "Unknown";
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ bpu_full = bpu.fam ? 1 : 0;
+ bpu_cache = 256 << (bpu.ent - 1);
+ bpu_pred = 256 << (bpu.ent - 1);
- return;
+ n += scnprintf(buf + n, len - n,
+ "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+ IS_AVAIL1(bpu_full, "full"),
+ IS_AVAIL1(!bpu_full, "partial"),
+ bpu_cache, bpu_pred);
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ info->iccm.sz = 4096 << iccm.sz; /* 8K to 512K */
+ info->iccm.base = iccm.base << 16;
}
- /*
- * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
- * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
- * releases only update it.
- */
- READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
-
- if (uarch.prod == 4) {
- cpu->name = "HS48";
- cpu->extn.dual = 1;
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ unsigned long base;
+ info->dccm.sz = 2048 << dccm.sz; /* 2K to 256K */
- } else {
- cpu->name = "HS38";
+ base = read_aux_reg(ARC_REG_DCCM_BASE_BUILD);
+ info->dccm.base = base & ~0xF;
}
- for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
- if (uarch.maj == tbl->id) {
- cpu->release = tbl->str;
- break;
- }
- }
+ /* ARCompact ISA specific sanity checks */
+ present = fpu_dp.ver; /* SP has no arch visible regs */
+ CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
+#endif
+ return n;
+
}
-static void read_arc_build_cfg_regs(void)
+static int arcv2_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
{
- struct bcr_timer timer;
- struct bcr_generic bcr;
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
+ int n = 0;
+#ifdef CONFIG_ISA_ARCV2
+ const char *release, *cpu_nm, *isa_nm = "ARCv2";
+ int dual_issue = 0, dual_enb = 0, mpy_opt, present;
+ int bpu_full, bpu_cache, bpu_pred, bpu_ret_stk;
+ char mpy_nm[16], lpb_nm[32];
struct bcr_isa_arcv2 isa;
- struct bcr_actionpoint ap;
-
- FIX_PTR(cpu);
+ struct bcr_mpy mpy;
+ struct bcr_fp_arcv2 fpu;
+ struct bcr_bpu_arcv2 bpu;
+ struct bcr_lpb lpb;
+ struct bcr_iccm_arcv2 iccm;
+ struct bcr_dccm_arcv2 dccm;
+ struct bcr_erp erp;
- READ_BCR(AUX_IDENTITY, cpu->core);
- decode_arc_core(cpu);
-
- READ_BCR(ARC_REG_TIMERS_BCR, timer);
- cpu->extn.timer0 = timer.t0;
- cpu->extn.timer1 = timer.t1;
- cpu->extn.rtc = timer.rtc;
-
- cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
+ /*
+ * Initial HS cores bumped AUX IDENTITY.ARCVER for each release until
+ * ARCVER 0x54 which introduced AUX MICRO_ARCH_BUILD and subsequent
+ * releases only update it.
+ */
- READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
+ cpu_nm = "HS38";
- /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */
- read_decode_ccm_bcr(cpu);
+ if (info->arcver > 0x50 && info->arcver <= 0x53) {
+ release = arc_hs_rel[info->arcver - 0x51].str;
+ } else {
+ const struct id_to_str *tbl;
+ struct bcr_uarch_build uarch;
- read_decode_mmu_bcr();
- read_decode_cache_bcr();
+ READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch);
- if (is_isa_arcompact()) {
- struct bcr_fp_arcompact sp, dp;
- struct bcr_bpu_arcompact bpu;
-
- READ_BCR(ARC_REG_FP_BCR, sp);
- READ_BCR(ARC_REG_DPFP_BCR, dp);
- cpu->extn.fpu_sp = sp.ver ? 1 : 0;
- cpu->extn.fpu_dp = dp.ver ? 1 : 0;
-
- READ_BCR(ARC_REG_BPU_BCR, bpu);
- cpu->bpu.ver = bpu.ver;
- cpu->bpu.full = bpu.fam ? 1 : 0;
- if (bpu.ent) {
- cpu->bpu.num_cache = 256 << (bpu.ent - 1);
- cpu->bpu.num_pred = 256 << (bpu.ent - 1);
+ for (tbl = &arc_hs_ver54_rel[0]; tbl->id != 0xFF; tbl++) {
+ if (uarch.maj == tbl->id) {
+ release = tbl->str;
+ break;
+ }
}
- } else {
- struct bcr_fp_arcv2 spdp;
- struct bcr_bpu_arcv2 bpu;
-
- READ_BCR(ARC_REG_FP_V2_BCR, spdp);
- cpu->extn.fpu_sp = spdp.sp ? 1 : 0;
- cpu->extn.fpu_dp = spdp.dp ? 1 : 0;
-
- READ_BCR(ARC_REG_BPU_BCR, bpu);
- cpu->bpu.ver = bpu.ver;
- cpu->bpu.full = bpu.ft;
- cpu->bpu.num_cache = 256 << bpu.bce;
- cpu->bpu.num_pred = 2048 << bpu.pte;
- cpu->bpu.ret_stk = 4 << bpu.rse;
-
- /* if dual issue hardware, is it enabled ? */
- if (cpu->extn.dual) {
+ if (uarch.prod == 4) {
unsigned int exec_ctrl;
+ cpu_nm = "HS48";
+ dual_issue = 1;
+ /* if dual issue hardware, is it enabled ? */
READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
- cpu->extn.dual_enb = !(exec_ctrl & 1);
+ dual_enb = !(exec_ctrl & 1);
}
}
- READ_BCR(ARC_REG_AP_BCR, ap);
- if (ap.ver) {
- cpu->extn.ap_num = 2 << ap.num;
- cpu->extn.ap_full = !ap.min;
- }
-
- READ_BCR(ARC_REG_SMART_BCR, bcr);
- cpu->extn.smart = bcr.ver ? 1 : 0;
-
- READ_BCR(ARC_REG_RTT_BCR, bcr);
- cpu->extn.rtt = bcr.ver ? 1 : 0;
-
READ_BCR(ARC_REG_ISA_CFG_BCR, isa);
- /* some hacks for lack of feature BCR info in old ARC700 cores */
- if (is_isa_arcompact()) {
- if (!isa.ver) /* ISA BCR absent, use Kconfig info */
- cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC);
- else {
- /* ARC700_BUILD only has 2 bits of isa info */
- struct bcr_generic bcr = *(struct bcr_generic *)&isa;
- cpu->isa.atomic = bcr.info & 1;
- }
-
- cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN);
+ n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
+ c, cpu_nm, release, isa_nm,
+ IS_AVAIL1(isa.be, "[Big-Endian]"),
+ IS_AVAIL3(dual_issue, dual_enb, " Dual-Issue "));
+
+ READ_BCR(ARC_REG_MPY_BCR, mpy);
+ mpy_opt = 2; /* stock MPY/MPYH */
+ if (mpy.dsp) /* OPT 7-9 */
+ mpy_opt = mpy.dsp + 6;
+
+ scnprintf(mpy_nm, 16, "mpy[opt %d] ", mpy_opt);
+
+ READ_BCR(ARC_REG_FP_V2_BCR, fpu);
+
+ n += scnprintf(buf + n, len - n, "ISA Extn\t: %s%s%s%s%s%s%s%s%s%s%s\n",
+ IS_AVAIL2(isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
+ IS_AVAIL2(isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
+ IS_AVAIL2(isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
+ IS_AVAIL1(mpy.ver, mpy_nm),
+ IS_AVAIL1(isa.div_rem, "div_rem "),
+ IS_AVAIL1((fpu.sp | fpu.dp), " FPU:"),
+ IS_AVAIL1(fpu.sp, " sp"),
+ IS_AVAIL1(fpu.dp, " dp"));
+
+ READ_BCR(ARC_REG_BPU_BCR, bpu);
+ bpu_full = bpu.ft;
+ bpu_cache = 256 << bpu.bce;
+ bpu_pred = 2048 << bpu.pte;
+ bpu_ret_stk = 4 << bpu.rse;
+
+ READ_BCR(ARC_REG_LPB_BUILD, lpb);
+ if (lpb.ver) {
+ unsigned int ctl;
+ ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+ scnprintf(lpb_nm, sizeof(lpb_nm), " Loop Buffer:%d %s",
+ lpb.entries, IS_DISABLED_RUN(!ctl));
+ }
- /* there's no direct way to distinguish 750 vs. 770 */
- if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3))
- cpu->name = "ARC750";
- } else {
- cpu->isa = isa;
+ n += scnprintf(buf + n, len - n,
+ "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d%s\n",
+ IS_AVAIL1(bpu_full, "full"),
+ IS_AVAIL1(!bpu_full, "partial"),
+ bpu_cache, bpu_pred, bpu_ret_stk,
+ lpb_nm);
+
+ READ_BCR(ARC_REG_ICCM_BUILD, iccm);
+ if (iccm.ver) {
+ unsigned long base;
+ info->iccm.sz = 256 << iccm.sz00; /* 512B to 16M */
+ if (iccm.sz00 == 0xF && iccm.sz01 > 0)
+ info->iccm.sz <<= iccm.sz01;
+ base = read_aux_reg(ARC_REG_AUX_ICCM);
+ info->iccm.base = base & 0xF0000000;
}
-}
-static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
-{
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
- struct bcr_identity *core = &cpu->core;
- char mpy_opt[16];
- int n = 0;
+ READ_BCR(ARC_REG_DCCM_BUILD, dccm);
+ if (dccm.ver) {
+ unsigned long base;
+ info->dccm.sz = 256 << dccm.sz0;
+ if (dccm.sz0 == 0xF && dccm.sz1 > 0)
+ info->dccm.sz <<= dccm.sz1;
+ base = read_aux_reg(ARC_REG_AUX_DCCM);
+ info->dccm.base = base & 0xF0000000;
+ }
- FIX_PTR(cpu);
+ /* Error Protection: ECC/Parity */
+ READ_BCR(ARC_REG_ERP_BUILD, erp);
+ if (erp.ver) {
+ struct ctl_erp ctl;
+ READ_BCR(ARC_REG_ERP_CTRL, ctl);
+ /* inverted bits: 0 means enabled */
+ n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+ IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
+ IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
+ IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+ }
- n += scnprintf(buf + n, len - n,
- "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
- core->family, core->cpu_id, core->chip_id);
+ /* ARCv2 ISA specific sanity checks */
+ present = fpu.sp | fpu.dp | mpy.dsp; /* DSP and/or FPU */
+ CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
- n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n",
- cpu_id, cpu->name, cpu->release,
- is_isa_arcompact() ? "ARCompact" : "ARCv2",
- IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
- IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
+ dsp_config_check();
+#endif
+ return n;
+}
- n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
- IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
- IS_AVAIL1(cpu->extn.timer1, "Timer1 "),
- IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
- IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT));
+static char *arc_cpu_mumbojumbo(int c, struct cpuinfo_arc *info, char *buf, int len)
+{
+ struct bcr_identity ident;
+ struct bcr_timer timer;
+ struct bcr_generic bcr;
+ struct mcip_bcr mp;
+ struct bcr_actionpoint ap;
+ unsigned long vec_base;
+ int ap_num, ap_full, smart, rtt, n;
- if (cpu->extn_mpy.ver) {
- if (is_isa_arcompact()) {
- scnprintf(mpy_opt, 16, "mpy");
- } else {
+ memset(info, 0, sizeof(struct cpuinfo_arc));
- int opt = 2; /* stock MPY/MPYH */
+ READ_BCR(AUX_IDENTITY, ident);
+ info->arcver = ident.family;
- if (cpu->extn_mpy.dsp) /* OPT 7-9 */
- opt = cpu->extn_mpy.dsp + 6;
+ n = scnprintf(buf, len,
+ "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n",
+ ident.family, ident.cpu_id, ident.chip_id);
- scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt);
- }
+ if (is_isa_arcompact()) {
+ n += arcompact_mumbojumbo(c, info, buf + n, len - n);
+ } else if (is_isa_arcv2()){
+ n += arcv2_mumbojumbo(c, info, buf + n, len - n);
}
- n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n",
- IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC),
- IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64),
- IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS),
- IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt),
- IS_AVAIL1(cpu->isa.div_rem, "div_rem "));
+ n += arc_mmu_mumbojumbo(c, buf + n, len - n);
+ n += arc_cache_mumbojumbo(c, buf + n, len - n);
- if (cpu->bpu.ver) {
- n += scnprintf(buf + n, len - n,
- "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d",
- IS_AVAIL1(cpu->bpu.full, "full"),
- IS_AVAIL1(!cpu->bpu.full, "partial"),
- cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk);
-
- if (is_isa_arcv2()) {
- struct bcr_lpb lpb;
-
- READ_BCR(ARC_REG_LPB_BUILD, lpb);
- if (lpb.ver) {
- unsigned int ctl;
- ctl = read_aux_reg(ARC_REG_LPB_CTRL);
-
- n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
- lpb.entries,
- IS_DISABLED_RUN(!ctl));
- }
- }
- n += scnprintf(buf + n, len - n, "\n");
- }
+ READ_BCR(ARC_REG_TIMERS_BCR, timer);
+ info->t0 = timer.t0;
+ info->t1 = timer.t1;
- return buf;
-}
+ READ_BCR(ARC_REG_MCIP_BCR, mp);
+ vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
-static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id];
+ n += scnprintf(buf + n, len - n,
+ "Timers\t\t: %s%s%s%s%s%s\nVector Table\t: %#lx\n",
+ IS_AVAIL1(timer.t0, "Timer0 "),
+ IS_AVAIL1(timer.t1, "Timer1 "),
+ IS_AVAIL2(timer.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+ IS_AVAIL2(mp.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT),
+ vec_base);
- FIX_PTR(cpu);
+ READ_BCR(ARC_REG_AP_BCR, ap);
+ if (ap.ver) {
+ ap_num = 2 << ap.num;
+ ap_full = !ap.min;
+ }
- n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base);
+ READ_BCR(ARC_REG_SMART_BCR, bcr);
+ smart = bcr.ver ? 1 : 0;
- if (cpu->extn.fpu_sp || cpu->extn.fpu_dp)
- n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n",
- IS_AVAIL1(cpu->extn.fpu_sp, "SP "),
- IS_AVAIL1(cpu->extn.fpu_dp, "DP "));
+ READ_BCR(ARC_REG_RTT_BCR, bcr);
+ rtt = bcr.ver ? 1 : 0;
- if (cpu->extn.ap_num | cpu->extn.smart | cpu->extn.rtt) {
+ if (ap.ver | smart | rtt) {
n += scnprintf(buf + n, len - n, "DEBUG\t\t: %s%s",
- IS_AVAIL1(cpu->extn.smart, "smaRT "),
- IS_AVAIL1(cpu->extn.rtt, "RTT "));
- if (cpu->extn.ap_num) {
+ IS_AVAIL1(smart, "smaRT "),
+ IS_AVAIL1(rtt, "RTT "));
+ if (ap.ver) {
n += scnprintf(buf + n, len - n, "ActionPoint %d/%s",
- cpu->extn.ap_num,
- cpu->extn.ap_full ? "full":"min");
+ ap_num,
+ ap_full ? "full":"min");
}
n += scnprintf(buf + n, len - n, "\n");
}
- if (cpu->dccm.sz || cpu->iccm.sz)
- n += scnprintf(buf + n, len - n, "Extn [CCM]\t: DCCM @ %x, %d KB / ICCM: @ %x, %d KB\n",
- cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
- cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
-
- if (is_isa_arcv2()) {
-
- /* Error Protection: ECC/Parity */
- struct bcr_erp erp;
- READ_BCR(ARC_REG_ERP_BUILD, erp);
-
- if (erp.ver) {
- struct ctl_erp ctl;
- READ_BCR(ARC_REG_ERP_CTRL, ctl);
-
- /* inverted bits: 0 means enabled */
- n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
- IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
- IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
- IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
- }
- }
+ if (info->dccm.sz || info->iccm.sz)
+ n += scnprintf(buf + n, len - n,
+ "Extn [CCM]\t: DCCM @ %lx, %d KB / ICCM: @ %lx, %d KB\n",
+ info->dccm.base, TO_KB(info->dccm.sz),
+ info->iccm.base, TO_KB(info->iccm.sz));
return buf;
}
panic("Disable %s, hardware NOT present\n", opt_name);
}
-static void arc_chk_core_config(void)
+/*
+ * ISA agnostic sanity checks
+ */
+static void arc_chk_core_config(struct cpuinfo_arc *info)
{
- struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
- int present = 0;
-
- if (!cpu->extn.timer0)
+ if (!info->t0)
panic("Timer0 is not present!\n");
- if (!cpu->extn.timer1)
+ if (!info->t1)
panic("Timer1 is not present!\n");
#ifdef CONFIG_ARC_HAS_DCCM
* DCCM can be arbit placed in hardware.
* Make sure it's placement/sz matches what Linux is built with
*/
- if ((unsigned int)__arc_dccm_base != cpu->dccm.base_addr)
+ if ((unsigned int)__arc_dccm_base != info->dccm.base)
panic("Linux built with incorrect DCCM Base address\n");
- if (CONFIG_ARC_DCCM_SZ * SZ_1K != cpu->dccm.sz)
+ if (CONFIG_ARC_DCCM_SZ * SZ_1K != info->dccm.sz)
panic("Linux built with incorrect DCCM Size\n");
#endif
#ifdef CONFIG_ARC_HAS_ICCM
- if (CONFIG_ARC_ICCM_SZ * SZ_1K != cpu->iccm.sz)
+ if (CONFIG_ARC_ICCM_SZ * SZ_1K != info->iccm.sz)
panic("Linux built with incorrect ICCM Size\n");
#endif
-
- /*
- * FP hardware/software config sanity
- * -If hardware present, kernel needs to save/restore FPU state
- * -If not, it will crash trying to save/restore the non-existant regs
- */
-
- if (is_isa_arcompact()) {
- /* only DPDP checked since SP has no arch visible regs */
- present = cpu->extn.fpu_dp;
- CHK_OPT_STRICT(CONFIG_ARC_FPU_SAVE_RESTORE, present);
- } else {
- /* Accumulator Low:High pair (r58:59) present if DSP MPY or FPU */
- present = cpu->extn_mpy.dsp | cpu->extn.fpu_sp | cpu->extn.fpu_dp;
- CHK_OPT_STRICT(CONFIG_ARC_HAS_ACCL_REGS, present);
-
- dsp_config_check();
- }
}
/*
void setup_processor(void)
{
+ struct cpuinfo_arc info;
+ int c = smp_processor_id();
char str[512];
- int cpu_id = smp_processor_id();
- read_arc_build_cfg_regs();
- arc_init_IRQ();
+ pr_info("%s", arc_cpu_mumbojumbo(c, &info, str, sizeof(str)));
+ pr_info("%s", arc_platform_smp_cpuinfo());
- pr_info("%s", arc_cpu_mumbojumbo(cpu_id, str, sizeof(str)));
+ arc_chk_core_config(&info);
+ arc_init_IRQ();
arc_mmu_init();
arc_cache_init();
- pr_info("%s", arc_extn_mumbojumbo(cpu_id, str, sizeof(str)));
- pr_info("%s", arc_platform_smp_cpuinfo());
-
- arc_chk_core_config();
}
static inline bool uboot_arg_invalid(unsigned long addr)
char *str;
int cpu_id = ptr_to_cpu(v);
struct device *cpu_dev = get_cpu_device(cpu_id);
+ struct cpuinfo_arc info;
struct clk *cpu_clk;
unsigned long freq = 0;
if (!str)
goto done;
- seq_printf(m, arc_cpu_mumbojumbo(cpu_id, str, PAGE_SIZE));
+ seq_printf(m, arc_cpu_mumbojumbo(cpu_id, &info, str, PAGE_SIZE));
cpu_clk = clk_get(cpu_dev, NULL);
if (IS_ERR(cpu_clk)) {
loops_per_jiffy / (500000 / HZ),
(loops_per_jiffy / (5000 / HZ)) % 100);
- seq_printf(m, arc_mmu_mumbojumbo(cpu_id, str, PAGE_SIZE));
- seq_printf(m, arc_cache_mumbojumbo(cpu_id, str, PAGE_SIZE));
- seq_printf(m, arc_extn_mumbojumbo(cpu_id, str, PAGE_SIZE));
seq_printf(m, arc_platform_smp_cpuinfo());
free_page((unsigned long)str);
#include <linux/sched/task_stack.h>
#include <asm/ucontext.h>
+#include <asm/entry.h>
struct rt_sigframe {
struct siginfo info;
#include <linux/export.h>
#include <linux/of_fdt.h>
-#include <asm/processor.h>
-#include <asm/setup.h>
#include <asm/mach_desc.h>
+#include <asm/setup.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
* arch-common ISR to handle for inter-processor interrupts
* Has hooks for platform specific IPI
*/
-irqreturn_t do_IPI(int irq, void *dev_id)
+static irqreturn_t do_IPI(int irq, void *dev_id)
{
unsigned long pending;
unsigned long __maybe_unused copy;
#include <asm/arcregs.h>
#include <asm/unwind.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
/*-------------------------------------------------------------------------
#include <linux/ptrace.h>
#include <linux/kprobes.h>
#include <linux/kgdb.h>
+#include <asm/entry.h>
#include <asm/setup.h>
#include <asm/unaligned.h>
#include <asm/kprobes.h>
*/
void do_non_swi_trap(unsigned long address, struct pt_regs *regs)
{
- unsigned int param = regs->ecr_param;
-
- switch (param) {
+ switch (regs->ecr.param) {
case 1:
trap_is_brkpt(address, regs);
break;
/* For Data fault, this is data address not instruction addr */
address = current->thread.fault_address;
- vec = regs->ecr_vec;
- cause_code = regs->ecr_cause;
+ vec = regs->ecr.vec;
+ cause_code = regs->ecr.cause;
/* For DTLB Miss or ProtV, display the memory involved too */
if (vec == ECR_V_DTLB_MISS) {
pr_cont("Misaligned r/w from 0x%08lx\n", address);
#endif
} else if (vec == ECR_V_TRAP) {
- if (regs->ecr_param == 5)
+ if (regs->ecr.param == 5)
pr_cont("gcc generated __builtin_trap\n");
} else {
pr_cont("Check Programmer's Manual\n");
if (user_mode(regs))
show_faulting_vma(regs->ret); /* faulting code, not data */
- pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\nSTAT: 0x%08lx",
- regs->event, current->thread.fault_address, regs->ret,
- regs->status32);
+ pr_info("ECR: 0x%08lx EFA: 0x%08lx ERET: 0x%08lx\n",
+ regs->ecr.full, current->thread.fault_address, regs->ret);
+
+ pr_info("STAT32: 0x%08lx", regs->status32);
#define STS_BIT(r, bit) r->status32 & STATUS_##bit##_MASK ? #bit" " : ""
#endif
ENTRY_CFI(memset)
- PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
mov.f 0, r2
;;; if size is zero
jz.d [blink]
mov r3, r0 ; don't clobber ret val
+ PREFETCHW_INSTR r0, 0 ; Prefetch the first write location
+
;;; if length < 8
brls.d.nt r2, 8, .Lsmallchunk
mov.f lp_count,r2
unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */
unsigned long perip_end = 0xFFFFFFFF; /* legacy value */
+static struct cpuinfo_arc_cache {
+ unsigned int sz_k, line_len, colors;
+} ic_info, dc_info, slc_info;
+
void (*_cache_line_loop_ic_fn)(phys_addr_t paddr, unsigned long vaddr,
unsigned long sz, const int op, const int full_page);
void (*__dma_cache_inv)(phys_addr_t start, unsigned long sz);
void (*__dma_cache_wback)(phys_addr_t start, unsigned long sz);
-char *arc_cache_mumbojumbo(int c, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc_cache *p;
-
-#define PR_CACHE(p, cfg, str) \
- if (!(p)->line_len) \
- n += scnprintf(buf + n, len - n, str"\t\t: N/A\n"); \
- else \
- n += scnprintf(buf + n, len - n, \
- str"\t\t: %uK, %dway/set, %uB Line, %s%s%s\n", \
- (p)->sz_k, (p)->assoc, (p)->line_len, \
- (p)->vipt ? "VIPT" : "PIPT", \
- (p)->alias ? " aliasing" : "", \
- IS_USED_CFG(cfg));
-
- PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache");
- PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache");
-
- p = &cpuinfo_arc700[c].slc;
- if (p->line_len)
- n += scnprintf(buf + n, len - n,
- "SLC\t\t: %uK, %uB Line%s\n",
- p->sz_k, p->line_len, IS_USED_RUN(slc_enable));
-
- n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
- perip_base,
- IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
-
- return buf;
-}
-
-/*
- * Read the Cache Build Confuration Registers, Decode them and save into
- * the cpuinfo structure for later use.
- * No Validation done here, simply read/convert the BCRs
- */
-static void read_decode_cache_bcr_arcv2(int cpu)
+static int read_decode_cache_bcr_arcv2(int c, char *buf, int len)
{
- struct cpuinfo_arc_cache *p_slc = &cpuinfo_arc700[cpu].slc;
+ struct cpuinfo_arc_cache *p_slc = &slc_info;
+ struct bcr_identity ident;
struct bcr_generic sbcr;
-
- struct bcr_slc_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:24, way:2, lsz:2, sz:4;
-#else
- unsigned int sz:4, lsz:2, way:2, pad:24;
-#endif
- } slc_cfg;
-
- struct bcr_clust_cfg {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:7, c:1, num_entries:8, num_cores:8, ver:8;
-#else
- unsigned int ver:8, num_cores:8, num_entries:8, c:1, pad:7;
-#endif
- } cbcr;
-
- struct bcr_volatile {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int start:4, limit:4, pad:22, order:1, disable:1;
-#else
- unsigned int disable:1, order:1, pad:22, limit:4, start:4;
-#endif
- } vol;
-
+ struct bcr_clust_cfg cbcr;
+ struct bcr_volatile vol;
+ int n = 0;
READ_BCR(ARC_REG_SLC_BCR, sbcr);
if (sbcr.ver) {
+ struct bcr_slc_cfg slc_cfg;
READ_BCR(ARC_REG_SLC_CFG, slc_cfg);
p_slc->sz_k = 128 << slc_cfg.sz;
l2_line_sz = p_slc->line_len = (slc_cfg.lsz == 0) ? 128 : 64;
+ n += scnprintf(buf + n, len - n,
+ "SLC\t\t: %uK, %uB Line%s\n",
+ p_slc->sz_k, p_slc->line_len, IS_USED_RUN(slc_enable));
}
READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
ioc_enable = 0;
}
+ READ_BCR(AUX_IDENTITY, ident);
+
/* HS 2.0 didn't have AUX_VOL */
- if (cpuinfo_arc700[cpu].core.family > 0x51) {
+ if (ident.family > 0x51) {
READ_BCR(AUX_VOL, vol);
perip_base = vol.start << 28;
/* HS 3.0 has limit and strict-ordering fields */
- if (cpuinfo_arc700[cpu].core.family > 0x52)
+ if (ident.family > 0x52)
perip_end = (vol.limit << 28) - 1;
}
+
+ n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
+ perip_base,
+ IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
+
+ return n;
}
-void read_decode_cache_bcr(void)
+int arc_cache_mumbojumbo(int c, char *buf, int len)
{
- struct cpuinfo_arc_cache *p_ic, *p_dc;
- unsigned int cpu = smp_processor_id();
- struct bcr_cache {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int pad:12, line_len:4, sz:4, config:4, ver:8;
-#else
- unsigned int ver:8, config:4, sz:4, line_len:4, pad:12;
-#endif
- } ibcr, dbcr;
+ struct cpuinfo_arc_cache *p_ic = &ic_info, *p_dc = &dc_info;
+ struct bcr_cache ibcr, dbcr;
+ int vipt, assoc;
+ int n = 0;
- p_ic = &cpuinfo_arc700[cpu].icache;
READ_BCR(ARC_REG_IC_BCR, ibcr);
-
if (!ibcr.ver)
goto dc_chk;
- if (ibcr.ver <= 3) {
+ if (is_isa_arcompact() && (ibcr.ver <= 3)) {
BUG_ON(ibcr.config != 3);
- p_ic->assoc = 2; /* Fixed to 2w set assoc */
- } else if (ibcr.ver >= 4) {
- p_ic->assoc = 1 << ibcr.config; /* 1,2,4,8 */
+ assoc = 2; /* Fixed to 2w set assoc */
+ } else if (is_isa_arcv2() && (ibcr.ver >= 4)) {
+ assoc = 1 << ibcr.config; /* 1,2,4,8 */
}
p_ic->line_len = 8 << ibcr.line_len;
p_ic->sz_k = 1 << (ibcr.sz - 1);
- p_ic->vipt = 1;
- p_ic->alias = p_ic->sz_k/p_ic->assoc/TO_KB(PAGE_SIZE) > 1;
+ p_ic->colors = p_ic->sz_k/assoc/TO_KB(PAGE_SIZE);
+
+ n += scnprintf(buf + n, len - n,
+ "I-Cache\t\t: %uK, %dway/set, %uB Line, VIPT%s%s\n",
+ p_ic->sz_k, assoc, p_ic->line_len,
+ p_ic->colors > 1 ? " aliasing" : "",
+ IS_USED_CFG(CONFIG_ARC_HAS_ICACHE));
dc_chk:
- p_dc = &cpuinfo_arc700[cpu].dcache;
READ_BCR(ARC_REG_DC_BCR, dbcr);
-
if (!dbcr.ver)
goto slc_chk;
- if (dbcr.ver <= 3) {
+ if (is_isa_arcompact() && (dbcr.ver <= 3)) {
BUG_ON(dbcr.config != 2);
- p_dc->assoc = 4; /* Fixed to 4w set assoc */
- p_dc->vipt = 1;
- p_dc->alias = p_dc->sz_k/p_dc->assoc/TO_KB(PAGE_SIZE) > 1;
- } else if (dbcr.ver >= 4) {
- p_dc->assoc = 1 << dbcr.config; /* 1,2,4,8 */
- p_dc->vipt = 0;
- p_dc->alias = 0; /* PIPT so can't VIPT alias */
+ vipt = 1;
+ assoc = 4; /* Fixed to 4w set assoc */
+ p_dc->colors = p_dc->sz_k/assoc/TO_KB(PAGE_SIZE);
+ } else if (is_isa_arcv2() && (dbcr.ver >= 4)) {
+ vipt = 0;
+ assoc = 1 << dbcr.config; /* 1,2,4,8 */
+ p_dc->colors = 1; /* PIPT so can't VIPT alias */
}
p_dc->line_len = 16 << dbcr.line_len;
p_dc->sz_k = 1 << (dbcr.sz - 1);
+ n += scnprintf(buf + n, len - n,
+ "D-Cache\t\t: %uK, %dway/set, %uB Line, %s%s%s\n",
+ p_dc->sz_k, assoc, p_dc->line_len,
+ vipt ? "VIPT" : "PIPT",
+ p_dc->colors > 1 ? " aliasing" : "",
+ IS_USED_CFG(CONFIG_ARC_HAS_DCACHE));
+
slc_chk:
if (is_isa_arcv2())
- read_decode_cache_bcr_arcv2(cpu);
+ n += read_decode_cache_bcr_arcv2(c, buf + n, len - n);
+
+ return n;
}
/*
#endif /* CONFIG_ARC_HAS_ICACHE */
-noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
+static noinline void slc_op_rgn(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
/*
#endif
}
-noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
+static __maybe_unused noinline void slc_op_line(phys_addr_t paddr, unsigned long sz, const int op)
{
#ifdef CONFIG_ISA_ARCV2
/*
* 3. All Caches need to be disabled when setting up IOC to elide any in-flight
* Coherency transactions
*/
-noinline void __init arc_ioc_setup(void)
+static noinline void __init arc_ioc_setup(void)
{
unsigned int ioc_base, mem_sz;
* one core suffices for all
* - IOC setup / dma callbacks only need to be done once
*/
-void __init arc_cache_init_master(void)
+static noinline void __init arc_cache_init_master(void)
{
- unsigned int __maybe_unused cpu = smp_processor_id();
-
if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) {
- struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache;
+ struct cpuinfo_arc_cache *ic = &ic_info;
if (!ic->line_len)
panic("cache support enabled but non-existent cache\n");
* In MMU v4 (HS38x) the aliasing icache config uses IVIL/PTAG
* pair to provide vaddr/paddr respectively, just as in MMU v3
*/
- if (is_isa_arcv2() && ic->alias)
+ if (is_isa_arcv2() && ic->colors > 1)
_cache_line_loop_ic_fn = __cache_line_loop_v3;
else
_cache_line_loop_ic_fn = __cache_line_loop;
}
if (IS_ENABLED(CONFIG_ARC_HAS_DCACHE)) {
- struct cpuinfo_arc_cache *dc = &cpuinfo_arc700[cpu].dcache;
+ struct cpuinfo_arc_cache *dc = &dc_info;
if (!dc->line_len)
panic("cache support enabled but non-existent cache\n");
/* check for D-Cache aliasing on ARCompact: ARCv2 has PIPT */
if (is_isa_arcompact()) {
int handled = IS_ENABLED(CONFIG_ARC_CACHE_VIPT_ALIASING);
- int num_colors = dc->sz_k/dc->assoc/TO_KB(PAGE_SIZE);
- if (dc->alias) {
+ if (dc->colors > 1) {
if (!handled)
panic("Enable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
- if (CACHE_COLORS_NUM != num_colors)
+ if (CACHE_COLORS_NUM != dc->colors)
panic("CACHE_COLORS_NUM not optimized for config\n");
- } else if (!dc->alias && handled) {
+ } else if (handled && dc->colors == 1) {
panic("Disable CONFIG_ARC_CACHE_VIPT_ALIASING\n");
}
}
void __ref arc_cache_init(void)
{
unsigned int __maybe_unused cpu = smp_processor_id();
- char str[256];
-
- pr_info("%s", arc_cache_mumbojumbo(0, str, sizeof(str)));
if (!cpu)
arc_cache_init_master();
return 0;
}
-
-#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
-
-unsigned long arc_clear_user_noinline(void __user *to,
- unsigned long n)
-{
- return __arc_clear_user(to, n);
-}
-EXPORT_SYMBOL(arc_clear_user_noinline);
-
-#endif
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/mm_types.h>
+#include <asm/entry.h>
#include <asm/mmu.h>
/*
if (faulthandler_disabled() || !mm)
goto no_context;
- if (regs->ecr_cause & ECR_C_PROTV_STORE) /* ST/EX */
+ if (regs->ecr.cause & ECR_C_PROTV_STORE) /* ST/EX */
write = 1;
- else if ((regs->ecr_vec == ECR_V_PROTV) &&
- (regs->ecr_cause == ECR_C_PROTV_INST_FETCH))
+ else if ((regs->ecr.vec == ECR_V_PROTV) &&
+ (regs->ecr.cause == ECR_C_PROTV_INST_FETCH))
exec = 1;
flags = FAULT_FLAG_DEFAULT;
#include <linux/highmem.h>
#include <asm/page.h>
#include <asm/sections.h>
+#include <asm/setup.h>
#include <asm/arcregs.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __aligned(PAGE_SIZE);
/* A copy of the ASID from the PID reg is kept in asid_cache */
DEFINE_PER_CPU(unsigned int, asid_cache) = MM_CTXT_FIRST_CYCLE;
-static int __read_mostly pae_exists;
+static struct cpuinfo_arc_mmu {
+ unsigned int ver, pg_sz_k, s_pg_sz_m, pae, sets, ways;
+} mmuinfo;
/*
* Utility Routine to erase a J-TLB entry
noinline void local_flush_tlb_all(void)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
unsigned long flags;
unsigned int entry;
int num_tlb = mmu->sets * mmu->ways;
/*
* Routine to create a TLB entry
*/
-void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
+static void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
{
unsigned long flags;
unsigned int asid_or_sasid, rwx;
* the cpuinfo structure for later use.
* No Validation is done here, simply read/convert the BCRs
*/
-void read_decode_mmu_bcr(void)
+int arc_mmu_mumbojumbo(int c, char *buf, int len)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- unsigned int tmp;
- struct bcr_mmu_3 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, ways:4, sets:4, res:3, sasid:1, pg_sz:4,
- u_itlb:4, u_dtlb:4;
-#else
- unsigned int u_dtlb:4, u_itlb:4, pg_sz:4, sasid:1, res:3, sets:4,
- ways:4, ver:8;
-#endif
- } *mmu3;
-
- struct bcr_mmu_4 {
-#ifdef CONFIG_CPU_BIG_ENDIAN
- unsigned int ver:8, sasid:1, sz1:4, sz0:4, res:2, pae:1,
- n_ways:2, n_entry:2, n_super:2, u_itlb:3, u_dtlb:3;
-#else
- /* DTLB ITLB JES JE JA */
- unsigned int u_dtlb:3, u_itlb:3, n_super:2, n_entry:2, n_ways:2,
- pae:1, res:2, sz0:4, sz1:4, sasid:1, ver:8;
-#endif
- } *mmu4;
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
+ unsigned int bcr, u_dtlb, u_itlb, sasid;
+ struct bcr_mmu_3 *mmu3;
+ struct bcr_mmu_4 *mmu4;
+ char super_pg[64] = "";
+ int n = 0;
- tmp = read_aux_reg(ARC_REG_MMU_BCR);
- mmu->ver = (tmp >> 24);
+ bcr = read_aux_reg(ARC_REG_MMU_BCR);
+ mmu->ver = (bcr >> 24);
if (is_isa_arcompact() && mmu->ver == 3) {
- mmu3 = (struct bcr_mmu_3 *)&tmp;
+ mmu3 = (struct bcr_mmu_3 *)&bcr;
mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1);
mmu->sets = 1 << mmu3->sets;
mmu->ways = 1 << mmu3->ways;
- mmu->u_dtlb = mmu3->u_dtlb;
- mmu->u_itlb = mmu3->u_itlb;
- mmu->sasid = mmu3->sasid;
+ u_dtlb = mmu3->u_dtlb;
+ u_itlb = mmu3->u_itlb;
+ sasid = mmu3->sasid;
} else {
- mmu4 = (struct bcr_mmu_4 *)&tmp;
+ mmu4 = (struct bcr_mmu_4 *)&bcr;
mmu->pg_sz_k = 1 << (mmu4->sz0 - 1);
mmu->s_pg_sz_m = 1 << (mmu4->sz1 - 11);
mmu->sets = 64 << mmu4->n_entry;
mmu->ways = mmu4->n_ways * 2;
- mmu->u_dtlb = mmu4->u_dtlb * 4;
- mmu->u_itlb = mmu4->u_itlb * 4;
- mmu->sasid = mmu4->sasid;
- pae_exists = mmu->pae = mmu4->pae;
+ u_dtlb = mmu4->u_dtlb * 4;
+ u_itlb = mmu4->u_itlb * 4;
+ sasid = mmu4->sasid;
+ mmu->pae = mmu4->pae;
}
-}
-char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len)
-{
- int n = 0;
- struct cpuinfo_arc_mmu *p_mmu = &cpuinfo_arc700[cpu_id].mmu;
- char super_pg[64] = "";
-
- if (p_mmu->s_pg_sz_m)
- scnprintf(super_pg, 64, "%dM Super Page %s",
- p_mmu->s_pg_sz_m,
- IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE));
+ if (mmu->s_pg_sz_m)
+ scnprintf(super_pg, 64, "/%dM%s",
+ mmu->s_pg_sz_m,
+ IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) ? " (THP enabled)":"");
n += scnprintf(buf + n, len - n,
- "MMU [v%x]\t: %dk PAGE, %s, swalk %d lvl, JTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n",
- p_mmu->ver, p_mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
- p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways,
- p_mmu->u_dtlb, p_mmu->u_itlb,
- IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
-
- return buf;
+ "MMU [v%x]\t: %dk%s, swalk %d lvl, JTLB %dx%d, uDTLB %d, uITLB %d%s%s%s\n",
+ mmu->ver, mmu->pg_sz_k, super_pg, CONFIG_PGTABLE_LEVELS,
+ mmu->sets, mmu->ways,
+ u_dtlb, u_itlb,
+ IS_AVAIL1(sasid, ", SASID"),
+ IS_AVAIL2(mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40));
+
+ return n;
}
int pae40_exist_but_not_enab(void)
{
- return pae_exists && !is_pae40_enabled();
+ return mmuinfo.pae && !is_pae40_enabled();
}
void arc_mmu_init(void)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
- char str[256];
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
int compat = 0;
- pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str)));
-
/*
* Can't be done in processor.h due to header include dependencies
*/
void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
struct pt_regs *regs)
{
- struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu;
+ struct cpuinfo_arc_mmu *mmu = &mmuinfo;
unsigned long flags;
int set, n_ways = mmu->ways;
*/
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/libfdt.h>
#include <asm/asm-offsets.h>
+# Help: DRAM base at 0x00000000
CONFIG_DRAM_BASE=0x00000000
+# Help: DRAM base at 0xc0000000
CONFIG_DRAM_BASE=0xc0000000
+# Help: DRAM base at 0xd0000000
CONFIG_DRAM_BASE=0xd0000000
+# Help: Enable Large Physical Address Extension mode
CONFIG_ARM_LPAE=y
CONFIG_VMSPLIT_2G=y
return false;
}
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
+
/* PMU Version in DFR Register */
#define ARMV8_PMU_DFR_VER_NI 0
#define ARMV8_PMU_DFR_VER_V3P4 0x5
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * arch/arm/include/asm/ide.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-/*
- * This file contains the ARM architecture specific IDE code.
- */
-
-#ifndef __ASMARM_IDE_H
-#define __ASMARM_IDE_H
-
-#ifdef __KERNEL__
-
-#define __ide_mm_insw(port,addr,len) readsw(port,addr,len)
-#define __ide_mm_insl(port,addr,len) readsl(port,addr,len)
-#define __ide_mm_outsw(port,addr,len) writesw(port,addr,len)
-#define __ide_mm_outsl(port,addr,len) writesl(port,addr,len)
-
-#endif /* __KERNEL__ */
-
-#endif /* __ASMARM_IDE_H */
+# Help: Virtualization guest
#
# Base options for platforms
#
#define HCR_DCT (UL(1) << 57)
#define HCR_ATA_SHIFT 56
#define HCR_ATA (UL(1) << HCR_ATA_SHIFT)
+#define HCR_TTLBOS (UL(1) << 55)
+#define HCR_TTLBIS (UL(1) << 54)
+#define HCR_ENSCXT (UL(1) << 53)
+#define HCR_TOCU (UL(1) << 52)
#define HCR_AMVOFFEN (UL(1) << 51)
+#define HCR_TICAB (UL(1) << 50)
#define HCR_TID4 (UL(1) << 49)
#define HCR_FIEN (UL(1) << 47)
#define HCR_FWB (UL(1) << 46)
+#define HCR_NV2 (UL(1) << 45)
+#define HCR_AT (UL(1) << 44)
+#define HCR_NV1 (UL(1) << 43)
+#define HCR_NV (UL(1) << 42)
#define HCR_API (UL(1) << 41)
#define HCR_APK (UL(1) << 40)
#define HCR_TEA (UL(1) << 37)
HCR_BSU_IS | HCR_FB | HCR_TACR | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO | HCR_PTW | HCR_TID3)
-#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA)
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
BIT(18) | \
GENMASK(16, 15))
+/*
+ * FGT register definitions
+ *
+ * RES0 and polarity masks as of DDI0487J.a, to be updated as needed.
+ * We're not using the generated masks as they are usually ahead of
+ * the published ARM ARM, which we use as a reference.
+ *
+ * Once we get to a point where the two describe the same thing, we'll
+ * merge the definitions. One day.
+ */
+#define __HFGRTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51))
+#define __HFGRTR_EL2_MASK GENMASK(49, 0)
+#define __HFGRTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+
+#define __HFGWTR_EL2_RES0 (GENMASK(63, 56) | GENMASK(53, 51) | \
+ BIT(46) | BIT(42) | BIT(40) | BIT(28) | \
+ GENMASK(26, 25) | BIT(21) | BIT(18) | \
+ GENMASK(15, 14) | GENMASK(10, 9) | BIT(2))
+#define __HFGWTR_EL2_MASK GENMASK(49, 0)
+#define __HFGWTR_EL2_nMASK (GENMASK(55, 54) | BIT(50))
+
+#define __HFGITR_EL2_RES0 GENMASK(63, 57)
+#define __HFGITR_EL2_MASK GENMASK(54, 0)
+#define __HFGITR_EL2_nMASK GENMASK(56, 55)
+
+#define __HDFGRTR_EL2_RES0 (BIT(49) | BIT(42) | GENMASK(39, 38) | \
+ GENMASK(21, 20) | BIT(8))
+#define __HDFGRTR_EL2_MASK ~__HDFGRTR_EL2_nMASK
+#define __HDFGRTR_EL2_nMASK GENMASK(62, 59)
+
+#define __HDFGWTR_EL2_RES0 (BIT(63) | GENMASK(59, 58) | BIT(51) | BIT(47) | \
+ BIT(43) | GENMASK(40, 38) | BIT(34) | BIT(30) | \
+ BIT(22) | BIT(9) | BIT(6))
+#define __HDFGWTR_EL2_MASK ~__HDFGWTR_EL2_nMASK
+#define __HDFGWTR_EL2_nMASK GENMASK(62, 60)
+
+/* Similar definitions for HCRX_EL2 */
+#define __HCRX_EL2_RES0 (GENMASK(63, 16) | GENMASK(13, 12))
+#define __HCRX_EL2_MASK (0)
+#define __HCRX_EL2_nMASK (GENMASK(15, 14) | GENMASK(4, 0))
+
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
#define HPFAR_MASK (~UL(0xf))
/*
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa_nsh,
__KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid,
+ __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_range,
__KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context,
__KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff,
__KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr,
extern void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
phys_addr_t ipa,
int level);
+extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
#define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4)
#define KVM_REQ_RELOAD_PMU KVM_ARCH_REQ(5)
#define KVM_REQ_SUSPEND KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0 KVM_ARCH_REQ(7)
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
HSTR_EL2, /* Hypervisor System Trap Register */
HACR_EL2, /* Hypervisor Auxiliary Control Register */
+ HCRX_EL2, /* Extended Hypervisor Configuration Register */
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
TCR_EL2, /* Translation Control Register (EL2) */
TPIDR_EL2, /* EL2 Software Thread ID Register */
CNTHCTL_EL2, /* Counter-timer Hypervisor Control register */
SP_EL2, /* EL2 Stack Pointer */
+ HFGRTR_EL2,
+ HFGWTR_EL2,
+ HFGITR_EL2,
+ HDFGRTR_EL2,
+ HDFGWTR_EL2,
CNTHP_CTL_EL2,
CNTHP_CVAL_EL2,
CNTHV_CTL_EL2,
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
- /* Target CPU and feature flags */
- int target;
+ /* feature flags */
DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES);
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
#define VCPU_SVE_FINALIZED __vcpu_single_flag(cflags, BIT(1))
/* PTRAUTH exposed to guest */
#define GUEST_HAS_PTRAUTH __vcpu_single_flag(cflags, BIT(2))
+/* KVM_ARM_VCPU_INIT completed */
+#define VCPU_INITIALIZED __vcpu_single_flag(cflags, BIT(3))
/* Exception pending */
#define PENDING_EXCEPTION __vcpu_single_flag(iflags, BIT(0))
u64 exits;
};
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
#define kvm_call_hyp_nvhe(f, ...) f(__VA_ARGS__)
#endif /* __KVM_NVHE_HYPERVISOR__ */
-void force_vm_exit(const cpumask_t *mask);
-
int handle_exit(struct kvm_vcpu *vcpu, int exception_index);
void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index);
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu);
int __init kvm_sys_reg_table_init(void);
+int __init populate_nv_trap_config(void);
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
return cpus_have_const_cap(ARM64_SPECTRE_V3A);
}
-void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
-
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
static inline bool kvm_vm_is_protected(struct kvm *kvm)
{
return false;
}
-void kvm_init_protected_traps(struct kvm_vcpu *vcpu);
-
int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature);
bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
void __iomem **haddr);
int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
void **haddr);
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);
void stage2_unmap_vm(struct kvm *kvm);
test_bit(KVM_ARM_VCPU_HAS_EL2, vcpu->arch.features));
}
+extern bool __check_nv_sr_forward(struct kvm_vcpu *vcpu);
+
struct sys_reg_params;
struct sys_reg_desc;
* kvm_pgtable_prot format.
*/
enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_tlb_flush_vmid_range() - Invalidate/flush a range of TLB entries
+ *
+ * @mmu: Stage-2 KVM MMU struct
+ * @addr: The base Intermediate physical address from which to invalidate
+ * @size: Size of the range from the base to invalidate
+ */
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size);
#endif /* __ARM64_KVM_PGTABLE_H__ */
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)
+#define SYS_IC_IALLUIS sys_insn(1, 0, 7, 1, 0)
+#define SYS_IC_IALLU sys_insn(1, 0, 7, 5, 0)
+#define SYS_IC_IVAU sys_insn(1, 3, 7, 5, 1)
+
+#define SYS_DC_IVAC sys_insn(1, 0, 7, 6, 1)
+#define SYS_DC_IGVAC sys_insn(1, 0, 7, 6, 3)
+#define SYS_DC_IGDVAC sys_insn(1, 0, 7, 6, 5)
+
+#define SYS_DC_CVAC sys_insn(1, 3, 7, 10, 1)
+#define SYS_DC_CGVAC sys_insn(1, 3, 7, 10, 3)
+#define SYS_DC_CGDVAC sys_insn(1, 3, 7, 10, 5)
+
+#define SYS_DC_CVAU sys_insn(1, 3, 7, 11, 1)
+
+#define SYS_DC_CVAP sys_insn(1, 3, 7, 12, 1)
+#define SYS_DC_CGVAP sys_insn(1, 3, 7, 12, 3)
+#define SYS_DC_CGDVAP sys_insn(1, 3, 7, 12, 5)
+
+#define SYS_DC_CVADP sys_insn(1, 3, 7, 13, 1)
+#define SYS_DC_CGVADP sys_insn(1, 3, 7, 13, 3)
+#define SYS_DC_CGDVADP sys_insn(1, 3, 7, 13, 5)
+
+#define SYS_DC_CIVAC sys_insn(1, 3, 7, 14, 1)
+#define SYS_DC_CIGVAC sys_insn(1, 3, 7, 14, 3)
+#define SYS_DC_CIGDVAC sys_insn(1, 3, 7, 14, 5)
+
+/* Data cache zero operations */
+#define SYS_DC_ZVA sys_insn(1, 3, 7, 4, 1)
+#define SYS_DC_GVA sys_insn(1, 3, 7, 4, 3)
+#define SYS_DC_GZVA sys_insn(1, 3, 7, 4, 4)
+
/*
* Automatically generated definitions for system registers, the
* manual encodings below are in the process of being converted to
#define SYS_DBGDTRTX_EL0 sys_reg(2, 3, 0, 5, 0)
#define SYS_DBGVCR32_EL2 sys_reg(2, 4, 0, 7, 0)
+#define SYS_BRBINF_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 0))
+#define SYS_BRBINFINJ_EL1 sys_reg(2, 1, 9, 1, 0)
+#define SYS_BRBSRC_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 1))
+#define SYS_BRBSRCINJ_EL1 sys_reg(2, 1, 9, 1, 1)
+#define SYS_BRBTGT_EL1(n) sys_reg(2, 1, 8, (n & 15), (((n & 16) >> 2) | 2))
+#define SYS_BRBTGTINJ_EL1 sys_reg(2, 1, 9, 1, 2)
+#define SYS_BRBTS_EL1 sys_reg(2, 1, 9, 0, 2)
+
+#define SYS_BRBCR_EL1 sys_reg(2, 1, 9, 0, 0)
+#define SYS_BRBFCR_EL1 sys_reg(2, 1, 9, 0, 1)
+#define SYS_BRBIDR0_EL1 sys_reg(2, 1, 9, 2, 0)
+
+#define SYS_TRCITECR_EL1 sys_reg(3, 0, 1, 2, 3)
+#define SYS_TRCACATR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (2 | (m >> 3)))
+#define SYS_TRCACVR(m) sys_reg(2, 1, 2, ((m & 7) << 1), (0 | (m >> 3)))
+#define SYS_TRCAUTHSTATUS sys_reg(2, 1, 7, 14, 6)
+#define SYS_TRCAUXCTLR sys_reg(2, 1, 0, 6, 0)
+#define SYS_TRCBBCTLR sys_reg(2, 1, 0, 15, 0)
+#define SYS_TRCCCCTLR sys_reg(2, 1, 0, 14, 0)
+#define SYS_TRCCIDCCTLR0 sys_reg(2, 1, 3, 0, 2)
+#define SYS_TRCCIDCCTLR1 sys_reg(2, 1, 3, 1, 2)
+#define SYS_TRCCIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 0)
+#define SYS_TRCCLAIMCLR sys_reg(2, 1, 7, 9, 6)
+#define SYS_TRCCLAIMSET sys_reg(2, 1, 7, 8, 6)
+#define SYS_TRCCNTCTLR(m) sys_reg(2, 1, 0, (4 | (m & 3)), 5)
+#define SYS_TRCCNTRLDVR(m) sys_reg(2, 1, 0, (0 | (m & 3)), 5)
+#define SYS_TRCCNTVR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 5)
+#define SYS_TRCCONFIGR sys_reg(2, 1, 0, 4, 0)
+#define SYS_TRCDEVARCH sys_reg(2, 1, 7, 15, 6)
+#define SYS_TRCDEVID sys_reg(2, 1, 7, 2, 7)
+#define SYS_TRCEVENTCTL0R sys_reg(2, 1, 0, 8, 0)
+#define SYS_TRCEVENTCTL1R sys_reg(2, 1, 0, 9, 0)
+#define SYS_TRCEXTINSELR(m) sys_reg(2, 1, 0, (8 | (m & 3)), 4)
+#define SYS_TRCIDR0 sys_reg(2, 1, 0, 8, 7)
+#define SYS_TRCIDR10 sys_reg(2, 1, 0, 2, 6)
+#define SYS_TRCIDR11 sys_reg(2, 1, 0, 3, 6)
+#define SYS_TRCIDR12 sys_reg(2, 1, 0, 4, 6)
+#define SYS_TRCIDR13 sys_reg(2, 1, 0, 5, 6)
+#define SYS_TRCIDR1 sys_reg(2, 1, 0, 9, 7)
+#define SYS_TRCIDR2 sys_reg(2, 1, 0, 10, 7)
+#define SYS_TRCIDR3 sys_reg(2, 1, 0, 11, 7)
+#define SYS_TRCIDR4 sys_reg(2, 1, 0, 12, 7)
+#define SYS_TRCIDR5 sys_reg(2, 1, 0, 13, 7)
+#define SYS_TRCIDR6 sys_reg(2, 1, 0, 14, 7)
+#define SYS_TRCIDR7 sys_reg(2, 1, 0, 15, 7)
+#define SYS_TRCIDR8 sys_reg(2, 1, 0, 0, 6)
+#define SYS_TRCIDR9 sys_reg(2, 1, 0, 1, 6)
+#define SYS_TRCIMSPEC(m) sys_reg(2, 1, 0, (m & 7), 7)
+#define SYS_TRCITEEDCR sys_reg(2, 1, 0, 2, 1)
+#define SYS_TRCOSLSR sys_reg(2, 1, 1, 1, 4)
+#define SYS_TRCPRGCTLR sys_reg(2, 1, 0, 1, 0)
+#define SYS_TRCQCTLR sys_reg(2, 1, 0, 1, 1)
+#define SYS_TRCRSCTLR(m) sys_reg(2, 1, 1, (m & 15), (0 | (m >> 4)))
+#define SYS_TRCRSR sys_reg(2, 1, 0, 10, 0)
+#define SYS_TRCSEQEVR(m) sys_reg(2, 1, 0, (m & 3), 4)
+#define SYS_TRCSEQRSTEVR sys_reg(2, 1, 0, 6, 4)
+#define SYS_TRCSEQSTR sys_reg(2, 1, 0, 7, 4)
+#define SYS_TRCSSCCR(m) sys_reg(2, 1, 1, (m & 7), 2)
+#define SYS_TRCSSCSR(m) sys_reg(2, 1, 1, (8 | (m & 7)), 2)
+#define SYS_TRCSSPCICR(m) sys_reg(2, 1, 1, (m & 7), 3)
+#define SYS_TRCSTALLCTLR sys_reg(2, 1, 0, 11, 0)
+#define SYS_TRCSTATR sys_reg(2, 1, 0, 3, 0)
+#define SYS_TRCSYNCPR sys_reg(2, 1, 0, 13, 0)
+#define SYS_TRCTRACEIDR sys_reg(2, 1, 0, 0, 1)
+#define SYS_TRCTSCTLR sys_reg(2, 1, 0, 12, 0)
+#define SYS_TRCVICTLR sys_reg(2, 1, 0, 0, 2)
+#define SYS_TRCVIIECTLR sys_reg(2, 1, 0, 1, 2)
+#define SYS_TRCVIPCSSCTLR sys_reg(2, 1, 0, 3, 2)
+#define SYS_TRCVISSCTLR sys_reg(2, 1, 0, 2, 2)
+#define SYS_TRCVMIDCCTLR0 sys_reg(2, 1, 3, 2, 2)
+#define SYS_TRCVMIDCCTLR1 sys_reg(2, 1, 3, 3, 2)
+#define SYS_TRCVMIDCVR(m) sys_reg(2, 1, 3, ((m & 7) << 1), 1)
+
+/* ETM */
+#define SYS_TRCOSLAR sys_reg(2, 1, 1, 0, 4)
+
#define SYS_MIDR_EL1 sys_reg(3, 0, 0, 0, 0)
#define SYS_MPIDR_EL1 sys_reg(3, 0, 0, 0, 5)
#define SYS_REVIDR_EL1 sys_reg(3, 0, 0, 0, 6)
#define SYS_ERXCTLR_EL1 sys_reg(3, 0, 5, 4, 1)
#define SYS_ERXSTATUS_EL1 sys_reg(3, 0, 5, 4, 2)
#define SYS_ERXADDR_EL1 sys_reg(3, 0, 5, 4, 3)
+#define SYS_ERXPFGF_EL1 sys_reg(3, 0, 5, 4, 4)
+#define SYS_ERXPFGCTL_EL1 sys_reg(3, 0, 5, 4, 5)
+#define SYS_ERXPFGCDN_EL1 sys_reg(3, 0, 5, 4, 6)
#define SYS_ERXMISC0_EL1 sys_reg(3, 0, 5, 5, 0)
#define SYS_ERXMISC1_EL1 sys_reg(3, 0, 5, 5, 1)
+#define SYS_ERXMISC2_EL1 sys_reg(3, 0, 5, 5, 2)
+#define SYS_ERXMISC3_EL1 sys_reg(3, 0, 5, 5, 3)
#define SYS_TFSR_EL1 sys_reg(3, 0, 5, 6, 0)
#define SYS_TFSRE0_EL1 sys_reg(3, 0, 5, 6, 1)
#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6)
#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define SYS_ACCDATA_EL1 sys_reg(3, 0, 13, 0, 5)
+
#define SYS_CNTKCTL_EL1 sys_reg(3, 0, 14, 1, 0)
#define SYS_AIDR_EL1 sys_reg(3, 1, 0, 0, 7)
#define SYS_VTCR_EL2 sys_reg(3, 4, 2, 1, 2)
#define SYS_TRFCR_EL2 sys_reg(3, 4, 1, 2, 1)
-#define SYS_HDFGRTR_EL2 sys_reg(3, 4, 3, 1, 4)
-#define SYS_HDFGWTR_EL2 sys_reg(3, 4, 3, 1, 5)
#define SYS_HAFGRTR_EL2 sys_reg(3, 4, 3, 1, 6)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_SP_EL2 sys_reg(3, 6, 4, 1, 0)
+/* AT instructions */
+#define AT_Op0 1
+#define AT_CRn 7
+
+#define OP_AT_S1E1R sys_insn(AT_Op0, 0, AT_CRn, 8, 0)
+#define OP_AT_S1E1W sys_insn(AT_Op0, 0, AT_CRn, 8, 1)
+#define OP_AT_S1E0R sys_insn(AT_Op0, 0, AT_CRn, 8, 2)
+#define OP_AT_S1E0W sys_insn(AT_Op0, 0, AT_CRn, 8, 3)
+#define OP_AT_S1E1RP sys_insn(AT_Op0, 0, AT_CRn, 9, 0)
+#define OP_AT_S1E1WP sys_insn(AT_Op0, 0, AT_CRn, 9, 1)
+#define OP_AT_S1E2R sys_insn(AT_Op0, 4, AT_CRn, 8, 0)
+#define OP_AT_S1E2W sys_insn(AT_Op0, 4, AT_CRn, 8, 1)
+#define OP_AT_S12E1R sys_insn(AT_Op0, 4, AT_CRn, 8, 4)
+#define OP_AT_S12E1W sys_insn(AT_Op0, 4, AT_CRn, 8, 5)
+#define OP_AT_S12E0R sys_insn(AT_Op0, 4, AT_CRn, 8, 6)
+#define OP_AT_S12E0W sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
+
+/* TLBI instructions */
+#define OP_TLBI_VMALLE1OS sys_insn(1, 0, 8, 1, 0)
+#define OP_TLBI_VAE1OS sys_insn(1, 0, 8, 1, 1)
+#define OP_TLBI_ASIDE1OS sys_insn(1, 0, 8, 1, 2)
+#define OP_TLBI_VAAE1OS sys_insn(1, 0, 8, 1, 3)
+#define OP_TLBI_VALE1OS sys_insn(1, 0, 8, 1, 5)
+#define OP_TLBI_VAALE1OS sys_insn(1, 0, 8, 1, 7)
+#define OP_TLBI_RVAE1IS sys_insn(1, 0, 8, 2, 1)
+#define OP_TLBI_RVAAE1IS sys_insn(1, 0, 8, 2, 3)
+#define OP_TLBI_RVALE1IS sys_insn(1, 0, 8, 2, 5)
+#define OP_TLBI_RVAALE1IS sys_insn(1, 0, 8, 2, 7)
+#define OP_TLBI_VMALLE1IS sys_insn(1, 0, 8, 3, 0)
+#define OP_TLBI_VAE1IS sys_insn(1, 0, 8, 3, 1)
+#define OP_TLBI_ASIDE1IS sys_insn(1, 0, 8, 3, 2)
+#define OP_TLBI_VAAE1IS sys_insn(1, 0, 8, 3, 3)
+#define OP_TLBI_VALE1IS sys_insn(1, 0, 8, 3, 5)
+#define OP_TLBI_VAALE1IS sys_insn(1, 0, 8, 3, 7)
+#define OP_TLBI_RVAE1OS sys_insn(1, 0, 8, 5, 1)
+#define OP_TLBI_RVAAE1OS sys_insn(1, 0, 8, 5, 3)
+#define OP_TLBI_RVALE1OS sys_insn(1, 0, 8, 5, 5)
+#define OP_TLBI_RVAALE1OS sys_insn(1, 0, 8, 5, 7)
+#define OP_TLBI_RVAE1 sys_insn(1, 0, 8, 6, 1)
+#define OP_TLBI_RVAAE1 sys_insn(1, 0, 8, 6, 3)
+#define OP_TLBI_RVALE1 sys_insn(1, 0, 8, 6, 5)
+#define OP_TLBI_RVAALE1 sys_insn(1, 0, 8, 6, 7)
+#define OP_TLBI_VMALLE1 sys_insn(1, 0, 8, 7, 0)
+#define OP_TLBI_VAE1 sys_insn(1, 0, 8, 7, 1)
+#define OP_TLBI_ASIDE1 sys_insn(1, 0, 8, 7, 2)
+#define OP_TLBI_VAAE1 sys_insn(1, 0, 8, 7, 3)
+#define OP_TLBI_VALE1 sys_insn(1, 0, 8, 7, 5)
+#define OP_TLBI_VAALE1 sys_insn(1, 0, 8, 7, 7)
+#define OP_TLBI_VMALLE1OSNXS sys_insn(1, 0, 9, 1, 0)
+#define OP_TLBI_VAE1OSNXS sys_insn(1, 0, 9, 1, 1)
+#define OP_TLBI_ASIDE1OSNXS sys_insn(1, 0, 9, 1, 2)
+#define OP_TLBI_VAAE1OSNXS sys_insn(1, 0, 9, 1, 3)
+#define OP_TLBI_VALE1OSNXS sys_insn(1, 0, 9, 1, 5)
+#define OP_TLBI_VAALE1OSNXS sys_insn(1, 0, 9, 1, 7)
+#define OP_TLBI_RVAE1ISNXS sys_insn(1, 0, 9, 2, 1)
+#define OP_TLBI_RVAAE1ISNXS sys_insn(1, 0, 9, 2, 3)
+#define OP_TLBI_RVALE1ISNXS sys_insn(1, 0, 9, 2, 5)
+#define OP_TLBI_RVAALE1ISNXS sys_insn(1, 0, 9, 2, 7)
+#define OP_TLBI_VMALLE1ISNXS sys_insn(1, 0, 9, 3, 0)
+#define OP_TLBI_VAE1ISNXS sys_insn(1, 0, 9, 3, 1)
+#define OP_TLBI_ASIDE1ISNXS sys_insn(1, 0, 9, 3, 2)
+#define OP_TLBI_VAAE1ISNXS sys_insn(1, 0, 9, 3, 3)
+#define OP_TLBI_VALE1ISNXS sys_insn(1, 0, 9, 3, 5)
+#define OP_TLBI_VAALE1ISNXS sys_insn(1, 0, 9, 3, 7)
+#define OP_TLBI_RVAE1OSNXS sys_insn(1, 0, 9, 5, 1)
+#define OP_TLBI_RVAAE1OSNXS sys_insn(1, 0, 9, 5, 3)
+#define OP_TLBI_RVALE1OSNXS sys_insn(1, 0, 9, 5, 5)
+#define OP_TLBI_RVAALE1OSNXS sys_insn(1, 0, 9, 5, 7)
+#define OP_TLBI_RVAE1NXS sys_insn(1, 0, 9, 6, 1)
+#define OP_TLBI_RVAAE1NXS sys_insn(1, 0, 9, 6, 3)
+#define OP_TLBI_RVALE1NXS sys_insn(1, 0, 9, 6, 5)
+#define OP_TLBI_RVAALE1NXS sys_insn(1, 0, 9, 6, 7)
+#define OP_TLBI_VMALLE1NXS sys_insn(1, 0, 9, 7, 0)
+#define OP_TLBI_VAE1NXS sys_insn(1, 0, 9, 7, 1)
+#define OP_TLBI_ASIDE1NXS sys_insn(1, 0, 9, 7, 2)
+#define OP_TLBI_VAAE1NXS sys_insn(1, 0, 9, 7, 3)
+#define OP_TLBI_VALE1NXS sys_insn(1, 0, 9, 7, 5)
+#define OP_TLBI_VAALE1NXS sys_insn(1, 0, 9, 7, 7)
+#define OP_TLBI_IPAS2E1IS sys_insn(1, 4, 8, 0, 1)
+#define OP_TLBI_RIPAS2E1IS sys_insn(1, 4, 8, 0, 2)
+#define OP_TLBI_IPAS2LE1IS sys_insn(1, 4, 8, 0, 5)
+#define OP_TLBI_RIPAS2LE1IS sys_insn(1, 4, 8, 0, 6)
+#define OP_TLBI_ALLE2OS sys_insn(1, 4, 8, 1, 0)
+#define OP_TLBI_VAE2OS sys_insn(1, 4, 8, 1, 1)
+#define OP_TLBI_ALLE1OS sys_insn(1, 4, 8, 1, 4)
+#define OP_TLBI_VALE2OS sys_insn(1, 4, 8, 1, 5)
+#define OP_TLBI_VMALLS12E1OS sys_insn(1, 4, 8, 1, 6)
+#define OP_TLBI_RVAE2IS sys_insn(1, 4, 8, 2, 1)
+#define OP_TLBI_RVALE2IS sys_insn(1, 4, 8, 2, 5)
+#define OP_TLBI_ALLE2IS sys_insn(1, 4, 8, 3, 0)
+#define OP_TLBI_VAE2IS sys_insn(1, 4, 8, 3, 1)
+#define OP_TLBI_ALLE1IS sys_insn(1, 4, 8, 3, 4)
+#define OP_TLBI_VALE2IS sys_insn(1, 4, 8, 3, 5)
+#define OP_TLBI_VMALLS12E1IS sys_insn(1, 4, 8, 3, 6)
+#define OP_TLBI_IPAS2E1OS sys_insn(1, 4, 8, 4, 0)
+#define OP_TLBI_IPAS2E1 sys_insn(1, 4, 8, 4, 1)
+#define OP_TLBI_RIPAS2E1 sys_insn(1, 4, 8, 4, 2)
+#define OP_TLBI_RIPAS2E1OS sys_insn(1, 4, 8, 4, 3)
+#define OP_TLBI_IPAS2LE1OS sys_insn(1, 4, 8, 4, 4)
+#define OP_TLBI_IPAS2LE1 sys_insn(1, 4, 8, 4, 5)
+#define OP_TLBI_RIPAS2LE1 sys_insn(1, 4, 8, 4, 6)
+#define OP_TLBI_RIPAS2LE1OS sys_insn(1, 4, 8, 4, 7)
+#define OP_TLBI_RVAE2OS sys_insn(1, 4, 8, 5, 1)
+#define OP_TLBI_RVALE2OS sys_insn(1, 4, 8, 5, 5)
+#define OP_TLBI_RVAE2 sys_insn(1, 4, 8, 6, 1)
+#define OP_TLBI_RVALE2 sys_insn(1, 4, 8, 6, 5)
+#define OP_TLBI_ALLE2 sys_insn(1, 4, 8, 7, 0)
+#define OP_TLBI_VAE2 sys_insn(1, 4, 8, 7, 1)
+#define OP_TLBI_ALLE1 sys_insn(1, 4, 8, 7, 4)
+#define OP_TLBI_VALE2 sys_insn(1, 4, 8, 7, 5)
+#define OP_TLBI_VMALLS12E1 sys_insn(1, 4, 8, 7, 6)
+#define OP_TLBI_IPAS2E1ISNXS sys_insn(1, 4, 9, 0, 1)
+#define OP_TLBI_RIPAS2E1ISNXS sys_insn(1, 4, 9, 0, 2)
+#define OP_TLBI_IPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 5)
+#define OP_TLBI_RIPAS2LE1ISNXS sys_insn(1, 4, 9, 0, 6)
+#define OP_TLBI_ALLE2OSNXS sys_insn(1, 4, 9, 1, 0)
+#define OP_TLBI_VAE2OSNXS sys_insn(1, 4, 9, 1, 1)
+#define OP_TLBI_ALLE1OSNXS sys_insn(1, 4, 9, 1, 4)
+#define OP_TLBI_VALE2OSNXS sys_insn(1, 4, 9, 1, 5)
+#define OP_TLBI_VMALLS12E1OSNXS sys_insn(1, 4, 9, 1, 6)
+#define OP_TLBI_RVAE2ISNXS sys_insn(1, 4, 9, 2, 1)
+#define OP_TLBI_RVALE2ISNXS sys_insn(1, 4, 9, 2, 5)
+#define OP_TLBI_ALLE2ISNXS sys_insn(1, 4, 9, 3, 0)
+#define OP_TLBI_VAE2ISNXS sys_insn(1, 4, 9, 3, 1)
+#define OP_TLBI_ALLE1ISNXS sys_insn(1, 4, 9, 3, 4)
+#define OP_TLBI_VALE2ISNXS sys_insn(1, 4, 9, 3, 5)
+#define OP_TLBI_VMALLS12E1ISNXS sys_insn(1, 4, 9, 3, 6)
+#define OP_TLBI_IPAS2E1OSNXS sys_insn(1, 4, 9, 4, 0)
+#define OP_TLBI_IPAS2E1NXS sys_insn(1, 4, 9, 4, 1)
+#define OP_TLBI_RIPAS2E1NXS sys_insn(1, 4, 9, 4, 2)
+#define OP_TLBI_RIPAS2E1OSNXS sys_insn(1, 4, 9, 4, 3)
+#define OP_TLBI_IPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 4)
+#define OP_TLBI_IPAS2LE1NXS sys_insn(1, 4, 9, 4, 5)
+#define OP_TLBI_RIPAS2LE1NXS sys_insn(1, 4, 9, 4, 6)
+#define OP_TLBI_RIPAS2LE1OSNXS sys_insn(1, 4, 9, 4, 7)
+#define OP_TLBI_RVAE2OSNXS sys_insn(1, 4, 9, 5, 1)
+#define OP_TLBI_RVALE2OSNXS sys_insn(1, 4, 9, 5, 5)
+#define OP_TLBI_RVAE2NXS sys_insn(1, 4, 9, 6, 1)
+#define OP_TLBI_RVALE2NXS sys_insn(1, 4, 9, 6, 5)
+#define OP_TLBI_ALLE2NXS sys_insn(1, 4, 9, 7, 0)
+#define OP_TLBI_VAE2NXS sys_insn(1, 4, 9, 7, 1)
+#define OP_TLBI_ALLE1NXS sys_insn(1, 4, 9, 7, 4)
+#define OP_TLBI_VALE2NXS sys_insn(1, 4, 9, 7, 5)
+#define OP_TLBI_VMALLS12E1NXS sys_insn(1, 4, 9, 7, 6)
+
+/* Misc instructions */
+#define OP_BRB_IALL sys_insn(1, 1, 7, 2, 4)
+#define OP_BRB_INJ sys_insn(1, 1, 7, 2, 5)
+#define OP_CFP_RCTX sys_insn(1, 3, 7, 3, 4)
+#define OP_DVP_RCTX sys_insn(1, 3, 7, 3, 5)
+#define OP_CPP_RCTX sys_insn(1, 3, 7, 3, 7)
+
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_ENTP2 (BIT(60))
#define SCTLR_ELx_DSSBS (BIT(44))
*/
#define MAX_TLBI_OPS PTRS_PER_PTE
+/*
+ * __flush_tlb_range_op - Perform TLBI operation upon a range
+ *
+ * @op: TLBI instruction that operates on a range (has 'r' prefix)
+ * @start: The start address of the range
+ * @pages: Range as the number of pages from 'start'
+ * @stride: Flush granularity
+ * @asid: The ASID of the task (0 for IPA instructions)
+ * @tlb_level: Translation Table level hint, if known
+ * @tlbi_user: If 'true', call an additional __tlbi_user()
+ * (typically for user ASIDs). 'flase' for IPA instructions
+ *
+ * When the CPU does not support TLB range operations, flush the TLB
+ * entries one by one at the granularity of 'stride'. If the TLB
+ * range ops are supported, then:
+ *
+ * 1. If 'pages' is odd, flush the first page through non-range
+ * operations;
+ *
+ * 2. For remaining pages: the minimum range granularity is decided
+ * by 'scale', so multiple range TLBI operations may be required.
+ * Start from scale = 0, flush the corresponding number of pages
+ * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+ * until no pages left.
+ *
+ * Note that certain ranges can be represented by either num = 31 and
+ * scale or num = 0 and scale + 1. The loop below favours the latter
+ * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+ */
+#define __flush_tlb_range_op(op, start, pages, stride, \
+ asid, tlb_level, tlbi_user) \
+do { \
+ int num = 0; \
+ int scale = 0; \
+ unsigned long addr; \
+ \
+ while (pages > 0) { \
+ if (!system_supports_tlb_range() || \
+ pages % 2 == 1) { \
+ addr = __TLBI_VADDR(start, asid); \
+ __tlbi_level(op, addr, tlb_level); \
+ if (tlbi_user) \
+ __tlbi_user_level(op, addr, tlb_level); \
+ start += stride; \
+ pages -= stride >> PAGE_SHIFT; \
+ continue; \
+ } \
+ \
+ num = __TLBI_RANGE_NUM(pages, scale); \
+ if (num >= 0) { \
+ addr = __TLBI_VADDR_RANGE(start, asid, scale, \
+ num, tlb_level); \
+ __tlbi(r##op, addr); \
+ if (tlbi_user) \
+ __tlbi_user(r##op, addr); \
+ start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; \
+ pages -= __TLBI_RANGE_PAGES(num, scale); \
+ } \
+ scale++; \
+ } \
+} while (0)
+
+#define __flush_s2_tlb_range_op(op, start, pages, stride, tlb_level) \
+ __flush_tlb_range_op(op, start, pages, stride, 0, tlb_level, false)
+
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
unsigned long stride, bool last_level,
int tlb_level)
{
- int num = 0;
- int scale = 0;
- unsigned long asid, addr, pages;
+ unsigned long asid, pages;
start = round_down(start, stride);
end = round_up(end, stride);
dsb(ishst);
asid = ASID(vma->vm_mm);
- /*
- * When the CPU does not support TLB range operations, flush the TLB
- * entries one by one at the granularity of 'stride'. If the TLB
- * range ops are supported, then:
- *
- * 1. If 'pages' is odd, flush the first page through non-range
- * operations;
- *
- * 2. For remaining pages: the minimum range granularity is decided
- * by 'scale', so multiple range TLBI operations may be required.
- * Start from scale = 0, flush the corresponding number of pages
- * ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
- * until no pages left.
- *
- * Note that certain ranges can be represented by either num = 31 and
- * scale or num = 0 and scale + 1. The loop below favours the latter
- * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
- */
- while (pages > 0) {
- if (!system_supports_tlb_range() ||
- pages % 2 == 1) {
- addr = __TLBI_VADDR(start, asid);
- if (last_level) {
- __tlbi_level(vale1is, addr, tlb_level);
- __tlbi_user_level(vale1is, addr, tlb_level);
- } else {
- __tlbi_level(vae1is, addr, tlb_level);
- __tlbi_user_level(vae1is, addr, tlb_level);
- }
- start += stride;
- pages -= stride >> PAGE_SHIFT;
- continue;
- }
-
- num = __TLBI_RANGE_NUM(pages, scale);
- if (num >= 0) {
- addr = __TLBI_VADDR_RANGE(start, asid, scale,
- num, tlb_level);
- if (last_level) {
- __tlbi(rvale1is, addr);
- __tlbi_user(rvale1is, addr);
- } else {
- __tlbi(rvae1is, addr);
- __tlbi_user(rvae1is, addr);
- }
- start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
- pages -= __TLBI_RANGE_PAGES(num, scale);
- }
- scale++;
- }
+ if (last_level)
+ __flush_tlb_range_op(vale1is, start, pages, stride, asid, tlb_level, true);
+ else
+ __flush_tlb_range_op(vae1is, start, pages, stride, asid, tlb_level, true);
+
dsb(ish);
mmu_notifier_arch_invalidate_secondary_tlbs(vma->vm_mm, start, end);
}
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LRCPC, IMP)
},
+ {
+ .desc = "Fine Grained Traps",
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .capability = ARM64_HAS_FGT,
+ .matches = has_cpuid_feature,
+ ARM64_CPUID_FIELDS(ID_AA64MMFR0_EL1, FGT, IMP)
+ },
#ifdef CONFIG_ARM64_SME
{
.desc = "Scalable Matrix Extension",
if (!len)
return;
- len = strscpy(buf, cmdline, ARRAY_SIZE(buf));
- if (len == -E2BIG)
- len = ARRAY_SIZE(buf) - 1;
+ len = min(len, ARRAY_SIZE(buf) - 1);
+ memcpy(buf, cmdline, len);
+ buf[len] = '\0';
if (strcmp(buf, "--") == 0)
return;
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
- select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
select SCHED_INFO
select GUEST_PERF_EVENTS if PERF_EVENTS
select INTERVAL_TREE
+ select XARRAY_MULTI
help
Support hosting virtualized guest machines.
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
+#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
#include <asm/kvm_emulate.h>
#include <asm/sections.h>
#endif
/* Force users to call KVM_ARM_VCPU_INIT */
- vcpu->arch.target = -1;
+ vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
vcpu_ptrauth_disable(vcpu);
kvm_arch_vcpu_load_debug_state_flags(vcpu);
- if (!cpumask_test_cpu(smp_processor_id(), vcpu->kvm->arch.supported_cpus))
+ if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
vcpu_set_on_unsupported_cpu(vcpu);
}
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
- return vcpu->arch.target >= 0;
+ return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
}
/*
kvm_pmu_handle_pmcr(vcpu,
__vcpu_sys_reg(vcpu, PMCR_EL0));
+ if (kvm_check_request(KVM_REQ_RESYNC_PMU_EL0, vcpu))
+ kvm_vcpu_pmu_restore_guest(vcpu);
+
if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
return kvm_vcpu_suspend(vcpu);
if (likely(!vcpu_mode_is_32bit(vcpu)))
return false;
+ if (vcpu_has_nv(vcpu))
+ return true;
+
return !kvm_supports_32bit_el0();
}
* invalid. The VMM can try and fix it by issuing a
* KVM_ARM_VCPU_INIT if it really wants to.
*/
- vcpu->arch.target = -1;
+ vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
ret = ARM_EXCEPTION_IL;
}
{
unsigned long features = init->features[0];
- return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES) ||
- vcpu->arch.target != init->target;
+ return !bitmap_equal(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
}
static int __kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
!bitmap_equal(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES))
goto out_unlock;
- vcpu->arch.target = init->target;
bitmap_copy(vcpu->arch.features, &features, KVM_VCPU_MAX_FEATURES);
/* Now we know what it is, we can reset it. */
ret = kvm_reset_vcpu(vcpu);
if (ret) {
- vcpu->arch.target = -1;
bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
goto out_unlock;
}
bitmap_copy(kvm->arch.vcpu_features, &features, KVM_VCPU_MAX_FEATURES);
set_bit(KVM_ARCH_FLAG_VCPU_FEATURES_CONFIGURED, &kvm->arch.flags);
-
+ vcpu_set_flag(vcpu, VCPU_INITIALIZED);
out_unlock:
mutex_unlock(&kvm->arch.config_lock);
return ret;
{
int ret;
- if (init->target != kvm_target_cpu())
+ if (init->target != KVM_ARM_TARGET_GENERIC_V8 &&
+ init->target != kvm_target_cpu())
return -EINVAL;
ret = kvm_vcpu_init_check_features(vcpu, init);
if (ret)
return ret;
- if (vcpu->arch.target == -1)
+ if (!kvm_vcpu_initialized(vcpu))
return __kvm_vcpu_set_target(vcpu, init);
if (kvm_vcpu_init_changed(vcpu, init))
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
-{
- kvm_flush_remote_tlbs(kvm);
-}
-
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr);
}
case KVM_ARM_PREFERRED_TARGET: {
- struct kvm_vcpu_init init;
-
- kvm_vcpu_preferred_target(&init);
+ struct kvm_vcpu_init init = {
+ .target = KVM_ARM_TARGET_GENERIC_V8,
+ };
if (copy_to_user(argp, &init, sizeof(init)))
return -EFAULT;
for_each_possible_cpu(cpu) {
struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu);
- unsigned long hyp_addr;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- err = hyp_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
- if (err) {
- kvm_err("Cannot allocate hyp stack guard page\n");
- goto out_err;
- }
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- err = __create_hyp_mappings(hyp_addr + PAGE_SIZE, PAGE_SIZE,
- __pa(stack_page), PAGE_HYP);
+ err = create_hyp_stack(__pa(stack_page), ¶ms->stack_hyp_va);
if (err) {
kvm_err("Cannot map hyp stack\n");
goto out_err;
* has been mapped in the flexible private VA space.
*/
params->stack_pa = __pa(stack_page);
-
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
for_each_possible_cpu(cpu) {
#include "trace.h"
+enum trap_behaviour {
+ BEHAVE_HANDLE_LOCALLY = 0,
+ BEHAVE_FORWARD_READ = BIT(0),
+ BEHAVE_FORWARD_WRITE = BIT(1),
+ BEHAVE_FORWARD_ANY = BEHAVE_FORWARD_READ | BEHAVE_FORWARD_WRITE,
+};
+
+struct trap_bits {
+ const enum vcpu_sysreg index;
+ const enum trap_behaviour behaviour;
+ const u64 value;
+ const u64 mask;
+};
+
+/* Coarse Grained Trap definitions */
+enum cgt_group_id {
+ /* Indicates no coarse trap control */
+ __RESERVED__,
+
+ /*
+ * The first batch of IDs denote coarse trapping that are used
+ * on their own instead of being part of a combination of
+ * trap controls.
+ */
+ CGT_HCR_TID1,
+ CGT_HCR_TID2,
+ CGT_HCR_TID3,
+ CGT_HCR_IMO,
+ CGT_HCR_FMO,
+ CGT_HCR_TIDCP,
+ CGT_HCR_TACR,
+ CGT_HCR_TSW,
+ CGT_HCR_TPC,
+ CGT_HCR_TPU,
+ CGT_HCR_TTLB,
+ CGT_HCR_TVM,
+ CGT_HCR_TDZ,
+ CGT_HCR_TRVM,
+ CGT_HCR_TLOR,
+ CGT_HCR_TERR,
+ CGT_HCR_APK,
+ CGT_HCR_NV,
+ CGT_HCR_NV_nNV2,
+ CGT_HCR_NV1_nNV2,
+ CGT_HCR_AT,
+ CGT_HCR_nFIEN,
+ CGT_HCR_TID4,
+ CGT_HCR_TICAB,
+ CGT_HCR_TOCU,
+ CGT_HCR_ENSCXT,
+ CGT_HCR_TTLBIS,
+ CGT_HCR_TTLBOS,
+
+ CGT_MDCR_TPMCR,
+ CGT_MDCR_TPM,
+ CGT_MDCR_TDE,
+ CGT_MDCR_TDA,
+ CGT_MDCR_TDOSA,
+ CGT_MDCR_TDRA,
+ CGT_MDCR_E2PB,
+ CGT_MDCR_TPMS,
+ CGT_MDCR_TTRF,
+ CGT_MDCR_E2TB,
+ CGT_MDCR_TDCC,
+
+ /*
+ * Anything after this point is a combination of coarse trap
+ * controls, which must all be evaluated to decide what to do.
+ */
+ __MULTIPLE_CONTROL_BITS__,
+ CGT_HCR_IMO_FMO = __MULTIPLE_CONTROL_BITS__,
+ CGT_HCR_TID2_TID4,
+ CGT_HCR_TTLB_TTLBIS,
+ CGT_HCR_TTLB_TTLBOS,
+ CGT_HCR_TVM_TRVM,
+ CGT_HCR_TPU_TICAB,
+ CGT_HCR_TPU_TOCU,
+ CGT_HCR_NV1_nNV2_ENSCXT,
+ CGT_MDCR_TPM_TPMCR,
+ CGT_MDCR_TDE_TDA,
+ CGT_MDCR_TDE_TDOSA,
+ CGT_MDCR_TDE_TDRA,
+ CGT_MDCR_TDCC_TDE_TDA,
+
+ /*
+ * Anything after this point requires a callback evaluating a
+ * complex trap condition. Ugly stuff.
+ */
+ __COMPLEX_CONDITIONS__,
+ CGT_CNTHCTL_EL1PCTEN = __COMPLEX_CONDITIONS__,
+ CGT_CNTHCTL_EL1PTEN,
+
+ /* Must be last */
+ __NR_CGT_GROUP_IDS__
+};
+
+static const struct trap_bits coarse_trap_bits[] = {
+ [CGT_HCR_TID1] = {
+ .index = HCR_EL2,
+ .value = HCR_TID1,
+ .mask = HCR_TID1,
+ .behaviour = BEHAVE_FORWARD_READ,
+ },
+ [CGT_HCR_TID2] = {
+ .index = HCR_EL2,
+ .value = HCR_TID2,
+ .mask = HCR_TID2,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TID3] = {
+ .index = HCR_EL2,
+ .value = HCR_TID3,
+ .mask = HCR_TID3,
+ .behaviour = BEHAVE_FORWARD_READ,
+ },
+ [CGT_HCR_IMO] = {
+ .index = HCR_EL2,
+ .value = HCR_IMO,
+ .mask = HCR_IMO,
+ .behaviour = BEHAVE_FORWARD_WRITE,
+ },
+ [CGT_HCR_FMO] = {
+ .index = HCR_EL2,
+ .value = HCR_FMO,
+ .mask = HCR_FMO,
+ .behaviour = BEHAVE_FORWARD_WRITE,
+ },
+ [CGT_HCR_TIDCP] = {
+ .index = HCR_EL2,
+ .value = HCR_TIDCP,
+ .mask = HCR_TIDCP,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TACR] = {
+ .index = HCR_EL2,
+ .value = HCR_TACR,
+ .mask = HCR_TACR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TSW] = {
+ .index = HCR_EL2,
+ .value = HCR_TSW,
+ .mask = HCR_TSW,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TPC] = { /* Also called TCPC when FEAT_DPB is implemented */
+ .index = HCR_EL2,
+ .value = HCR_TPC,
+ .mask = HCR_TPC,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TPU] = {
+ .index = HCR_EL2,
+ .value = HCR_TPU,
+ .mask = HCR_TPU,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TTLB] = {
+ .index = HCR_EL2,
+ .value = HCR_TTLB,
+ .mask = HCR_TTLB,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TVM] = {
+ .index = HCR_EL2,
+ .value = HCR_TVM,
+ .mask = HCR_TVM,
+ .behaviour = BEHAVE_FORWARD_WRITE,
+ },
+ [CGT_HCR_TDZ] = {
+ .index = HCR_EL2,
+ .value = HCR_TDZ,
+ .mask = HCR_TDZ,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TRVM] = {
+ .index = HCR_EL2,
+ .value = HCR_TRVM,
+ .mask = HCR_TRVM,
+ .behaviour = BEHAVE_FORWARD_READ,
+ },
+ [CGT_HCR_TLOR] = {
+ .index = HCR_EL2,
+ .value = HCR_TLOR,
+ .mask = HCR_TLOR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TERR] = {
+ .index = HCR_EL2,
+ .value = HCR_TERR,
+ .mask = HCR_TERR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_APK] = {
+ .index = HCR_EL2,
+ .value = 0,
+ .mask = HCR_APK,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_NV] = {
+ .index = HCR_EL2,
+ .value = HCR_NV,
+ .mask = HCR_NV,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_NV_nNV2] = {
+ .index = HCR_EL2,
+ .value = HCR_NV,
+ .mask = HCR_NV | HCR_NV2,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_NV1_nNV2] = {
+ .index = HCR_EL2,
+ .value = HCR_NV | HCR_NV1,
+ .mask = HCR_NV | HCR_NV1 | HCR_NV2,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_AT] = {
+ .index = HCR_EL2,
+ .value = HCR_AT,
+ .mask = HCR_AT,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_nFIEN] = {
+ .index = HCR_EL2,
+ .value = 0,
+ .mask = HCR_FIEN,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TID4] = {
+ .index = HCR_EL2,
+ .value = HCR_TID4,
+ .mask = HCR_TID4,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TICAB] = {
+ .index = HCR_EL2,
+ .value = HCR_TICAB,
+ .mask = HCR_TICAB,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TOCU] = {
+ .index = HCR_EL2,
+ .value = HCR_TOCU,
+ .mask = HCR_TOCU,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_ENSCXT] = {
+ .index = HCR_EL2,
+ .value = 0,
+ .mask = HCR_ENSCXT,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TTLBIS] = {
+ .index = HCR_EL2,
+ .value = HCR_TTLBIS,
+ .mask = HCR_TTLBIS,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_HCR_TTLBOS] = {
+ .index = HCR_EL2,
+ .value = HCR_TTLBOS,
+ .mask = HCR_TTLBOS,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TPMCR] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TPMCR,
+ .mask = MDCR_EL2_TPMCR,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TPM] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TPM,
+ .mask = MDCR_EL2_TPM,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TDE] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TDE,
+ .mask = MDCR_EL2_TDE,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TDA] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TDA,
+ .mask = MDCR_EL2_TDA,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TDOSA] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TDOSA,
+ .mask = MDCR_EL2_TDOSA,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TDRA] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TDRA,
+ .mask = MDCR_EL2_TDRA,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_E2PB] = {
+ .index = MDCR_EL2,
+ .value = 0,
+ .mask = BIT(MDCR_EL2_E2PB_SHIFT),
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TPMS] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TPMS,
+ .mask = MDCR_EL2_TPMS,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TTRF] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TTRF,
+ .mask = MDCR_EL2_TTRF,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_E2TB] = {
+ .index = MDCR_EL2,
+ .value = 0,
+ .mask = BIT(MDCR_EL2_E2TB_SHIFT),
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+ [CGT_MDCR_TDCC] = {
+ .index = MDCR_EL2,
+ .value = MDCR_EL2_TDCC,
+ .mask = MDCR_EL2_TDCC,
+ .behaviour = BEHAVE_FORWARD_ANY,
+ },
+};
+
+#define MCB(id, ...) \
+ [id - __MULTIPLE_CONTROL_BITS__] = \
+ (const enum cgt_group_id[]){ \
+ __VA_ARGS__, __RESERVED__ \
+ }
+
+static const enum cgt_group_id *coarse_control_combo[] = {
+ MCB(CGT_HCR_IMO_FMO, CGT_HCR_IMO, CGT_HCR_FMO),
+ MCB(CGT_HCR_TID2_TID4, CGT_HCR_TID2, CGT_HCR_TID4),
+ MCB(CGT_HCR_TTLB_TTLBIS, CGT_HCR_TTLB, CGT_HCR_TTLBIS),
+ MCB(CGT_HCR_TTLB_TTLBOS, CGT_HCR_TTLB, CGT_HCR_TTLBOS),
+ MCB(CGT_HCR_TVM_TRVM, CGT_HCR_TVM, CGT_HCR_TRVM),
+ MCB(CGT_HCR_TPU_TICAB, CGT_HCR_TPU, CGT_HCR_TICAB),
+ MCB(CGT_HCR_TPU_TOCU, CGT_HCR_TPU, CGT_HCR_TOCU),
+ MCB(CGT_HCR_NV1_nNV2_ENSCXT, CGT_HCR_NV1_nNV2, CGT_HCR_ENSCXT),
+ MCB(CGT_MDCR_TPM_TPMCR, CGT_MDCR_TPM, CGT_MDCR_TPMCR),
+ MCB(CGT_MDCR_TDE_TDA, CGT_MDCR_TDE, CGT_MDCR_TDA),
+ MCB(CGT_MDCR_TDE_TDOSA, CGT_MDCR_TDE, CGT_MDCR_TDOSA),
+ MCB(CGT_MDCR_TDE_TDRA, CGT_MDCR_TDE, CGT_MDCR_TDRA),
+ MCB(CGT_MDCR_TDCC_TDE_TDA, CGT_MDCR_TDCC, CGT_MDCR_TDE, CGT_MDCR_TDA),
+};
+
+typedef enum trap_behaviour (*complex_condition_check)(struct kvm_vcpu *);
+
+/*
+ * Warning, maximum confusion ahead.
+ *
+ * When E2H=0, CNTHCTL_EL2[1:0] are defined as EL1PCEN:EL1PCTEN
+ * When E2H=1, CNTHCTL_EL2[11:10] are defined as EL1PTEN:EL1PCTEN
+ *
+ * Note the single letter difference? Yet, the bits have the same
+ * function despite a different layout and a different name.
+ *
+ * We don't try to reconcile this mess. We just use the E2H=0 bits
+ * to generate something that is in the E2H=1 format, and live with
+ * it. You're welcome.
+ */
+static u64 get_sanitized_cnthctl(struct kvm_vcpu *vcpu)
+{
+ u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
+
+ return val & ((CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN) << 10);
+}
+
+static enum trap_behaviour check_cnthctl_el1pcten(struct kvm_vcpu *vcpu)
+{
+ if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCTEN << 10))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_ANY;
+}
+
+static enum trap_behaviour check_cnthctl_el1pten(struct kvm_vcpu *vcpu)
+{
+ if (get_sanitized_cnthctl(vcpu) & (CNTHCTL_EL1PCEN << 10))
+ return BEHAVE_HANDLE_LOCALLY;
+
+ return BEHAVE_FORWARD_ANY;
+}
+
+#define CCC(id, fn) \
+ [id - __COMPLEX_CONDITIONS__] = fn
+
+static const complex_condition_check ccc[] = {
+ CCC(CGT_CNTHCTL_EL1PCTEN, check_cnthctl_el1pcten),
+ CCC(CGT_CNTHCTL_EL1PTEN, check_cnthctl_el1pten),
+};
+
+/*
+ * Bit assignment for the trap controls. We use a 64bit word with the
+ * following layout for each trapped sysreg:
+ *
+ * [9:0] enum cgt_group_id (10 bits)
+ * [13:10] enum fgt_group_id (4 bits)
+ * [19:14] bit number in the FGT register (6 bits)
+ * [20] trap polarity (1 bit)
+ * [25:21] FG filter (5 bits)
+ * [62:26] Unused (37 bits)
+ * [63] RES0 - Must be zero, as lost on insertion in the xarray
+ */
+#define TC_CGT_BITS 10
+#define TC_FGT_BITS 4
+#define TC_FGF_BITS 5
+
+union trap_config {
+ u64 val;
+ struct {
+ unsigned long cgt:TC_CGT_BITS; /* Coarse Grained Trap id */
+ unsigned long fgt:TC_FGT_BITS; /* Fine Grained Trap id */
+ unsigned long bit:6; /* Bit number */
+ unsigned long pol:1; /* Polarity */
+ unsigned long fgf:TC_FGF_BITS; /* Fine Grained Filter */
+ unsigned long unused:37; /* Unused, should be zero */
+ unsigned long mbz:1; /* Must Be Zero */
+ };
+};
+
+struct encoding_to_trap_config {
+ const u32 encoding;
+ const u32 end;
+ const union trap_config tc;
+ const unsigned int line;
+};
+
+#define SR_RANGE_TRAP(sr_start, sr_end, trap_id) \
+ { \
+ .encoding = sr_start, \
+ .end = sr_end, \
+ .tc = { \
+ .cgt = trap_id, \
+ }, \
+ .line = __LINE__, \
+ }
+
+#define SR_TRAP(sr, trap_id) SR_RANGE_TRAP(sr, sr, trap_id)
+
+/*
+ * Map encoding to trap bits for exception reported with EC=0x18.
+ * These must only be evaluated when running a nested hypervisor, but
+ * that the current context is not a hypervisor context. When the
+ * trapped access matches one of the trap controls, the exception is
+ * re-injected in the nested hypervisor.
+ */
+static const struct encoding_to_trap_config encoding_to_cgt[] __initconst = {
+ SR_TRAP(SYS_REVIDR_EL1, CGT_HCR_TID1),
+ SR_TRAP(SYS_AIDR_EL1, CGT_HCR_TID1),
+ SR_TRAP(SYS_SMIDR_EL1, CGT_HCR_TID1),
+ SR_TRAP(SYS_CTR_EL0, CGT_HCR_TID2),
+ SR_TRAP(SYS_CCSIDR_EL1, CGT_HCR_TID2_TID4),
+ SR_TRAP(SYS_CCSIDR2_EL1, CGT_HCR_TID2_TID4),
+ SR_TRAP(SYS_CLIDR_EL1, CGT_HCR_TID2_TID4),
+ SR_TRAP(SYS_CSSELR_EL1, CGT_HCR_TID2_TID4),
+ SR_RANGE_TRAP(SYS_ID_PFR0_EL1,
+ sys_reg(3, 0, 0, 7, 7), CGT_HCR_TID3),
+ SR_TRAP(SYS_ICC_SGI0R_EL1, CGT_HCR_IMO_FMO),
+ SR_TRAP(SYS_ICC_ASGI1R_EL1, CGT_HCR_IMO_FMO),
+ SR_TRAP(SYS_ICC_SGI1R_EL1, CGT_HCR_IMO_FMO),
+ SR_RANGE_TRAP(sys_reg(3, 0, 11, 0, 0),
+ sys_reg(3, 0, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 1, 11, 0, 0),
+ sys_reg(3, 1, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 2, 11, 0, 0),
+ sys_reg(3, 2, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 3, 11, 0, 0),
+ sys_reg(3, 3, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 4, 11, 0, 0),
+ sys_reg(3, 4, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 5, 11, 0, 0),
+ sys_reg(3, 5, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 6, 11, 0, 0),
+ sys_reg(3, 6, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 7, 11, 0, 0),
+ sys_reg(3, 7, 11, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 0, 15, 0, 0),
+ sys_reg(3, 0, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 1, 15, 0, 0),
+ sys_reg(3, 1, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 2, 15, 0, 0),
+ sys_reg(3, 2, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 3, 15, 0, 0),
+ sys_reg(3, 3, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 4, 15, 0, 0),
+ sys_reg(3, 4, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 5, 15, 0, 0),
+ sys_reg(3, 5, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 6, 15, 0, 0),
+ sys_reg(3, 6, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_RANGE_TRAP(sys_reg(3, 7, 15, 0, 0),
+ sys_reg(3, 7, 15, 15, 7), CGT_HCR_TIDCP),
+ SR_TRAP(SYS_ACTLR_EL1, CGT_HCR_TACR),
+ SR_TRAP(SYS_DC_ISW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CISW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_IGSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_IGDSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CGSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CGDSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CIGSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CIGDSW, CGT_HCR_TSW),
+ SR_TRAP(SYS_DC_CIVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CVAP, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CVADP, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_IVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CIGVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CIGDVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_IGVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_IGDVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGDVAC, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGVAP, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGDVAP, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGVADP, CGT_HCR_TPC),
+ SR_TRAP(SYS_DC_CGDVADP, CGT_HCR_TPC),
+ SR_TRAP(SYS_IC_IVAU, CGT_HCR_TPU_TOCU),
+ SR_TRAP(SYS_IC_IALLU, CGT_HCR_TPU_TOCU),
+ SR_TRAP(SYS_IC_IALLUIS, CGT_HCR_TPU_TICAB),
+ SR_TRAP(SYS_DC_CVAU, CGT_HCR_TPU_TOCU),
+ SR_TRAP(OP_TLBI_RVAE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAAE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVALE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAALE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VMALLE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_ASIDE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAAE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VALE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAALE1, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAAE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVALE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAALE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VMALLE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_ASIDE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAAE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VALE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_VAALE1NXS, CGT_HCR_TTLB),
+ SR_TRAP(OP_TLBI_RVAE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVAAE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVALE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVAALE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VMALLE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_ASIDE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAAE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VALE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAALE1IS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVAE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVAAE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVALE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_RVAALE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VMALLE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_ASIDE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAAE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VALE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VAALE1ISNXS, CGT_HCR_TTLB_TTLBIS),
+ SR_TRAP(OP_TLBI_VMALLE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_ASIDE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAAE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VALE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAALE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAAE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVALE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAALE1OS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VMALLE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_ASIDE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAAE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_VAALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAAE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(OP_TLBI_RVAALE1OSNXS, CGT_HCR_TTLB_TTLBOS),
+ SR_TRAP(SYS_SCTLR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_TTBR0_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_TTBR1_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_TCR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_ESR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_FAR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_AFSR0_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_AFSR1_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_MAIR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_AMAIR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_CONTEXTIDR_EL1, CGT_HCR_TVM_TRVM),
+ SR_TRAP(SYS_DC_ZVA, CGT_HCR_TDZ),
+ SR_TRAP(SYS_DC_GVA, CGT_HCR_TDZ),
+ SR_TRAP(SYS_DC_GZVA, CGT_HCR_TDZ),
+ SR_TRAP(SYS_LORSA_EL1, CGT_HCR_TLOR),
+ SR_TRAP(SYS_LOREA_EL1, CGT_HCR_TLOR),
+ SR_TRAP(SYS_LORN_EL1, CGT_HCR_TLOR),
+ SR_TRAP(SYS_LORC_EL1, CGT_HCR_TLOR),
+ SR_TRAP(SYS_LORID_EL1, CGT_HCR_TLOR),
+ SR_TRAP(SYS_ERRIDR_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERRSELR_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXADDR_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXCTLR_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXFR_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXMISC0_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXMISC1_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXMISC2_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXMISC3_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_ERXSTATUS_EL1, CGT_HCR_TERR),
+ SR_TRAP(SYS_APIAKEYLO_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APIAKEYHI_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APIBKEYLO_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APIBKEYHI_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APDAKEYLO_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APDAKEYHI_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APDBKEYLO_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APDBKEYHI_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APGAKEYLO_EL1, CGT_HCR_APK),
+ SR_TRAP(SYS_APGAKEYHI_EL1, CGT_HCR_APK),
+ /* All _EL2 registers */
+ SR_RANGE_TRAP(sys_reg(3, 4, 0, 0, 0),
+ sys_reg(3, 4, 3, 15, 7), CGT_HCR_NV),
+ /* Skip the SP_EL1 encoding... */
+ SR_TRAP(SYS_SPSR_EL2, CGT_HCR_NV),
+ SR_TRAP(SYS_ELR_EL2, CGT_HCR_NV),
+ SR_RANGE_TRAP(sys_reg(3, 4, 4, 1, 1),
+ sys_reg(3, 4, 10, 15, 7), CGT_HCR_NV),
+ SR_RANGE_TRAP(sys_reg(3, 4, 12, 0, 0),
+ sys_reg(3, 4, 14, 15, 7), CGT_HCR_NV),
+ /* All _EL02, _EL12 registers */
+ SR_RANGE_TRAP(sys_reg(3, 5, 0, 0, 0),
+ sys_reg(3, 5, 10, 15, 7), CGT_HCR_NV),
+ SR_RANGE_TRAP(sys_reg(3, 5, 12, 0, 0),
+ sys_reg(3, 5, 14, 15, 7), CGT_HCR_NV),
+ SR_TRAP(OP_AT_S1E2R, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S1E2W, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S12E1R, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S12E1W, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S12E0R, CGT_HCR_NV),
+ SR_TRAP(OP_AT_S12E0W, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1NXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1IS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2ISNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1ISNXS,CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2OS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE2OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VAE2OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_ALLE1OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VALE2OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_VMALLS12E1OSNXS,CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2E1OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2E1OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_IPAS2LE1OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RIPAS2LE1OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVAE2OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_TLBI_RVALE2OSNXS, CGT_HCR_NV),
+ SR_TRAP(OP_CPP_RCTX, CGT_HCR_NV),
+ SR_TRAP(OP_DVP_RCTX, CGT_HCR_NV),
+ SR_TRAP(OP_CFP_RCTX, CGT_HCR_NV),
+ SR_TRAP(SYS_SP_EL1, CGT_HCR_NV_nNV2),
+ SR_TRAP(SYS_VBAR_EL1, CGT_HCR_NV1_nNV2),
+ SR_TRAP(SYS_ELR_EL1, CGT_HCR_NV1_nNV2),
+ SR_TRAP(SYS_SPSR_EL1, CGT_HCR_NV1_nNV2),
+ SR_TRAP(SYS_SCXTNUM_EL1, CGT_HCR_NV1_nNV2_ENSCXT),
+ SR_TRAP(SYS_SCXTNUM_EL0, CGT_HCR_ENSCXT),
+ SR_TRAP(OP_AT_S1E1R, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E1W, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E0R, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E0W, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E1RP, CGT_HCR_AT),
+ SR_TRAP(OP_AT_S1E1WP, CGT_HCR_AT),
+ SR_TRAP(SYS_ERXPFGF_EL1, CGT_HCR_nFIEN),
+ SR_TRAP(SYS_ERXPFGCTL_EL1, CGT_HCR_nFIEN),
+ SR_TRAP(SYS_ERXPFGCDN_EL1, CGT_HCR_nFIEN),
+ SR_TRAP(SYS_PMCR_EL0, CGT_MDCR_TPM_TPMCR),
+ SR_TRAP(SYS_PMCNTENSET_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMCNTENCLR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMOVSSET_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMOVSCLR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMCEID0_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMCEID1_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMXEVTYPER_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMSWINC_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMSELR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMXEVCNTR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMCCNTR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMUSERENR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMINTENSET_EL1, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMINTENCLR_EL1, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMMIR_EL1, CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(0), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(1), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(2), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(3), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(4), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(5), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(6), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(7), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(8), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(9), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(10), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(11), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(12), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(13), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(14), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(15), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(16), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(17), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(18), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(19), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(20), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(21), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(22), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(23), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(24), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(25), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(26), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(27), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(28), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(29), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVCNTRn_EL0(30), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(0), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(1), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(2), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(3), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(4), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(5), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(6), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(7), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(8), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(9), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(10), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(11), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(12), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(13), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(14), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(15), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(16), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(17), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(18), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(19), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(20), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(21), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(22), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(23), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(24), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(25), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(26), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(27), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(28), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(29), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMEVTYPERn_EL0(30), CGT_MDCR_TPM),
+ SR_TRAP(SYS_PMCCFILTR_EL0, CGT_MDCR_TPM),
+ SR_TRAP(SYS_MDCCSR_EL0, CGT_MDCR_TDCC_TDE_TDA),
+ SR_TRAP(SYS_MDCCINT_EL1, CGT_MDCR_TDCC_TDE_TDA),
+ SR_TRAP(SYS_OSDTRRX_EL1, CGT_MDCR_TDCC_TDE_TDA),
+ SR_TRAP(SYS_OSDTRTX_EL1, CGT_MDCR_TDCC_TDE_TDA),
+ SR_TRAP(SYS_DBGDTR_EL0, CGT_MDCR_TDCC_TDE_TDA),
+ /*
+ * Also covers DBGDTRRX_EL0, which has the same encoding as
+ * SYS_DBGDTRTX_EL0...
+ */
+ SR_TRAP(SYS_DBGDTRTX_EL0, CGT_MDCR_TDCC_TDE_TDA),
+ SR_TRAP(SYS_MDSCR_EL1, CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_OSECCR_EL1, CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(0), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(1), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(2), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(3), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(4), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(5), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(6), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(7), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(8), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(9), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(10), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(11), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(12), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(13), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(14), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBVRn_EL1(15), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(0), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(1), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(2), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(3), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(4), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(5), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(6), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(7), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(8), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(9), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(10), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(11), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(12), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(13), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(14), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGBCRn_EL1(15), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(0), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(1), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(2), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(3), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(4), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(5), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(6), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(7), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(8), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(9), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(10), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(11), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(12), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(13), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(14), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWVRn_EL1(15), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(0), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(1), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(2), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(3), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(4), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(5), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(6), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(7), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(8), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(9), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(10), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(11), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(12), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(13), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGWCRn_EL1(14), CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGCLAIMSET_EL1, CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGCLAIMCLR_EL1, CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_DBGAUTHSTATUS_EL1, CGT_MDCR_TDE_TDA),
+ SR_TRAP(SYS_OSLAR_EL1, CGT_MDCR_TDE_TDOSA),
+ SR_TRAP(SYS_OSLSR_EL1, CGT_MDCR_TDE_TDOSA),
+ SR_TRAP(SYS_OSDLR_EL1, CGT_MDCR_TDE_TDOSA),
+ SR_TRAP(SYS_DBGPRCR_EL1, CGT_MDCR_TDE_TDOSA),
+ SR_TRAP(SYS_MDRAR_EL1, CGT_MDCR_TDE_TDRA),
+ SR_TRAP(SYS_PMBLIMITR_EL1, CGT_MDCR_E2PB),
+ SR_TRAP(SYS_PMBPTR_EL1, CGT_MDCR_E2PB),
+ SR_TRAP(SYS_PMBSR_EL1, CGT_MDCR_E2PB),
+ SR_TRAP(SYS_PMSCR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSEVFR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSFCR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSICR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSIDR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSIRR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSLATFR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_PMSNEVFR_EL1, CGT_MDCR_TPMS),
+ SR_TRAP(SYS_TRFCR_EL1, CGT_MDCR_TTRF),
+ SR_TRAP(SYS_TRBBASER_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_TRBLIMITR_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_TRBMAR_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_TRBPTR_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_TRBSR_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_TRBTRG_EL1, CGT_MDCR_E2TB),
+ SR_TRAP(SYS_CNTP_TVAL_EL0, CGT_CNTHCTL_EL1PTEN),
+ SR_TRAP(SYS_CNTP_CVAL_EL0, CGT_CNTHCTL_EL1PTEN),
+ SR_TRAP(SYS_CNTP_CTL_EL0, CGT_CNTHCTL_EL1PTEN),
+ SR_TRAP(SYS_CNTPCT_EL0, CGT_CNTHCTL_EL1PCTEN),
+ SR_TRAP(SYS_CNTPCTSS_EL0, CGT_CNTHCTL_EL1PCTEN),
+};
+
+static DEFINE_XARRAY(sr_forward_xa);
+
+enum fgt_group_id {
+ __NO_FGT_GROUP__,
+ HFGxTR_GROUP,
+ HDFGRTR_GROUP,
+ HDFGWTR_GROUP,
+ HFGITR_GROUP,
+
+ /* Must be last */
+ __NR_FGT_GROUP_IDS__
+};
+
+enum fg_filter_id {
+ __NO_FGF__,
+ HCRX_FGTnXS,
+
+ /* Must be last */
+ __NR_FG_FILTER_IDS__
+};
+
+#define SR_FGF(sr, g, b, p, f) \
+ { \
+ .encoding = sr, \
+ .end = sr, \
+ .tc = { \
+ .fgt = g ## _GROUP, \
+ .bit = g ## _EL2_ ## b ## _SHIFT, \
+ .pol = p, \
+ .fgf = f, \
+ }, \
+ .line = __LINE__, \
+ }
+
+#define SR_FGT(sr, g, b, p) SR_FGF(sr, g, b, p, __NO_FGF__)
+
+static const struct encoding_to_trap_config encoding_to_fgt[] __initconst = {
+ /* HFGRTR_EL2, HFGWTR_EL2 */
+ SR_FGT(SYS_TPIDR2_EL0, HFGxTR, nTPIDR2_EL0, 0),
+ SR_FGT(SYS_SMPRI_EL1, HFGxTR, nSMPRI_EL1, 0),
+ SR_FGT(SYS_ACCDATA_EL1, HFGxTR, nACCDATA_EL1, 0),
+ SR_FGT(SYS_ERXADDR_EL1, HFGxTR, ERXADDR_EL1, 1),
+ SR_FGT(SYS_ERXPFGCDN_EL1, HFGxTR, ERXPFGCDN_EL1, 1),
+ SR_FGT(SYS_ERXPFGCTL_EL1, HFGxTR, ERXPFGCTL_EL1, 1),
+ SR_FGT(SYS_ERXPFGF_EL1, HFGxTR, ERXPFGF_EL1, 1),
+ SR_FGT(SYS_ERXMISC0_EL1, HFGxTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC1_EL1, HFGxTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC2_EL1, HFGxTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXMISC3_EL1, HFGxTR, ERXMISCn_EL1, 1),
+ SR_FGT(SYS_ERXSTATUS_EL1, HFGxTR, ERXSTATUS_EL1, 1),
+ SR_FGT(SYS_ERXCTLR_EL1, HFGxTR, ERXCTLR_EL1, 1),
+ SR_FGT(SYS_ERXFR_EL1, HFGxTR, ERXFR_EL1, 1),
+ SR_FGT(SYS_ERRSELR_EL1, HFGxTR, ERRSELR_EL1, 1),
+ SR_FGT(SYS_ERRIDR_EL1, HFGxTR, ERRIDR_EL1, 1),
+ SR_FGT(SYS_ICC_IGRPEN0_EL1, HFGxTR, ICC_IGRPENn_EL1, 1),
+ SR_FGT(SYS_ICC_IGRPEN1_EL1, HFGxTR, ICC_IGRPENn_EL1, 1),
+ SR_FGT(SYS_VBAR_EL1, HFGxTR, VBAR_EL1, 1),
+ SR_FGT(SYS_TTBR1_EL1, HFGxTR, TTBR1_EL1, 1),
+ SR_FGT(SYS_TTBR0_EL1, HFGxTR, TTBR0_EL1, 1),
+ SR_FGT(SYS_TPIDR_EL0, HFGxTR, TPIDR_EL0, 1),
+ SR_FGT(SYS_TPIDRRO_EL0, HFGxTR, TPIDRRO_EL0, 1),
+ SR_FGT(SYS_TPIDR_EL1, HFGxTR, TPIDR_EL1, 1),
+ SR_FGT(SYS_TCR_EL1, HFGxTR, TCR_EL1, 1),
+ SR_FGT(SYS_SCXTNUM_EL0, HFGxTR, SCXTNUM_EL0, 1),
+ SR_FGT(SYS_SCXTNUM_EL1, HFGxTR, SCXTNUM_EL1, 1),
+ SR_FGT(SYS_SCTLR_EL1, HFGxTR, SCTLR_EL1, 1),
+ SR_FGT(SYS_REVIDR_EL1, HFGxTR, REVIDR_EL1, 1),
+ SR_FGT(SYS_PAR_EL1, HFGxTR, PAR_EL1, 1),
+ SR_FGT(SYS_MPIDR_EL1, HFGxTR, MPIDR_EL1, 1),
+ SR_FGT(SYS_MIDR_EL1, HFGxTR, MIDR_EL1, 1),
+ SR_FGT(SYS_MAIR_EL1, HFGxTR, MAIR_EL1, 1),
+ SR_FGT(SYS_LORSA_EL1, HFGxTR, LORSA_EL1, 1),
+ SR_FGT(SYS_LORN_EL1, HFGxTR, LORN_EL1, 1),
+ SR_FGT(SYS_LORID_EL1, HFGxTR, LORID_EL1, 1),
+ SR_FGT(SYS_LOREA_EL1, HFGxTR, LOREA_EL1, 1),
+ SR_FGT(SYS_LORC_EL1, HFGxTR, LORC_EL1, 1),
+ SR_FGT(SYS_ISR_EL1, HFGxTR, ISR_EL1, 1),
+ SR_FGT(SYS_FAR_EL1, HFGxTR, FAR_EL1, 1),
+ SR_FGT(SYS_ESR_EL1, HFGxTR, ESR_EL1, 1),
+ SR_FGT(SYS_DCZID_EL0, HFGxTR, DCZID_EL0, 1),
+ SR_FGT(SYS_CTR_EL0, HFGxTR, CTR_EL0, 1),
+ SR_FGT(SYS_CSSELR_EL1, HFGxTR, CSSELR_EL1, 1),
+ SR_FGT(SYS_CPACR_EL1, HFGxTR, CPACR_EL1, 1),
+ SR_FGT(SYS_CONTEXTIDR_EL1, HFGxTR, CONTEXTIDR_EL1, 1),
+ SR_FGT(SYS_CLIDR_EL1, HFGxTR, CLIDR_EL1, 1),
+ SR_FGT(SYS_CCSIDR_EL1, HFGxTR, CCSIDR_EL1, 1),
+ SR_FGT(SYS_APIBKEYLO_EL1, HFGxTR, APIBKey, 1),
+ SR_FGT(SYS_APIBKEYHI_EL1, HFGxTR, APIBKey, 1),
+ SR_FGT(SYS_APIAKEYLO_EL1, HFGxTR, APIAKey, 1),
+ SR_FGT(SYS_APIAKEYHI_EL1, HFGxTR, APIAKey, 1),
+ SR_FGT(SYS_APGAKEYLO_EL1, HFGxTR, APGAKey, 1),
+ SR_FGT(SYS_APGAKEYHI_EL1, HFGxTR, APGAKey, 1),
+ SR_FGT(SYS_APDBKEYLO_EL1, HFGxTR, APDBKey, 1),
+ SR_FGT(SYS_APDBKEYHI_EL1, HFGxTR, APDBKey, 1),
+ SR_FGT(SYS_APDAKEYLO_EL1, HFGxTR, APDAKey, 1),
+ SR_FGT(SYS_APDAKEYHI_EL1, HFGxTR, APDAKey, 1),
+ SR_FGT(SYS_AMAIR_EL1, HFGxTR, AMAIR_EL1, 1),
+ SR_FGT(SYS_AIDR_EL1, HFGxTR, AIDR_EL1, 1),
+ SR_FGT(SYS_AFSR1_EL1, HFGxTR, AFSR1_EL1, 1),
+ SR_FGT(SYS_AFSR0_EL1, HFGxTR, AFSR0_EL1, 1),
+ /* HFGITR_EL2 */
+ SR_FGT(OP_BRB_IALL, HFGITR, nBRBIALL, 0),
+ SR_FGT(OP_BRB_INJ, HFGITR, nBRBINJ, 0),
+ SR_FGT(SYS_DC_CVAC, HFGITR, DCCVAC, 1),
+ SR_FGT(SYS_DC_CGVAC, HFGITR, DCCVAC, 1),
+ SR_FGT(SYS_DC_CGDVAC, HFGITR, DCCVAC, 1),
+ SR_FGT(OP_CPP_RCTX, HFGITR, CPPRCTX, 1),
+ SR_FGT(OP_DVP_RCTX, HFGITR, DVPRCTX, 1),
+ SR_FGT(OP_CFP_RCTX, HFGITR, CFPRCTX, 1),
+ SR_FGT(OP_TLBI_VAALE1, HFGITR, TLBIVAALE1, 1),
+ SR_FGT(OP_TLBI_VALE1, HFGITR, TLBIVALE1, 1),
+ SR_FGT(OP_TLBI_VAAE1, HFGITR, TLBIVAAE1, 1),
+ SR_FGT(OP_TLBI_ASIDE1, HFGITR, TLBIASIDE1, 1),
+ SR_FGT(OP_TLBI_VAE1, HFGITR, TLBIVAE1, 1),
+ SR_FGT(OP_TLBI_VMALLE1, HFGITR, TLBIVMALLE1, 1),
+ SR_FGT(OP_TLBI_RVAALE1, HFGITR, TLBIRVAALE1, 1),
+ SR_FGT(OP_TLBI_RVALE1, HFGITR, TLBIRVALE1, 1),
+ SR_FGT(OP_TLBI_RVAAE1, HFGITR, TLBIRVAAE1, 1),
+ SR_FGT(OP_TLBI_RVAE1, HFGITR, TLBIRVAE1, 1),
+ SR_FGT(OP_TLBI_RVAALE1IS, HFGITR, TLBIRVAALE1IS, 1),
+ SR_FGT(OP_TLBI_RVALE1IS, HFGITR, TLBIRVALE1IS, 1),
+ SR_FGT(OP_TLBI_RVAAE1IS, HFGITR, TLBIRVAAE1IS, 1),
+ SR_FGT(OP_TLBI_RVAE1IS, HFGITR, TLBIRVAE1IS, 1),
+ SR_FGT(OP_TLBI_VAALE1IS, HFGITR, TLBIVAALE1IS, 1),
+ SR_FGT(OP_TLBI_VALE1IS, HFGITR, TLBIVALE1IS, 1),
+ SR_FGT(OP_TLBI_VAAE1IS, HFGITR, TLBIVAAE1IS, 1),
+ SR_FGT(OP_TLBI_ASIDE1IS, HFGITR, TLBIASIDE1IS, 1),
+ SR_FGT(OP_TLBI_VAE1IS, HFGITR, TLBIVAE1IS, 1),
+ SR_FGT(OP_TLBI_VMALLE1IS, HFGITR, TLBIVMALLE1IS, 1),
+ SR_FGT(OP_TLBI_RVAALE1OS, HFGITR, TLBIRVAALE1OS, 1),
+ SR_FGT(OP_TLBI_RVALE1OS, HFGITR, TLBIRVALE1OS, 1),
+ SR_FGT(OP_TLBI_RVAAE1OS, HFGITR, TLBIRVAAE1OS, 1),
+ SR_FGT(OP_TLBI_RVAE1OS, HFGITR, TLBIRVAE1OS, 1),
+ SR_FGT(OP_TLBI_VAALE1OS, HFGITR, TLBIVAALE1OS, 1),
+ SR_FGT(OP_TLBI_VALE1OS, HFGITR, TLBIVALE1OS, 1),
+ SR_FGT(OP_TLBI_VAAE1OS, HFGITR, TLBIVAAE1OS, 1),
+ SR_FGT(OP_TLBI_ASIDE1OS, HFGITR, TLBIASIDE1OS, 1),
+ SR_FGT(OP_TLBI_VAE1OS, HFGITR, TLBIVAE1OS, 1),
+ SR_FGT(OP_TLBI_VMALLE1OS, HFGITR, TLBIVMALLE1OS, 1),
+ /* nXS variants must be checked against HCRX_EL2.FGTnXS */
+ SR_FGF(OP_TLBI_VAALE1NXS, HFGITR, TLBIVAALE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VALE1NXS, HFGITR, TLBIVALE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAAE1NXS, HFGITR, TLBIVAAE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_ASIDE1NXS, HFGITR, TLBIASIDE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAE1NXS, HFGITR, TLBIVAE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VMALLE1NXS, HFGITR, TLBIVMALLE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAALE1NXS, HFGITR, TLBIRVAALE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVALE1NXS, HFGITR, TLBIRVALE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAAE1NXS, HFGITR, TLBIRVAAE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAE1NXS, HFGITR, TLBIRVAE1, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAALE1ISNXS, HFGITR, TLBIRVAALE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVALE1ISNXS, HFGITR, TLBIRVALE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAAE1ISNXS, HFGITR, TLBIRVAAE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAE1ISNXS, HFGITR, TLBIRVAE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAALE1ISNXS, HFGITR, TLBIVAALE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VALE1ISNXS, HFGITR, TLBIVALE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAAE1ISNXS, HFGITR, TLBIVAAE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_ASIDE1ISNXS, HFGITR, TLBIASIDE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAE1ISNXS, HFGITR, TLBIVAE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VMALLE1ISNXS, HFGITR, TLBIVMALLE1IS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAALE1OSNXS, HFGITR, TLBIRVAALE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVALE1OSNXS, HFGITR, TLBIRVALE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAAE1OSNXS, HFGITR, TLBIRVAAE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_RVAE1OSNXS, HFGITR, TLBIRVAE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAALE1OSNXS, HFGITR, TLBIVAALE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VALE1OSNXS, HFGITR, TLBIVALE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAAE1OSNXS, HFGITR, TLBIVAAE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_ASIDE1OSNXS, HFGITR, TLBIASIDE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VAE1OSNXS, HFGITR, TLBIVAE1OS, 1, HCRX_FGTnXS),
+ SR_FGF(OP_TLBI_VMALLE1OSNXS, HFGITR, TLBIVMALLE1OS, 1, HCRX_FGTnXS),
+ SR_FGT(OP_AT_S1E1WP, HFGITR, ATS1E1WP, 1),
+ SR_FGT(OP_AT_S1E1RP, HFGITR, ATS1E1RP, 1),
+ SR_FGT(OP_AT_S1E0W, HFGITR, ATS1E0W, 1),
+ SR_FGT(OP_AT_S1E0R, HFGITR, ATS1E0R, 1),
+ SR_FGT(OP_AT_S1E1W, HFGITR, ATS1E1W, 1),
+ SR_FGT(OP_AT_S1E1R, HFGITR, ATS1E1R, 1),
+ SR_FGT(SYS_DC_ZVA, HFGITR, DCZVA, 1),
+ SR_FGT(SYS_DC_GVA, HFGITR, DCZVA, 1),
+ SR_FGT(SYS_DC_GZVA, HFGITR, DCZVA, 1),
+ SR_FGT(SYS_DC_CIVAC, HFGITR, DCCIVAC, 1),
+ SR_FGT(SYS_DC_CIGVAC, HFGITR, DCCIVAC, 1),
+ SR_FGT(SYS_DC_CIGDVAC, HFGITR, DCCIVAC, 1),
+ SR_FGT(SYS_DC_CVADP, HFGITR, DCCVADP, 1),
+ SR_FGT(SYS_DC_CGVADP, HFGITR, DCCVADP, 1),
+ SR_FGT(SYS_DC_CGDVADP, HFGITR, DCCVADP, 1),
+ SR_FGT(SYS_DC_CVAP, HFGITR, DCCVAP, 1),
+ SR_FGT(SYS_DC_CGVAP, HFGITR, DCCVAP, 1),
+ SR_FGT(SYS_DC_CGDVAP, HFGITR, DCCVAP, 1),
+ SR_FGT(SYS_DC_CVAU, HFGITR, DCCVAU, 1),
+ SR_FGT(SYS_DC_CISW, HFGITR, DCCISW, 1),
+ SR_FGT(SYS_DC_CIGSW, HFGITR, DCCISW, 1),
+ SR_FGT(SYS_DC_CIGDSW, HFGITR, DCCISW, 1),
+ SR_FGT(SYS_DC_CSW, HFGITR, DCCSW, 1),
+ SR_FGT(SYS_DC_CGSW, HFGITR, DCCSW, 1),
+ SR_FGT(SYS_DC_CGDSW, HFGITR, DCCSW, 1),
+ SR_FGT(SYS_DC_ISW, HFGITR, DCISW, 1),
+ SR_FGT(SYS_DC_IGSW, HFGITR, DCISW, 1),
+ SR_FGT(SYS_DC_IGDSW, HFGITR, DCISW, 1),
+ SR_FGT(SYS_DC_IVAC, HFGITR, DCIVAC, 1),
+ SR_FGT(SYS_DC_IGVAC, HFGITR, DCIVAC, 1),
+ SR_FGT(SYS_DC_IGDVAC, HFGITR, DCIVAC, 1),
+ SR_FGT(SYS_IC_IVAU, HFGITR, ICIVAU, 1),
+ SR_FGT(SYS_IC_IALLU, HFGITR, ICIALLU, 1),
+ SR_FGT(SYS_IC_IALLUIS, HFGITR, ICIALLUIS, 1),
+ /* HDFGRTR_EL2 */
+ SR_FGT(SYS_PMBIDR_EL1, HDFGRTR, PMBIDR_EL1, 1),
+ SR_FGT(SYS_PMSNEVFR_EL1, HDFGRTR, nPMSNEVFR_EL1, 0),
+ SR_FGT(SYS_BRBINF_EL1(0), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(1), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(2), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(3), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(4), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(5), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(6), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(7), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(8), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(9), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(10), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(11), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(12), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(13), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(14), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(15), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(16), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(17), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(18), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(19), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(20), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(21), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(22), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(23), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(24), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(25), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(26), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(27), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(28), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(29), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(30), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINF_EL1(31), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBINFINJ_EL1, HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(0), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(1), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(2), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(3), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(4), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(5), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(6), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(7), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(8), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(9), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(10), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(11), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(12), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(13), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(14), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(15), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(16), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(17), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(18), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(19), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(20), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(21), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(22), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(23), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(24), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(25), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(26), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(27), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(28), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(29), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(30), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRC_EL1(31), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBSRCINJ_EL1, HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(0), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(1), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(2), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(3), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(4), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(5), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(6), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(7), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(8), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(9), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(10), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(11), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(12), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(13), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(14), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(15), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(16), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(17), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(18), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(19), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(20), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(21), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(22), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(23), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(24), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(25), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(26), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(27), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(28), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(29), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(30), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGT_EL1(31), HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTGTINJ_EL1, HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBTS_EL1, HDFGRTR, nBRBDATA, 0),
+ SR_FGT(SYS_BRBCR_EL1, HDFGRTR, nBRBCTL, 0),
+ SR_FGT(SYS_BRBFCR_EL1, HDFGRTR, nBRBCTL, 0),
+ SR_FGT(SYS_BRBIDR0_EL1, HDFGRTR, nBRBIDR, 0),
+ SR_FGT(SYS_PMCEID0_EL0, HDFGRTR, PMCEIDn_EL0, 1),
+ SR_FGT(SYS_PMCEID1_EL0, HDFGRTR, PMCEIDn_EL0, 1),
+ SR_FGT(SYS_PMUSERENR_EL0, HDFGRTR, PMUSERENR_EL0, 1),
+ SR_FGT(SYS_TRBTRG_EL1, HDFGRTR, TRBTRG_EL1, 1),
+ SR_FGT(SYS_TRBSR_EL1, HDFGRTR, TRBSR_EL1, 1),
+ SR_FGT(SYS_TRBPTR_EL1, HDFGRTR, TRBPTR_EL1, 1),
+ SR_FGT(SYS_TRBMAR_EL1, HDFGRTR, TRBMAR_EL1, 1),
+ SR_FGT(SYS_TRBLIMITR_EL1, HDFGRTR, TRBLIMITR_EL1, 1),
+ SR_FGT(SYS_TRBIDR_EL1, HDFGRTR, TRBIDR_EL1, 1),
+ SR_FGT(SYS_TRBBASER_EL1, HDFGRTR, TRBBASER_EL1, 1),
+ SR_FGT(SYS_TRCVICTLR, HDFGRTR, TRCVICTLR, 1),
+ SR_FGT(SYS_TRCSTATR, HDFGRTR, TRCSTATR, 1),
+ SR_FGT(SYS_TRCSSCSR(0), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(1), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(2), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(3), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(4), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(5), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(6), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSSCSR(7), HDFGRTR, TRCSSCSRn, 1),
+ SR_FGT(SYS_TRCSEQSTR, HDFGRTR, TRCSEQSTR, 1),
+ SR_FGT(SYS_TRCPRGCTLR, HDFGRTR, TRCPRGCTLR, 1),
+ SR_FGT(SYS_TRCOSLSR, HDFGRTR, TRCOSLSR, 1),
+ SR_FGT(SYS_TRCIMSPEC(0), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(1), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(2), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(3), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(4), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(5), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(6), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCIMSPEC(7), HDFGRTR, TRCIMSPECn, 1),
+ SR_FGT(SYS_TRCDEVARCH, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCDEVID, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR0, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR1, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR2, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR3, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR4, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR5, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR6, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR7, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR8, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR9, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR10, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR11, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR12, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCIDR13, HDFGRTR, TRCID, 1),
+ SR_FGT(SYS_TRCCNTVR(0), HDFGRTR, TRCCNTVRn, 1),
+ SR_FGT(SYS_TRCCNTVR(1), HDFGRTR, TRCCNTVRn, 1),
+ SR_FGT(SYS_TRCCNTVR(2), HDFGRTR, TRCCNTVRn, 1),
+ SR_FGT(SYS_TRCCNTVR(3), HDFGRTR, TRCCNTVRn, 1),
+ SR_FGT(SYS_TRCCLAIMCLR, HDFGRTR, TRCCLAIM, 1),
+ SR_FGT(SYS_TRCCLAIMSET, HDFGRTR, TRCCLAIM, 1),
+ SR_FGT(SYS_TRCAUXCTLR, HDFGRTR, TRCAUXCTLR, 1),
+ SR_FGT(SYS_TRCAUTHSTATUS, HDFGRTR, TRCAUTHSTATUS, 1),
+ SR_FGT(SYS_TRCACATR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(8), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(9), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(10), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(11), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(12), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(13), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(14), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACATR(15), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(8), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(9), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(10), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(11), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(12), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(13), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(14), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCACVR(15), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCBBCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCCCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCCTLR0, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCCTLR1, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCIDCVR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTCTLR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTCTLR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTCTLR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTCTLR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTRLDVR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTRLDVR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTRLDVR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCNTRLDVR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCCONFIGR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEVENTCTL0R, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEVENTCTL1R, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEXTINSELR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEXTINSELR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEXTINSELR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCEXTINSELR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCQCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(8), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(9), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(10), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(11), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(12), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(13), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(14), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(15), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(16), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(17), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(18), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(19), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(20), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(21), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(22), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(23), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(24), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(25), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(26), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(27), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(28), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(29), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(30), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSCTLR(31), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCRSR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSEQEVR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSEQEVR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSEQEVR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSEQRSTEVR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSCCR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSSPCICR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSTALLCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCSYNCPR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCTRACEIDR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCTSCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVIIECTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVIPCSSCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVISSCTLR, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCCTLR0, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCCTLR1, HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(0), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(1), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(2), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(3), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(4), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(5), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(6), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_TRCVMIDCVR(7), HDFGRTR, TRC, 1),
+ SR_FGT(SYS_PMSLATFR_EL1, HDFGRTR, PMSLATFR_EL1, 1),
+ SR_FGT(SYS_PMSIRR_EL1, HDFGRTR, PMSIRR_EL1, 1),
+ SR_FGT(SYS_PMSIDR_EL1, HDFGRTR, PMSIDR_EL1, 1),
+ SR_FGT(SYS_PMSICR_EL1, HDFGRTR, PMSICR_EL1, 1),
+ SR_FGT(SYS_PMSFCR_EL1, HDFGRTR, PMSFCR_EL1, 1),
+ SR_FGT(SYS_PMSEVFR_EL1, HDFGRTR, PMSEVFR_EL1, 1),
+ SR_FGT(SYS_PMSCR_EL1, HDFGRTR, PMSCR_EL1, 1),
+ SR_FGT(SYS_PMBSR_EL1, HDFGRTR, PMBSR_EL1, 1),
+ SR_FGT(SYS_PMBPTR_EL1, HDFGRTR, PMBPTR_EL1, 1),
+ SR_FGT(SYS_PMBLIMITR_EL1, HDFGRTR, PMBLIMITR_EL1, 1),
+ SR_FGT(SYS_PMMIR_EL1, HDFGRTR, PMMIR_EL1, 1),
+ SR_FGT(SYS_PMSELR_EL0, HDFGRTR, PMSELR_EL0, 1),
+ SR_FGT(SYS_PMOVSCLR_EL0, HDFGRTR, PMOVS, 1),
+ SR_FGT(SYS_PMOVSSET_EL0, HDFGRTR, PMOVS, 1),
+ SR_FGT(SYS_PMINTENCLR_EL1, HDFGRTR, PMINTEN, 1),
+ SR_FGT(SYS_PMINTENSET_EL1, HDFGRTR, PMINTEN, 1),
+ SR_FGT(SYS_PMCNTENCLR_EL0, HDFGRTR, PMCNTEN, 1),
+ SR_FGT(SYS_PMCNTENSET_EL0, HDFGRTR, PMCNTEN, 1),
+ SR_FGT(SYS_PMCCNTR_EL0, HDFGRTR, PMCCNTR_EL0, 1),
+ SR_FGT(SYS_PMCCFILTR_EL0, HDFGRTR, PMCCFILTR_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(0), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(1), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(2), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(3), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(4), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(5), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(6), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(7), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(8), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(9), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(10), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(11), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(12), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(13), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(14), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(15), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(16), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(17), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(18), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(19), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(20), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(21), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(22), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(23), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(24), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(25), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(26), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(27), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(28), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(29), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVTYPERn_EL0(30), HDFGRTR, PMEVTYPERn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(0), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(1), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(2), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(3), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(4), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(5), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(6), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(7), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(8), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(9), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(10), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(11), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(12), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(13), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(14), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(15), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(16), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(17), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(18), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(19), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(20), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(21), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(22), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(23), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(24), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(25), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(26), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(27), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(28), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(29), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_PMEVCNTRn_EL0(30), HDFGRTR, PMEVCNTRn_EL0, 1),
+ SR_FGT(SYS_OSDLR_EL1, HDFGRTR, OSDLR_EL1, 1),
+ SR_FGT(SYS_OSECCR_EL1, HDFGRTR, OSECCR_EL1, 1),
+ SR_FGT(SYS_OSLSR_EL1, HDFGRTR, OSLSR_EL1, 1),
+ SR_FGT(SYS_DBGPRCR_EL1, HDFGRTR, DBGPRCR_EL1, 1),
+ SR_FGT(SYS_DBGAUTHSTATUS_EL1, HDFGRTR, DBGAUTHSTATUS_EL1, 1),
+ SR_FGT(SYS_DBGCLAIMSET_EL1, HDFGRTR, DBGCLAIM, 1),
+ SR_FGT(SYS_DBGCLAIMCLR_EL1, HDFGRTR, DBGCLAIM, 1),
+ SR_FGT(SYS_MDSCR_EL1, HDFGRTR, MDSCR_EL1, 1),
+ /*
+ * The trap bits capture *64* debug registers per bit, but the
+ * ARM ARM only describes the encoding for the first 16, and
+ * we don't really support more than that anyway.
+ */
+ SR_FGT(SYS_DBGWVRn_EL1(0), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(1), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(2), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(3), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(4), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(5), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(6), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(7), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(8), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(9), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(10), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(11), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(12), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(13), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(14), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWVRn_EL1(15), HDFGRTR, DBGWVRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(0), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(1), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(2), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(3), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(4), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(5), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(6), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(7), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(8), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(9), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(10), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(11), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(12), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(13), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(14), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGWCRn_EL1(15), HDFGRTR, DBGWCRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(0), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(1), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(2), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(3), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(4), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(5), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(6), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(7), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(8), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(9), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(10), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(11), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(12), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(13), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(14), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBVRn_EL1(15), HDFGRTR, DBGBVRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(0), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(1), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(2), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(3), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(4), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(5), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(6), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(7), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(8), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(9), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(10), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(11), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(12), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(13), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(14), HDFGRTR, DBGBCRn_EL1, 1),
+ SR_FGT(SYS_DBGBCRn_EL1(15), HDFGRTR, DBGBCRn_EL1, 1),
+ /*
+ * HDFGWTR_EL2
+ *
+ * Although HDFGRTR_EL2 and HDFGWTR_EL2 registers largely
+ * overlap in their bit assignment, there are a number of bits
+ * that are RES0 on one side, and an actual trap bit on the
+ * other. The policy chosen here is to describe all the
+ * read-side mappings, and only the write-side mappings that
+ * differ from the read side, and the trap handler will pick
+ * the correct shadow register based on the access type.
+ */
+ SR_FGT(SYS_TRFCR_EL1, HDFGWTR, TRFCR_EL1, 1),
+ SR_FGT(SYS_TRCOSLAR, HDFGWTR, TRCOSLAR, 1),
+ SR_FGT(SYS_PMCR_EL0, HDFGWTR, PMCR_EL0, 1),
+ SR_FGT(SYS_PMSWINC_EL0, HDFGWTR, PMSWINC_EL0, 1),
+ SR_FGT(SYS_OSLAR_EL1, HDFGWTR, OSLAR_EL1, 1),
+};
+
+static union trap_config get_trap_config(u32 sysreg)
+{
+ return (union trap_config) {
+ .val = xa_to_value(xa_load(&sr_forward_xa, sysreg)),
+ };
+}
+
+static __init void print_nv_trap_error(const struct encoding_to_trap_config *tc,
+ const char *type, int err)
+{
+ kvm_err("%s line %d encoding range "
+ "(%d, %d, %d, %d, %d) - (%d, %d, %d, %d, %d) (err=%d)\n",
+ type, tc->line,
+ sys_reg_Op0(tc->encoding), sys_reg_Op1(tc->encoding),
+ sys_reg_CRn(tc->encoding), sys_reg_CRm(tc->encoding),
+ sys_reg_Op2(tc->encoding),
+ sys_reg_Op0(tc->end), sys_reg_Op1(tc->end),
+ sys_reg_CRn(tc->end), sys_reg_CRm(tc->end),
+ sys_reg_Op2(tc->end),
+ err);
+}
+
+int __init populate_nv_trap_config(void)
+{
+ int ret = 0;
+
+ BUILD_BUG_ON(sizeof(union trap_config) != sizeof(void *));
+ BUILD_BUG_ON(__NR_CGT_GROUP_IDS__ > BIT(TC_CGT_BITS));
+ BUILD_BUG_ON(__NR_FGT_GROUP_IDS__ > BIT(TC_FGT_BITS));
+ BUILD_BUG_ON(__NR_FG_FILTER_IDS__ > BIT(TC_FGF_BITS));
+
+ for (int i = 0; i < ARRAY_SIZE(encoding_to_cgt); i++) {
+ const struct encoding_to_trap_config *cgt = &encoding_to_cgt[i];
+ void *prev;
+
+ if (cgt->tc.val & BIT(63)) {
+ kvm_err("CGT[%d] has MBZ bit set\n", i);
+ ret = -EINVAL;
+ }
+
+ if (cgt->encoding != cgt->end) {
+ prev = xa_store_range(&sr_forward_xa,
+ cgt->encoding, cgt->end,
+ xa_mk_value(cgt->tc.val),
+ GFP_KERNEL);
+ } else {
+ prev = xa_store(&sr_forward_xa, cgt->encoding,
+ xa_mk_value(cgt->tc.val), GFP_KERNEL);
+ if (prev && !xa_is_err(prev)) {
+ ret = -EINVAL;
+ print_nv_trap_error(cgt, "Duplicate CGT", ret);
+ }
+ }
+
+ if (xa_is_err(prev)) {
+ ret = xa_err(prev);
+ print_nv_trap_error(cgt, "Failed CGT insertion", ret);
+ }
+ }
+
+ kvm_info("nv: %ld coarse grained trap handlers\n",
+ ARRAY_SIZE(encoding_to_cgt));
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ goto check_mcb;
+
+ for (int i = 0; i < ARRAY_SIZE(encoding_to_fgt); i++) {
+ const struct encoding_to_trap_config *fgt = &encoding_to_fgt[i];
+ union trap_config tc;
+
+ if (fgt->tc.fgt >= __NR_FGT_GROUP_IDS__) {
+ ret = -EINVAL;
+ print_nv_trap_error(fgt, "Invalid FGT", ret);
+ }
+
+ tc = get_trap_config(fgt->encoding);
+
+ if (tc.fgt) {
+ ret = -EINVAL;
+ print_nv_trap_error(fgt, "Duplicate FGT", ret);
+ }
+
+ tc.val |= fgt->tc.val;
+ xa_store(&sr_forward_xa, fgt->encoding,
+ xa_mk_value(tc.val), GFP_KERNEL);
+ }
+
+ kvm_info("nv: %ld fine grained trap handlers\n",
+ ARRAY_SIZE(encoding_to_fgt));
+
+check_mcb:
+ for (int id = __MULTIPLE_CONTROL_BITS__; id < __COMPLEX_CONDITIONS__; id++) {
+ const enum cgt_group_id *cgids;
+
+ cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+
+ for (int i = 0; cgids[i] != __RESERVED__; i++) {
+ if (cgids[i] >= __MULTIPLE_CONTROL_BITS__) {
+ kvm_err("Recursive MCB %d/%d\n", id, cgids[i]);
+ ret = -EINVAL;
+ }
+ }
+ }
+
+ if (ret)
+ xa_destroy(&sr_forward_xa);
+
+ return ret;
+}
+
+static enum trap_behaviour get_behaviour(struct kvm_vcpu *vcpu,
+ const struct trap_bits *tb)
+{
+ enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, tb->index);
+ if ((val & tb->mask) == tb->value)
+ b |= tb->behaviour;
+
+ return b;
+}
+
+static enum trap_behaviour __compute_trap_behaviour(struct kvm_vcpu *vcpu,
+ const enum cgt_group_id id,
+ enum trap_behaviour b)
+{
+ switch (id) {
+ const enum cgt_group_id *cgids;
+
+ case __RESERVED__ ... __MULTIPLE_CONTROL_BITS__ - 1:
+ if (likely(id != __RESERVED__))
+ b |= get_behaviour(vcpu, &coarse_trap_bits[id]);
+ break;
+ case __MULTIPLE_CONTROL_BITS__ ... __COMPLEX_CONDITIONS__ - 1:
+ /* Yes, this is recursive. Don't do anything stupid. */
+ cgids = coarse_control_combo[id - __MULTIPLE_CONTROL_BITS__];
+ for (int i = 0; cgids[i] != __RESERVED__; i++)
+ b |= __compute_trap_behaviour(vcpu, cgids[i], b);
+ break;
+ default:
+ if (ARRAY_SIZE(ccc))
+ b |= ccc[id - __COMPLEX_CONDITIONS__](vcpu);
+ break;
+ }
+
+ return b;
+}
+
+static enum trap_behaviour compute_trap_behaviour(struct kvm_vcpu *vcpu,
+ const union trap_config tc)
+{
+ enum trap_behaviour b = BEHAVE_HANDLE_LOCALLY;
+
+ return __compute_trap_behaviour(vcpu, tc.cgt, b);
+}
+
+static bool check_fgt_bit(u64 val, const union trap_config tc)
+{
+ return ((val >> tc.bit) & 1) == tc.pol;
+}
+
+#define sanitised_sys_reg(vcpu, reg) \
+ ({ \
+ u64 __val; \
+ __val = __vcpu_sys_reg(vcpu, reg); \
+ __val &= ~__ ## reg ## _RES0; \
+ (__val); \
+ })
+
+bool __check_nv_sr_forward(struct kvm_vcpu *vcpu)
+{
+ union trap_config tc;
+ enum trap_behaviour b;
+ bool is_read;
+ u32 sysreg;
+ u64 esr, val;
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return false;
+
+ esr = kvm_vcpu_get_esr(vcpu);
+ sysreg = esr_sys64_to_sysreg(esr);
+ is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ;
+
+ tc = get_trap_config(sysreg);
+
+ /*
+ * A value of 0 for the whole entry means that we know nothing
+ * for this sysreg, and that it cannot be re-injected into the
+ * nested hypervisor. In this situation, let's cut it short.
+ *
+ * Note that ultimately, we could also make use of the xarray
+ * to store the index of the sysreg in the local descriptor
+ * array, avoiding another search... Hint, hint...
+ */
+ if (!tc.val)
+ return false;
+
+ switch ((enum fgt_group_id)tc.fgt) {
+ case __NO_FGT_GROUP__:
+ break;
+
+ case HFGxTR_GROUP:
+ if (is_read)
+ val = sanitised_sys_reg(vcpu, HFGRTR_EL2);
+ else
+ val = sanitised_sys_reg(vcpu, HFGWTR_EL2);
+ break;
+
+ case HDFGRTR_GROUP:
+ case HDFGWTR_GROUP:
+ if (is_read)
+ val = sanitised_sys_reg(vcpu, HDFGRTR_EL2);
+ else
+ val = sanitised_sys_reg(vcpu, HDFGWTR_EL2);
+ break;
+
+ case HFGITR_GROUP:
+ val = sanitised_sys_reg(vcpu, HFGITR_EL2);
+ switch (tc.fgf) {
+ u64 tmp;
+
+ case __NO_FGF__:
+ break;
+
+ case HCRX_FGTnXS:
+ tmp = sanitised_sys_reg(vcpu, HCRX_EL2);
+ if (tmp & HCRX_EL2_FGTnXS)
+ tc.fgt = __NO_FGT_GROUP__;
+ }
+ break;
+
+ case __NR_FGT_GROUP_IDS__:
+ /* Something is really wrong, bail out */
+ WARN_ONCE(1, "__NR_FGT_GROUP_IDS__");
+ return false;
+ }
+
+ if (tc.fgt != __NO_FGT_GROUP__ && check_fgt_bit(val, tc))
+ goto inject;
+
+ b = compute_trap_behaviour(vcpu, tc);
+
+ if (((b & BEHAVE_FORWARD_READ) && is_read) ||
+ ((b & BEHAVE_FORWARD_WRITE) && !is_read))
+ goto inject;
+
+ return false;
+
+inject:
+ trace_kvm_forward_sysreg_trap(vcpu, sysreg, is_read);
+
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return true;
+}
+
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
return KVM_ARM_TARGET_GENERIC_V8;
}
-void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
-{
- u32 target = kvm_target_cpu();
-
- memset(init, 0, sizeof(*init));
-
- /*
- * For now, we don't return any features.
- * In future, we might use features to return target
- * specific features available for the preferred
- * target type.
- */
- init->target = (__u32)target;
-}
-
int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
return -EINVAL;
if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
return kvm_handle_ptrauth(vcpu);
- kvm_emulate_nested_eret(vcpu);
+ /*
+ * If we got here, two possibilities:
+ *
+ * - the guest is in EL2, and we need to fully emulate ERET
+ *
+ * - the guest is in EL1, and we need to reinject the
+ * exception into the L1 hypervisor.
+ *
+ * If KVM ever traps ERET for its own use, we'll have to
+ * revisit this.
+ */
+ if (is_hyp_ctxt(vcpu))
+ kvm_emulate_nested_eret(vcpu);
+ else
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+ return 1;
+}
+
+static int handle_svc(struct kvm_vcpu *vcpu)
+{
+ /*
+ * So far, SVC traps only for NV via HFGITR_EL2. A SVC from a
+ * 32bit guest would be caught by vpcu_mode_is_bad_32bit(), so
+ * we should only have to deal with a 64 bit exception.
+ */
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
return 1;
}
[ESR_ELx_EC_SMC32] = handle_smc,
[ESR_ELx_EC_HVC64] = handle_hvc,
[ESR_ELx_EC_SMC64] = handle_smc,
+ [ESR_ELx_EC_SVC64] = handle_svc,
[ESR_ELx_EC_SYS64] = kvm_handle_sys_reg,
[ESR_ELx_EC_SVE] = handle_sve,
[ESR_ELx_EC_ERET] = kvm_handle_eret,
}
}
-static inline bool __hfgxtr_traps_required(void)
-{
- if (cpus_have_final_cap(ARM64_SME))
- return true;
-
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- return true;
+#define compute_clr_set(vcpu, reg, clr, set) \
+ do { \
+ u64 hfg; \
+ hfg = __vcpu_sys_reg(vcpu, reg) & ~__ ## reg ## _RES0; \
+ set |= hfg & __ ## reg ## _MASK; \
+ clr |= ~hfg & __ ## reg ## _nMASK; \
+ } while(0)
- return false;
-}
-static inline void __activate_traps_hfgxtr(void)
+static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
+ struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+ u64 r_val, w_val;
+
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ return;
+
+ ctxt_sys_reg(hctxt, HFGRTR_EL2) = read_sysreg_s(SYS_HFGRTR_EL2);
+ ctxt_sys_reg(hctxt, HFGWTR_EL2) = read_sysreg_s(SYS_HFGWTR_EL2);
if (cpus_have_final_cap(ARM64_SME)) {
tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
w_set |= HFGxTR_EL2_TCR_EL1_MASK;
- sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
- sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ compute_clr_set(vcpu, HFGRTR_EL2, r_clr, r_set);
+ compute_clr_set(vcpu, HFGWTR_EL2, w_clr, w_set);
+ }
+
+ /* The default is not to trap anything but ACCDATA_EL1 */
+ r_val = __HFGRTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+ r_val |= r_set;
+ r_val &= ~r_clr;
+
+ w_val = __HFGWTR_EL2_nMASK & ~HFGxTR_EL2_nACCDATA_EL1;
+ w_val |= w_set;
+ w_val &= ~w_clr;
+
+ write_sysreg_s(r_val, SYS_HFGRTR_EL2);
+ write_sysreg_s(w_val, SYS_HFGWTR_EL2);
+
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return;
+
+ ctxt_sys_reg(hctxt, HFGITR_EL2) = read_sysreg_s(SYS_HFGITR_EL2);
+
+ r_set = r_clr = 0;
+ compute_clr_set(vcpu, HFGITR_EL2, r_clr, r_set);
+ r_val = __HFGITR_EL2_nMASK;
+ r_val |= r_set;
+ r_val &= ~r_clr;
+
+ write_sysreg_s(r_val, SYS_HFGITR_EL2);
+
+ ctxt_sys_reg(hctxt, HDFGRTR_EL2) = read_sysreg_s(SYS_HDFGRTR_EL2);
+ ctxt_sys_reg(hctxt, HDFGWTR_EL2) = read_sysreg_s(SYS_HDFGWTR_EL2);
+
+ r_clr = r_set = w_clr = w_set = 0;
+
+ compute_clr_set(vcpu, HDFGRTR_EL2, r_clr, r_set);
+ compute_clr_set(vcpu, HDFGWTR_EL2, w_clr, w_set);
+
+ r_val = __HDFGRTR_EL2_nMASK;
+ r_val |= r_set;
+ r_val &= ~r_clr;
+
+ w_val = __HDFGWTR_EL2_nMASK;
+ w_val |= w_set;
+ w_val &= ~w_clr;
+
+ write_sysreg_s(r_val, SYS_HDFGRTR_EL2);
+ write_sysreg_s(w_val, SYS_HDFGWTR_EL2);
}
-static inline void __deactivate_traps_hfgxtr(void)
+static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- u64 r_clr = 0, w_clr = 0, r_set = 0, w_set = 0, tmp;
+ struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- if (cpus_have_final_cap(ARM64_SME)) {
- tmp = HFGxTR_EL2_nSMPRI_EL1_MASK | HFGxTR_EL2_nTPIDR2_EL0_MASK;
+ if (!cpus_have_final_cap(ARM64_HAS_FGT))
+ return;
- r_set |= tmp;
- w_set |= tmp;
- }
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGRTR_EL2), SYS_HFGRTR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGWTR_EL2), SYS_HFGWTR_EL2);
- if (cpus_have_final_cap(ARM64_WORKAROUND_AMPERE_AC03_CPU_38))
- w_clr |= HFGxTR_EL2_TCR_EL1_MASK;
+ if (!vcpu_has_nv(vcpu) || is_hyp_ctxt(vcpu))
+ return;
- sysreg_clear_set_s(SYS_HFGRTR_EL2, r_clr, r_set);
- sysreg_clear_set_s(SYS_HFGWTR_EL2, w_clr, w_set);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HFGITR_EL2), SYS_HFGITR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HDFGRTR_EL2), SYS_HDFGRTR_EL2);
+ write_sysreg_s(ctxt_sys_reg(hctxt, HDFGWTR_EL2), SYS_HDFGWTR_EL2);
}
static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
- if (__hfgxtr_traps_required())
- __activate_traps_hfgxtr();
+ if (cpus_have_final_cap(ARM64_HAS_HCX)) {
+ u64 hcrx = HCRX_GUEST_FLAGS;
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ u64 clr = 0, set = 0;
+
+ compute_clr_set(vcpu, HCRX_EL2, clr, set);
+
+ hcrx |= set;
+ hcrx &= ~clr;
+ }
+
+ write_sysreg_s(hcrx, SYS_HCRX_EL2);
+ }
+
+ __activate_traps_hfgxtr(vcpu);
}
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
- if (__hfgxtr_traps_required())
- __deactivate_traps_hfgxtr();
+ if (cpus_have_final_cap(ARM64_HAS_HCX))
+ write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
+
+ __deactivate_traps_hfgxtr(vcpu);
}
static inline void ___activate_traps(struct kvm_vcpu *vcpu)
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
-
- if (cpus_have_final_cap(ARM64_HAS_HCX))
- write_sysreg_s(HCRX_GUEST_FLAGS, SYS_HCRX_EL2);
}
static inline void ___deactivate_traps(struct kvm_vcpu *vcpu)
vcpu->arch.hcr_el2 &= ~HCR_VSE;
vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE;
}
-
- if (cpus_have_final_cap(ARM64_HAS_HCX))
- write_sysreg_s(HCRX_HOST_FLAGS, SYS_HCRX_EL2);
}
static inline bool __populate_fault_info(struct kvm_vcpu *vcpu)
int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
enum kvm_pgtable_prot prot,
unsigned long *haddr);
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr);
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
#endif /* __KVM_HYP_MM_H */
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+ DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+ __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),
return err;
}
+static int __pkvm_alloc_private_va_range(unsigned long start, size_t size)
+{
+ unsigned long cur;
+
+ hyp_assert_lock_held(&pkvm_pgd_lock);
+
+ if (!start || start < __io_map_base)
+ return -EINVAL;
+
+ /* The allocated size is always a multiple of PAGE_SIZE */
+ cur = start + PAGE_ALIGN(size);
+
+ /* Are we overflowing on the vmemmap ? */
+ if (cur > __hyp_vmemmap)
+ return -ENOMEM;
+
+ __io_map_base = cur;
+
+ return 0;
+}
+
/**
* pkvm_alloc_private_va_range - Allocates a private VA range.
* @size: The size of the VA range to reserve.
*/
int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr)
{
- unsigned long base, addr;
- int ret = 0;
+ unsigned long addr;
+ int ret;
hyp_spin_lock(&pkvm_pgd_lock);
-
- /* Align the allocation based on the order of its size */
- addr = ALIGN(__io_map_base, PAGE_SIZE << get_order(size));
-
- /* The allocated size is always a multiple of PAGE_SIZE */
- base = addr + PAGE_ALIGN(size);
-
- /* Are we overflowing on the vmemmap ? */
- if (!addr || base > __hyp_vmemmap)
- ret = -ENOMEM;
- else {
- __io_map_base = base;
- *haddr = addr;
- }
-
+ addr = __io_map_base;
+ ret = __pkvm_alloc_private_va_range(addr, size);
hyp_spin_unlock(&pkvm_pgd_lock);
+ *haddr = addr;
+
return ret;
}
return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
}
+int pkvm_create_stack(phys_addr_t phys, unsigned long *haddr)
+{
+ unsigned long addr, prev_base;
+ size_t size;
+ int ret;
+
+ hyp_spin_lock(&pkvm_pgd_lock);
+
+ prev_base = __io_map_base;
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ addr = ALIGN(__io_map_base, size);
+
+ ret = __pkvm_alloc_private_va_range(addr, size);
+ if (!ret) {
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr + PAGE_SIZE,
+ PAGE_SIZE, phys, PAGE_HYP);
+ if (ret)
+ __io_map_base = prev_base;
+ }
+ hyp_spin_unlock(&pkvm_pgd_lock);
+
+ *haddr = addr + size;
+
+ return ret;
+}
+
static void *admit_host_page(void *arg)
{
struct kvm_hyp_memcache *host_mc = arg;
for (i = 0; i < hyp_nr_cpus; i++) {
struct kvm_nvhe_init_params *params = per_cpu_ptr(&kvm_init_params, i);
- unsigned long hyp_addr;
start = (void *)kern_hyp_va(per_cpu_base[i]);
end = start + PAGE_ALIGN(hyp_percpu_size);
if (ret)
return ret;
- /*
- * Allocate a contiguous HYP private VA range for the stack
- * and guard page. The allocation is also aligned based on
- * the order of its size.
- */
- ret = pkvm_alloc_private_va_range(PAGE_SIZE * 2, &hyp_addr);
+ ret = pkvm_create_stack(params->stack_pa, ¶ms->stack_hyp_va);
if (ret)
return ret;
-
- /*
- * Since the stack grows downwards, map the stack to the page
- * at the higher address and leave the lower guard page
- * unbacked.
- *
- * Any valid stack address now has the PAGE_SHIFT bit as 1
- * and addresses corresponding to the guard page have the
- * PAGE_SHIFT bit as 0 - this is used for overflow detection.
- */
- hyp_spin_lock(&pkvm_pgd_lock);
- ret = kvm_pgtable_hyp_map(&pkvm_pgtable, hyp_addr + PAGE_SIZE,
- PAGE_SIZE, params->stack_pa, PAGE_HYP);
- hyp_spin_unlock(&pkvm_pgd_lock);
- if (ret)
- return ret;
-
- /* Update stack_hyp_va to end of the stack's private VA range */
- params->stack_hyp_va = hyp_addr + (2 * PAGE_SIZE);
}
/*
* KVM_ARM_VCPU_INIT, however, this is likely not possible for
* protected VMs.
*/
- vcpu->arch.target = -1;
+ vcpu_clear_flag(vcpu, VCPU_INITIALIZED);
*exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT);
*exit_code |= ARM_EXCEPTION_IL;
}
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt, false);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+ if (icache_is_vpipt())
+ icache_inval_all_pou();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size)
+{
+ unsigned long pages, inval_pages;
+
+ if (!system_supports_tlb_range()) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ return;
+ }
+
+ pages = size >> PAGE_SHIFT;
+ while (pages > 0) {
+ inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+ addr += inval_pages << PAGE_SHIFT;
+ pages -= inval_pages;
+ }
+}
+
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
- kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+ kvm_granule_size(ctx->level));
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
smp_store_release(ctx->ptep, new);
}
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
- struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+ /*
+ * If FEAT_TLBIRANGE is implemented, defer the individual
+ * TLB invalidations until the entire walk is finished, and
+ * then use the range-based TLBI instructions to do the
+ * invalidations. Condition deferred TLB invalidation on the
+ * system supporting FWB as the optimization is entirely
+ * pointless when the unmap walker needs to perform CMOs.
+ */
+ return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
+ struct kvm_pgtable *pgt = ctx->arg;
+
/*
- * Clear the existing PTE, and perform break-before-make with
- * TLB maintenance if it was valid.
+ * Clear the existing PTE, and perform break-before-make if it was
+ * valid. Depending on the system support, defer the TLB maintenance
+ * for the same until the entire unmap walk is completed.
*/
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+ if (!stage2_unmap_defer_tlb_flush(pgt))
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+ ctx->addr, ctx->level);
}
mm_ops->put_page(ctx->ptep);
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
- stage2_put_pte(ctx, mmu, mm_ops);
+ stage2_unmap_put_pte(ctx, mmu, mm_ops);
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
+ int ret;
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
.arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ if (stage2_unmap_defer_tlb_flush(pgt))
+ /* Perform the deferred TLB invalidations */
+ kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+ return ret;
}
struct stage2_attr_data {
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
}
/**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* @kvm: pointer to kvm structure.
*
* Interface to HYP function to flush all VM TLB entries
*/
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
- ++kvm->stat.generic.remote_tlb_flush_requests;
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+ return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+ gfn_t gfn, u64 nr_pages)
+{
+ kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+ gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ return 0;
}
static bool kvm_is_device_pfn(unsigned long pfn)
return 0;
}
+static int __hyp_alloc_private_va_range(unsigned long base)
+{
+ lockdep_assert_held(&kvm_hyp_pgd_mutex);
+
+ if (!PAGE_ALIGNED(base))
+ return -EINVAL;
+
+ /*
+ * Verify that BIT(VA_BITS - 1) hasn't been flipped by
+ * allocating the new area, as it would indicate we've
+ * overflowed the idmap/IO address range.
+ */
+ if ((base ^ io_map_base) & BIT(VA_BITS - 1))
+ return -ENOMEM;
+
+ io_map_base = base;
+
+ return 0;
+}
/**
* hyp_alloc_private_va_range - Allocates a private VA range.
/*
* This assumes that we have enough space below the idmap
- * page to allocate our VAs. If not, the check below will
- * kick. A potential alternative would be to detect that
- * overflow and switch to an allocation above the idmap.
+ * page to allocate our VAs. If not, the check in
+ * __hyp_alloc_private_va_range() will kick. A potential
+ * alternative would be to detect that overflow and switch
+ * to an allocation above the idmap.
*
* The allocated size is always a multiple of PAGE_SIZE.
*/
- base = io_map_base - PAGE_ALIGN(size);
-
- /* Align the allocation based on the order of its size */
- base = ALIGN_DOWN(base, PAGE_SIZE << get_order(size));
-
- /*
- * Verify that BIT(VA_BITS - 1) hasn't been flipped by
- * allocating the new area, as it would indicate we've
- * overflowed the idmap/IO address range.
- */
- if ((base ^ io_map_base) & BIT(VA_BITS - 1))
- ret = -ENOMEM;
- else
- *haddr = io_map_base = base;
+ size = PAGE_ALIGN(size);
+ base = io_map_base - size;
+ ret = __hyp_alloc_private_va_range(base);
mutex_unlock(&kvm_hyp_pgd_mutex);
return ret;
}
+int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr)
+{
+ unsigned long base;
+ size_t size;
+ int ret;
+
+ mutex_lock(&kvm_hyp_pgd_mutex);
+ /*
+ * Efficient stack verification using the PAGE_SHIFT bit implies
+ * an alignment of our allocation on the order of the size.
+ */
+ size = PAGE_SIZE * 2;
+ base = ALIGN_DOWN(io_map_base - size, size);
+
+ ret = __hyp_alloc_private_va_range(base);
+
+ mutex_unlock(&kvm_hyp_pgd_mutex);
+
+ if (ret) {
+ kvm_err("Cannot allocate hyp stack guard page\n");
+ return ret;
+ }
+
+ /*
+ * Since the stack grows downwards, map the stack to the page
+ * at the higher address and leave the lower guard page
+ * unbacked.
+ *
+ * Any valid stack address now has the PAGE_SHIFT bit as 1
+ * and addresses corresponding to the guard page have the
+ * PAGE_SHIFT bit as 0 - this is used for overflow detection.
+ */
+ ret = __create_hyp_mappings(base + PAGE_SIZE, PAGE_SIZE, phys_addr,
+ PAGE_HYP);
+ if (ret)
+ kvm_err("Cannot map hyp stack\n");
+
+ *haddr = base + size;
+
+ return ret;
+}
+
/**
* create_hyp_io_mappings - Map IO into both kernel and HYP
* @phys_addr: The physical start address which gets mapped
write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
write_unlock(&kvm->mmu_lock);
- kvm_flush_remote_tlbs(kvm);
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
}
/**
out_unlock:
read_unlock(&kvm->mmu_lock);
- kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
- kvm_pfn_t pfn = pte_pfn(range->pte);
+ kvm_pfn_t pfn = pte_pfn(range->arg.pte);
if (!kvm->arch.mmu.pgt)
return false;
break;
case SYS_ID_AA64MMFR0_EL1:
- /* Hide ECV, FGT, ExS, Secure Memory */
- val &= ~(GENMASK_ULL(63, 43) |
+ /* Hide ECV, ExS, Secure Memory */
+ val &= ~(NV_FTR(MMFR0, ECV) |
+ NV_FTR(MMFR0, EXS) |
NV_FTR(MMFR0, TGRAN4_2) |
NV_FTR(MMFR0, TGRAN16_2) |
NV_FTR(MMFR0, TGRAN64_2) |
break;
case SYS_ID_AA64MMFR1_EL1:
- val &= (NV_FTR(MMFR1, PAN) |
+ val &= (NV_FTR(MMFR1, HCX) |
+ NV_FTR(MMFR1, PAN) |
NV_FTR(MMFR1, LO) |
NV_FTR(MMFR1, HPDS) |
NV_FTR(MMFR1, VH) |
break;
case SYS_ID_AA64MMFR2_EL1:
- val &= ~(NV_FTR(MMFR2, EVT) |
- NV_FTR(MMFR2, BBM) |
+ val &= ~(NV_FTR(MMFR2, BBM) |
NV_FTR(MMFR2, TTL) |
GENMASK_ULL(47, 44) |
NV_FTR(MMFR2, ST) |
#include <asm/kvm_emulate.h>
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
+#include <asm/arm_pmuv3.h>
#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0)
return &vcpu->arch.pmu.pmc[cnt_idx];
}
-static u32 kvm_pmu_event_mask(struct kvm *kvm)
+static u32 __kvm_pmu_event_mask(unsigned int pmuver)
{
- unsigned int pmuver;
-
- pmuver = kvm->arch.arm_pmu->pmuver;
-
switch (pmuver) {
case ID_AA64DFR0_EL1_PMUVer_IMP:
return GENMASK(9, 0);
}
}
+static u32 kvm_pmu_event_mask(struct kvm *kvm)
+{
+ u64 dfr0 = IDREG(kvm, SYS_ID_AA64DFR0_EL1);
+ u8 pmuver = SYS_FIELD_GET(ID_AA64DFR0_EL1, PMUVer, dfr0);
+
+ return __kvm_pmu_event_mask(pmuver);
+}
+
/**
* kvm_pmc_is_64bit - determine if counter is 64bit
* @pmc: counter context
{
struct arm_pmu_entry *entry;
- if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
- pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+ /*
+ * Check the sanitised PMU version for the system, as KVM does not
+ * support implementations where PMUv3 exists on a subset of CPUs.
+ */
+ if (!pmuv3_implemented(kvm_arm_pmu_get_pmuver_limit()))
return;
mutex_lock(&arm_pmus_lock);
} else {
val = read_sysreg(pmceid1_el0);
/*
- * Don't advertise STALL_SLOT, as PMMIR_EL0 is handled
+ * Don't advertise STALL_SLOT*, as PMMIR_EL0 is handled
* as RAZ
*/
- if (vcpu->kvm->arch.arm_pmu->pmuver >= ID_AA64DFR0_EL1_PMUVer_V3P4)
- val &= ~BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32);
+ val &= ~(BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND - 32) |
+ BIT_ULL(ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND - 32));
base = 32;
}
return 0;
}
case KVM_ARM_VCPU_PMU_V3_FILTER: {
+ u8 pmuver = kvm_arm_pmu_get_pmuver_limit();
struct kvm_pmu_event_filter __user *uaddr;
struct kvm_pmu_event_filter filter;
int nr_events;
- nr_events = kvm_pmu_event_mask(kvm) + 1;
+ /*
+ * Allow userspace to specify an event filter for the entire
+ * event range supported by PMUVer of the hardware, rather
+ * than the guest's PMUVer for KVM backward compatibility.
+ */
+ nr_events = __kvm_pmu_event_mask(pmuver) + 1;
uaddr = (struct kvm_pmu_event_filter __user *)(long)attr->addr;
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
return true;
}
+
+/*
+ * If we interrupted the guest to update the host PMU context, make
+ * sure we re-apply the guest EL0 state.
+ */
+void kvm_vcpu_pmu_resync_el0(void)
+{
+ struct kvm_vcpu *vcpu;
+
+ if (!has_vhe() || !in_interrupt())
+ return;
+
+ vcpu = kvm_get_running_vcpu();
+ if (!vcpu)
+ return;
+
+ kvm_make_request(KVM_REQ_RESYNC_PMU_EL0, vcpu);
+}
}
}
- switch (vcpu->arch.target) {
- default:
- if (vcpu_el1_is_32bit(vcpu)) {
- pstate = VCPU_RESET_PSTATE_SVC;
- } else if (vcpu_has_nv(vcpu)) {
- pstate = VCPU_RESET_PSTATE_EL2;
- } else {
- pstate = VCPU_RESET_PSTATE_EL1;
- }
-
- if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
- ret = -EINVAL;
- goto out;
- }
- break;
+ if (vcpu_el1_is_32bit(vcpu))
+ pstate = VCPU_RESET_PSTATE_SVC;
+ else if (vcpu_has_nv(vcpu))
+ pstate = VCPU_RESET_PSTATE_EL2;
+ else
+ pstate = VCPU_RESET_PSTATE_EL1;
+
+ if (kvm_vcpu_has_pmu(vcpu) && !kvm_arm_support_pmu_v3()) {
+ ret = -EINVAL;
+ goto out;
}
/* Reset core registers */
{ SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 },
{ SYS_DESC(SYS_TPIDR_EL1), NULL, reset_unknown, TPIDR_EL1 },
+ { SYS_DESC(SYS_ACCDATA_EL1), undef_access },
+
{ SYS_DESC(SYS_SCXTNUM_EL1), undef_access },
{ SYS_DESC(SYS_CNTKCTL_EL1), NULL, reset_val, CNTKCTL_EL1, 0},
EL2_REG(MDCR_EL2, access_rw, reset_val, 0),
EL2_REG(CPTR_EL2, access_rw, reset_val, CPTR_NVHE_EL2_RES1),
EL2_REG(HSTR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HFGRTR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HFGWTR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HFGITR_EL2, access_rw, reset_val, 0),
EL2_REG(HACR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HCRX_EL2, access_rw, reset_val, 0),
+
EL2_REG(TTBR0_EL2, access_rw, reset_val, 0),
EL2_REG(TTBR1_EL2, access_rw, reset_val, 0),
EL2_REG(TCR_EL2, access_rw, reset_val, TCR_EL2_RES1),
EL2_REG(VTCR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_DACR32_EL2), NULL, reset_unknown, DACR32_EL2 },
+ EL2_REG(HDFGRTR_EL2, access_rw, reset_val, 0),
+ EL2_REG(HDFGWTR_EL2, access_rw, reset_val, 0),
EL2_REG(SPSR_EL2, access_rw, reset_val, 0),
EL2_REG(ELR_EL2, access_rw, reset_val, 0),
{ SYS_DESC(SYS_SP_EL1), access_sp_el1},
trace_kvm_handle_sys_reg(esr);
+ if (__check_nv_sr_forward(vcpu))
+ return 1;
+
params = esr_sys64_to_params(esr);
params.regval = vcpu_get_reg(vcpu, Rt);
if (!first_idreg)
return -EINVAL;
+ if (kvm_get_mode() == KVM_MODE_NV)
+ return populate_nv_trap_config();
+
return 0;
}
__entry->hcr_el2)
);
+TRACE_EVENT(kvm_forward_sysreg_trap,
+ TP_PROTO(struct kvm_vcpu *vcpu, u32 sysreg, bool is_read),
+ TP_ARGS(vcpu, sysreg, is_read),
+
+ TP_STRUCT__entry(
+ __field(u64, pc)
+ __field(u32, sysreg)
+ __field(bool, is_read)
+ ),
+
+ TP_fast_assign(
+ __entry->pc = *vcpu_pc(vcpu);
+ __entry->sysreg = sysreg;
+ __entry->is_read = is_read;
+ ),
+
+ TP_printk("%llx %c (%d,%d,%d,%d,%d)",
+ __entry->pc,
+ __entry->is_read ? 'R' : 'W',
+ sys_reg_Op0(__entry->sysreg),
+ sys_reg_Op1(__entry->sysreg),
+ sys_reg_CRn(__entry->sysreg),
+ sys_reg_CRm(__entry->sysreg),
+ sys_reg_Op2(__entry->sysreg))
+);
+
#endif /* _TRACE_ARM_ARM64_KVM_H */
#undef TRACE_INCLUDE_PATH
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v2_set_npie(struct kvm_vcpu *vcpu);
int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
int offset, u32 *val);
void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
-void vgic_v3_set_npie(struct kvm_vcpu *vcpu);
void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_v3_enable(struct kvm_vcpu *vcpu);
const u64 *ptr;
u64 data, sum64 = 0;
- if (unlikely(len == 0))
+ if (unlikely(len <= 0))
return 0;
offset = (unsigned long)buff & 7;
HAS_ECV_CNTPOFF
HAS_EPAN
HAS_EVT
+HAS_FGT
HAS_GENERIC_AUTH
HAS_GENERIC_AUTH_ARCH_QARMA3
HAS_GENERIC_AUTH_ARCH_QARMA5
Field 0 ICIALLUIS
EndSysreg
+Sysreg HDFGRTR_EL2 3 4 3 1 4
+Field 63 PMBIDR_EL1
+Field 62 nPMSNEVFR_EL1
+Field 61 nBRBDATA
+Field 60 nBRBCTL
+Field 59 nBRBIDR
+Field 58 PMCEIDn_EL0
+Field 57 PMUSERENR_EL0
+Field 56 TRBTRG_EL1
+Field 55 TRBSR_EL1
+Field 54 TRBPTR_EL1
+Field 53 TRBMAR_EL1
+Field 52 TRBLIMITR_EL1
+Field 51 TRBIDR_EL1
+Field 50 TRBBASER_EL1
+Res0 49
+Field 48 TRCVICTLR
+Field 47 TRCSTATR
+Field 46 TRCSSCSRn
+Field 45 TRCSEQSTR
+Field 44 TRCPRGCTLR
+Field 43 TRCOSLSR
+Res0 42
+Field 41 TRCIMSPECn
+Field 40 TRCID
+Res0 39:38
+Field 37 TRCCNTVRn
+Field 36 TRCCLAIM
+Field 35 TRCAUXCTLR
+Field 34 TRCAUTHSTATUS
+Field 33 TRC
+Field 32 PMSLATFR_EL1
+Field 31 PMSIRR_EL1
+Field 30 PMSIDR_EL1
+Field 29 PMSICR_EL1
+Field 28 PMSFCR_EL1
+Field 27 PMSEVFR_EL1
+Field 26 PMSCR_EL1
+Field 25 PMBSR_EL1
+Field 24 PMBPTR_EL1
+Field 23 PMBLIMITR_EL1
+Field 22 PMMIR_EL1
+Res0 21:20
+Field 19 PMSELR_EL0
+Field 18 PMOVS
+Field 17 PMINTEN
+Field 16 PMCNTEN
+Field 15 PMCCNTR_EL0
+Field 14 PMCCFILTR_EL0
+Field 13 PMEVTYPERn_EL0
+Field 12 PMEVCNTRn_EL0
+Field 11 OSDLR_EL1
+Field 10 OSECCR_EL1
+Field 9 OSLSR_EL1
+Res0 8
+Field 7 DBGPRCR_EL1
+Field 6 DBGAUTHSTATUS_EL1
+Field 5 DBGCLAIM
+Field 4 MDSCR_EL1
+Field 3 DBGWVRn_EL1
+Field 2 DBGWCRn_EL1
+Field 1 DBGBVRn_EL1
+Field 0 DBGBCRn_EL1
+EndSysreg
+
+Sysreg HDFGWTR_EL2 3 4 3 1 5
+Res0 63
+Field 62 nPMSNEVFR_EL1
+Field 61 nBRBDATA
+Field 60 nBRBCTL
+Res0 59:58
+Field 57 PMUSERENR_EL0
+Field 56 TRBTRG_EL1
+Field 55 TRBSR_EL1
+Field 54 TRBPTR_EL1
+Field 53 TRBMAR_EL1
+Field 52 TRBLIMITR_EL1
+Res0 51
+Field 50 TRBBASER_EL1
+Field 49 TRFCR_EL1
+Field 48 TRCVICTLR
+Res0 47
+Field 46 TRCSSCSRn
+Field 45 TRCSEQSTR
+Field 44 TRCPRGCTLR
+Res0 43
+Field 42 TRCOSLAR
+Field 41 TRCIMSPECn
+Res0 40:38
+Field 37 TRCCNTVRn
+Field 36 TRCCLAIM
+Field 35 TRCAUXCTLR
+Res0 34
+Field 33 TRC
+Field 32 PMSLATFR_EL1
+Field 31 PMSIRR_EL1
+Res0 30
+Field 29 PMSICR_EL1
+Field 28 PMSFCR_EL1
+Field 27 PMSEVFR_EL1
+Field 26 PMSCR_EL1
+Field 25 PMBSR_EL1
+Field 24 PMBPTR_EL1
+Field 23 PMBLIMITR_EL1
+Res0 22
+Field 21 PMCR_EL0
+Field 20 PMSWINC_EL0
+Field 19 PMSELR_EL0
+Field 18 PMOVS
+Field 17 PMINTEN
+Field 16 PMCNTEN
+Field 15 PMCCNTR_EL0
+Field 14 PMCCFILTR_EL0
+Field 13 PMEVTYPERn_EL0
+Field 12 PMEVCNTRn_EL0
+Field 11 OSDLR_EL1
+Field 10 OSECCR_EL1
+Res0 9
+Field 8 OSLAR_EL1
+Field 7 DBGPRCR_EL1
+Res0 6
+Field 5 DBGCLAIM
+Field 4 MDSCR_EL1
+Field 3 DBGWVRn_EL1
+Field 2 DBGWCRn_EL1
+Field 1 DBGBVRn_EL1
+Field 0 DBGBCRn_EL1
+EndSysreg
+
Sysreg ZCR_EL2 3 4 1 2 0
Fields ZCR_ELx
EndSysreg
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table.h
generic-y += agp.h
-generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += vtime.h
* pNonSys: !pSys
*/
-
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <asm/asmmacro.h>
#include <asm/cache.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
-#include <asm/export.h>
#include "minstate.h"
#define PSR_BITS_TO_SET \
(IA64_PSR_BN)
+#include <linux/export.h>
#include <asm/processor.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
/*
* Inputs:
* Support for CPU Hotplug
*/
-
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <asm/asmmacro.h>
#include <asm/fpu.h>
#include <asm/mca_asm.h>
#include <linux/init.h>
#include <linux/linkage.h>
-#include <asm/export.h>
#ifdef CONFIG_HOTPLUG_CPU
#define SAL_PSR_BITS_TO_SET \
* Table is based upon EAS2.6 (Oct 1999)
*/
-
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <asm/asmmacro.h>
#include <asm/break.h>
#include <asm/thread_info.h>
#include <asm/unistd.h>
#include <asm/errno.h>
-#include <asm/export.h>
#if 0
# define PSR_DEFAULT_BITS psr.ac
* 05/24/2000 eranian Added support for physical mode static calls
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/processor.h>
-#include <asm/export.h>
.data
pal_entry_point:
* 3/08/02 davidm Some more tweaking
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
-#include <asm/export.h>
#ifdef CONFIG_ITANIUM
# define L3_LINE_SIZE 64 // Itanium L3 line size
* Stephane Eranian <eranian@hpl.hp.com>
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
//
// arguments
*
* 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies.
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
-#include <asm/export.h>
#define PIPE_DEPTH 3
#define EPI p[PIPE_DEPTH-1]
* to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
* an order that avoids bank conflicts.
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
-#include <asm/export.h>
#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
* - fix extraneous stop bit introduced by the EX() macro.
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
//
// Tuneable parameters
* 05/28/05 Zoltan Menyhart Dynamic stride size
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
-
/*
* flush_icache_range(start,end)
* (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
#ifdef MODULO
# define OP mod
* (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
#ifdef MODULO
# define OP mod
* Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com>
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
/*
* Since we know that most likely this function is called with buf aligned
* Stephane Eranian <eranian@hpl.hp.com>
* David Mosberger-Tang <davidm@hpl.hp.com>
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
GLOBAL_ENTRY(memcpy)
* Copyright (C) 2002 Intel Corp.
* Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
-#include <asm/export.h>
#define EK(y...) EX(y)
Since a stf.spill f0 can store 16B in one go, we use this instruction
to get peak speed when value = 0. */
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
#undef ret
#define dest in0
* 09/24/99 S.Eranian add speculation recovery code
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
//
//
* by Andreas Schwab <schwab@suse.de>).
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
GLOBAL_ENTRY(__strncpy_from_user)
alloc r2=ar.pfs,3,0,0,0
* Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
GLOBAL_ENTRY(__strnlen_user)
.prologue
* Optimized RAID-5 checksumming functions for IA-64.
*/
+#include <linux/export.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
GLOBAL_ENTRY(xor_ia64_2)
.prologue
select ACPI_PPTT if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_BINFMT_ELF_STATE
+ select ARCH_DISABLE_KASAN_INLINE
select ARCH_ENABLE_MEMORY_HOTPLUG
select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_KCOV
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
+ select HAVE_ARCH_KASAN
+ select HAVE_ARCH_KFENCE
+ select HAVE_ARCH_KGDB if PERF_EVENTS
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_FUNCTION_GRAPH_RETVAL if HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
+ select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IOREMAP_PROT
config AS_HAS_LASX_EXTENSION
def_bool $(as-instr,xvld \$xr0$(comma)\$a0$(comma)0)
+config AS_HAS_LBT_EXTENSION
+ def_bool $(as-instr,movscr2gr \$a0$(comma)\$scr0)
+
menu "Kernel type and options"
source "kernel/Kconfig.hz"
If unsure, say Y.
+config CPU_HAS_LBT
+ bool "Support for the Loongson Binary Translation Extension"
+ depends on AS_HAS_LBT_EXTENSION
+ help
+ Loongson Binary Translation (LBT) introduces 4 scratch registers (SCR0
+ to SCR3), x86/ARM eflags (eflags) and x87 fpu stack pointer (ftop).
+ Enabling this option allows the kernel to allocate and switch registers
+ specific to LBT.
+
+ If you want to use this feature, such as the Loongson Architecture
+ Translator (LAT), say Y.
+
config CPU_HAS_PREFETCH
bool
default y
config ARCH_SUPPORTS_UPROBES
def_bool y
+config KASAN_SHADOW_OFFSET
+ hex
+ default 0x0
+ depends on KASAN
+
menu "Power management options"
config ARCH_SUSPEND_POSSIBLE
endif
cflags-y += $(call cc-option, -mno-check-zero-division)
+
+ifndef CONFIG_KASAN
cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
+endif
load-y = 0x9000000000200000
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
-CONFIG_SYSFS_DEPRECATED=y
CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
CONFIG_HOTPLUG_CPU=y
CONFIG_NR_CPUS=64
CONFIG_NUMA=y
+CONFIG_CPU_HAS_FPU=y
+CONFIG_CPU_HAS_LSX=y
+CONFIG_CPU_HAS_LASX=y
CONFIG_KEXEC=y
CONFIG_CRASH_DUMP=y
+CONFIG_RANDOMIZE_BASE=y
CONFIG_SUSPEND=y
CONFIG_HIBERNATION=y
CONFIG_ACPI=y
CONFIG_EFI_GENERIC_STUB_INITRD_CMDLINE_LOADER=y
CONFIG_EFI_CAPSULE_LOADER=m
CONFIG_EFI_TEST=m
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_IP_PNP_RARP=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
CONFIG_INET_ESP=m
CONFIG_INET_UDP_DIAG=y
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_NFT_REDIR=m
CONFIG_NFT_NAT=m
CONFIG_NFT_TUNNEL=m
-CONFIG_NFT_OBJREF=m
CONFIG_NFT_QUEUE=m
CONFIG_NFT_QUOTA=m
CONFIG_NFT_REJECT=m
CONFIG_IP_VS_IPV6=y
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_PROTO_UDP=y
+CONFIG_IP_VS_PROTO_ESP=y
+CONFIG_IP_VS_PROTO_AH=y
+CONFIG_IP_VS_PROTO_SCTP=y
CONFIG_IP_VS_RR=m
+CONFIG_IP_VS_WRR=m
CONFIG_IP_VS_NFCT=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_DUP_IPV4=m
CONFIG_IP_NF_TARGET_NETMAP=m
CONFIG_IP_NF_TARGET_REDIRECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_MTD_CFI_STAA=m
CONFIG_MTD_RAM=m
CONFIG_MTD_ROM=m
+CONFIG_MTD_UBI=m
+CONFIG_MTD_UBI_BLOCK=y
CONFIG_PARPORT=y
CONFIG_PARPORT_PC=y
CONFIG_PARPORT_SERIAL=y
CONFIG_ZRAM=m
CONFIG_ZRAM_DEF_COMP_ZSTD=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=8192
# CONFIG_NET_VENDOR_TEHUTI is not set
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VIA is not set
+CONFIG_NGBE=y
+CONFIG_TXGBE=y
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_PIIX4=y
CONFIG_I2C_GPIO=y
+CONFIG_I2C_LS2X=y
CONFIG_SPI=y
+CONFIG_SPI_LOONGSON_PCI=m
+CONFIG_SPI_LOONGSON_PLATFORM=m
+CONFIG_PINCTRL=y
+CONFIG_PINCTRL_LOONGSON2=y
CONFIG_GPIO_SYSFS=y
CONFIG_GPIO_LOONGSON=y
+CONFIG_GPIO_LOONGSON_64BIT=y
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_RESTART=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SENSORS_LM93=m
CONFIG_SENSORS_W83795=m
CONFIG_SENSORS_W83627HF=m
+CONFIG_LOONGSON2_THERMAL=m
CONFIG_RC_CORE=m
CONFIG_LIRC=y
CONFIG_RC_DECODERS=y
CONFIG_DRM_AST=y
CONFIG_DRM_QXL=m
CONFIG_DRM_VIRTIO_GPU=m
+CONFIG_DRM_LOONGSON=y
CONFIG_FB=y
CONFIG_FB_EFI=y
CONFIG_FB_RADEON=y
CONFIG_INFINIBAND=m
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_EFI=y
+CONFIG_RTC_DRV_LOONGSON=y
CONFIG_DMADEVICES=y
CONFIG_UIO=m
CONFIG_UIO_PDRV_GENIRQ=m
CONFIG_COMEDI_NI_PCIDIO=m
CONFIG_COMEDI_NI_PCIMIO=m
CONFIG_STAGING=y
-CONFIG_R8188EU=m
+CONFIG_COMMON_CLK_LOONGSON2=y
+CONFIG_LOONGSON2_GUTS=y
+CONFIG_LOONGSON2_PM=y
CONFIG_PM_DEVFREQ=y
CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND=y
CONFIG_DEVFREQ_GOV_PERFORMANCE=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
+CONFIG_JFS_FS=m
+CONFIG_JFS_POSIX_ACL=y
+CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
+CONFIG_GFS2_FS=m
+CONFIG_GFS2_FS_LOCKING_DLM=y
+CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
+CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
CONFIG_QFMT_V2=m
CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
+CONFIG_CUSE=m
+CONFIG_VIRTIO_FS=m
CONFIG_OVERLAY_FS=y
CONFIG_OVERLAY_FS_INDEX=y
CONFIG_OVERLAY_FS_XINO_AUTO=y
CONFIG_OVERLAY_FS_METACOPY=y
CONFIG_FSCACHE=y
+CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_CODEPAGE=936
CONFIG_FAT_DEFAULT_IOCHARSET="gb2312"
+CONFIG_EXFAT_FS=m
+CONFIG_NTFS3_FS=m
+CONFIG_NTFS3_64BIT_CLUSTER=y
+CONFIG_NTFS3_LZX_XPRESS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=y
+CONFIG_ORANGEFS_FS=m
+CONFIG_ECRYPT_FS=m
+CONFIG_ECRYPT_FS_MESSAGING=y
CONFIG_HFS_FS=m
CONFIG_HFSPLUS_FS=m
+CONFIG_UBIFS_FS=m
+CONFIG_UBIFS_FS_ADVANCED_COMPR=y
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=y
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZ4=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
+CONFIG_MINIX_FS=m
+CONFIG_ROMFS_FS=m
+CONFIG_PSTORE=m
+CONFIG_PSTORE_LZO_COMPRESS=m
+CONFIG_PSTORE_LZ4_COMPRESS=m
+CONFIG_PSTORE_LZ4HC_COMPRESS=m
+CONFIG_PSTORE_842_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS=y
+CONFIG_PSTORE_ZSTD_COMPRESS_DEFAULT=y
+CONFIG_SYSV_FS=m
+CONFIG_UFS_FS=m
+CONFIG_EROFS_FS=m
+CONFIG_EROFS_FS_ZIP_LZMA=y
+CONFIG_EROFS_FS_PCPU_KTHREAD=y
CONFIG_NFS_FS=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_BLOCKLAYOUT=y
+CONFIG_CEPH_FS=m
+CONFIG_CEPH_FSCACHE=y
+CONFIG_CEPH_FS_POSIX_ACL=y
+CONFIG_CEPH_FS_SECURITY_LABEL=y
CONFIG_CIFS=m
# CONFIG_CIFS_DEBUG is not set
CONFIG_9P_FS=y
CONFIG_NLS_CODEPAGE_936=y
CONFIG_NLS_ASCII=y
CONFIG_NLS_UTF8=y
+CONFIG_DLM=m
CONFIG_KEY_DH_OPERATIONS=y
CONFIG_SECURITY=y
CONFIG_SECURITY_SELINUX=y
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
+CONFIG_CRYPTO_CRC32_LOONGARCH=m
CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_PRINTK_TIME=y
CONFIG_STRIP_ASM_SYMS=y
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/uaccess.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/mmu_context.h>
#include <asm/page.h>
#include <asm/ftrace.h>
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
- .macro parse_v var val
- \var = \val
- .endm
-
- .macro parse_r var r
- \var = -1
- .ifc \r, $r0
- \var = 0
- .endif
- .ifc \r, $r1
- \var = 1
- .endif
- .ifc \r, $r2
- \var = 2
- .endif
- .ifc \r, $r3
- \var = 3
- .endif
- .ifc \r, $r4
- \var = 4
- .endif
- .ifc \r, $r5
- \var = 5
- .endif
- .ifc \r, $r6
- \var = 6
- .endif
- .ifc \r, $r7
- \var = 7
- .endif
- .ifc \r, $r8
- \var = 8
- .endif
- .ifc \r, $r9
- \var = 9
- .endif
- .ifc \r, $r10
- \var = 10
- .endif
- .ifc \r, $r11
- \var = 11
- .endif
- .ifc \r, $r12
- \var = 12
- .endif
- .ifc \r, $r13
- \var = 13
- .endif
- .ifc \r, $r14
- \var = 14
- .endif
- .ifc \r, $r15
- \var = 15
- .endif
- .ifc \r, $r16
- \var = 16
- .endif
- .ifc \r, $r17
- \var = 17
- .endif
- .ifc \r, $r18
- \var = 18
- .endif
- .ifc \r, $r19
- \var = 19
- .endif
- .ifc \r, $r20
- \var = 20
- .endif
- .ifc \r, $r21
- \var = 21
- .endif
- .ifc \r, $r22
- \var = 22
- .endif
- .ifc \r, $r23
- \var = 23
- .endif
- .ifc \r, $r24
- \var = 24
- .endif
- .ifc \r, $r25
- \var = 25
- .endif
- .ifc \r, $r26
- \var = 26
- .endif
- .ifc \r, $r27
- \var = 27
- .endif
- .ifc \r, $r28
- \var = 28
- .endif
- .ifc \r, $r29
- \var = 29
- .endif
- .ifc \r, $r30
- \var = 30
- .endif
- .ifc \r, $r31
- \var = 31
- .endif
- .iflt \var
- .error "Unable to parse register name \r"
- .endif
- .endm
-
.macro cpu_save_nonscratch thread
stptr.d s0, \thread, THREAD_REG23
stptr.d s1, \thread, THREAD_REG24
.macro fpu_save_csr thread tmp
movfcsr2gr \tmp, fcsr0
- stptr.w \tmp, \thread, THREAD_FCSR
+ stptr.w \tmp, \thread, THREAD_FCSR
+#ifdef CONFIG_CPU_HAS_LBT
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp, \tmp, FPU_CSR_TM
+ beqz \tmp, 1f
+ /* Save FTOP */
+ x86mftop \tmp
+ stptr.w \tmp, \thread, THREAD_FTOP
+ /* Turn off TM to ensure the order of FPR in memory independent of TM */
+ x86clrtm
+1:
+#endif
.endm
- .macro fpu_restore_csr thread tmp
- ldptr.w \tmp, \thread, THREAD_FCSR
- movgr2fcsr fcsr0, \tmp
+ .macro fpu_restore_csr thread tmp0 tmp1
+ ldptr.w \tmp0, \thread, THREAD_FCSR
+ movgr2fcsr fcsr0, \tmp0
+#ifdef CONFIG_CPU_HAS_LBT
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp0, \tmp0, FPU_CSR_TM
+ beqz \tmp0, 2f
+ /* Restore FTOP */
+ ldptr.w \tmp0, \thread, THREAD_FTOP
+ andi \tmp0, \tmp0, 0x7
+ la.pcrel \tmp1, 1f
+ alsl.d \tmp1, \tmp0, \tmp1, 3
+ jr \tmp1
+1:
+ x86mttop 0
+ b 2f
+ x86mttop 1
+ b 2f
+ x86mttop 2
+ b 2f
+ x86mttop 3
+ b 2f
+ x86mttop 4
+ b 2f
+ x86mttop 5
+ b 2f
+ x86mttop 6
+ b 2f
+ x86mttop 7
+2:
+#endif
.endm
.macro fpu_save_cc thread tmp0 tmp1
.macro lsx_restore_all thread tmp0 tmp1
lsx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
- fpu_restore_csr \thread, \tmp0
+ fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lsx_save_upper vd base tmp off
.macro lasx_restore_all thread tmp0 tmp1
lasx_restore_data \thread, \tmp0
fpu_restore_cc \thread, \tmp0, \tmp1
- fpu_restore_csr \thread, \tmp0
+ fpu_restore_csr \thread, \tmp0, \tmp1
.endm
.macro lasx_save_upper xd base tmp off
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/linkage.h>
+#include <linux/mmzone.h>
+#include <asm/addrspace.h>
+#include <asm/io.h>
+#include <asm/pgtable.h>
+
+#define __HAVE_ARCH_SHADOW_MAP
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+
+#define XRANGE_SHIFT (48)
+
+/* Valid address length */
+#define XRANGE_SHADOW_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
+/* Used for taking out the valid address */
+#define XRANGE_SHADOW_MASK GENMASK_ULL(XRANGE_SHADOW_SHIFT - 1, 0)
+/* One segment whole address space size */
+#define XRANGE_SIZE (XRANGE_SHADOW_MASK + 1)
+
+/* 64-bit segment value. */
+#define XKPRANGE_UC_SEG (0x8000)
+#define XKPRANGE_CC_SEG (0x9000)
+#define XKVRANGE_VC_SEG (0xffff)
+
+/* Cached */
+#define XKPRANGE_CC_START CACHE_BASE
+#define XKPRANGE_CC_SIZE XRANGE_SIZE
+#define XKPRANGE_CC_KASAN_OFFSET (0)
+#define XKPRANGE_CC_SHADOW_SIZE (XKPRANGE_CC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_CC_SHADOW_END (XKPRANGE_CC_KASAN_OFFSET + XKPRANGE_CC_SHADOW_SIZE)
+
+/* UnCached */
+#define XKPRANGE_UC_START UNCACHE_BASE
+#define XKPRANGE_UC_SIZE XRANGE_SIZE
+#define XKPRANGE_UC_KASAN_OFFSET XKPRANGE_CC_SHADOW_END
+#define XKPRANGE_UC_SHADOW_SIZE (XKPRANGE_UC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKPRANGE_UC_SHADOW_END (XKPRANGE_UC_KASAN_OFFSET + XKPRANGE_UC_SHADOW_SIZE)
+
+/* VMALLOC (Cached or UnCached) */
+#define XKVRANGE_VC_START MODULES_VADDR
+#define XKVRANGE_VC_SIZE round_up(KFENCE_AREA_END - MODULES_VADDR + 1, PGDIR_SIZE)
+#define XKVRANGE_VC_KASAN_OFFSET XKPRANGE_UC_SHADOW_END
+#define XKVRANGE_VC_SHADOW_SIZE (XKVRANGE_VC_SIZE >> KASAN_SHADOW_SCALE_SHIFT)
+#define XKVRANGE_VC_SHADOW_END (XKVRANGE_VC_KASAN_OFFSET + XKVRANGE_VC_SHADOW_SIZE)
+
+/* KAsan shadow memory start right after vmalloc. */
+#define KASAN_SHADOW_START round_up(KFENCE_AREA_END, PGDIR_SIZE)
+#define KASAN_SHADOW_SIZE (XKVRANGE_VC_SHADOW_END - XKPRANGE_CC_KASAN_OFFSET)
+#define KASAN_SHADOW_END round_up(KASAN_SHADOW_START + KASAN_SHADOW_SIZE, PGDIR_SIZE)
+
+#define XKPRANGE_CC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_CC_KASAN_OFFSET)
+#define XKPRANGE_UC_SHADOW_OFFSET (KASAN_SHADOW_START + XKPRANGE_UC_KASAN_OFFSET)
+#define XKVRANGE_VC_SHADOW_OFFSET (KASAN_SHADOW_START + XKVRANGE_VC_KASAN_OFFSET)
+
+extern bool kasan_early_stage;
+extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
+
+#define kasan_arch_is_ready kasan_arch_is_ready
+static __always_inline bool kasan_arch_is_ready(void)
+{
+ return !kasan_early_stage;
+}
+
+static inline void *kasan_mem_to_shadow(const void *addr)
+{
+ if (!kasan_arch_is_ready()) {
+ return (void *)(kasan_early_shadow_page);
+ } else {
+ unsigned long maddr = (unsigned long)addr;
+ unsigned long xrange = (maddr >> XRANGE_SHIFT) & 0xffff;
+ unsigned long offset = 0;
+
+ maddr &= XRANGE_SHADOW_MASK;
+ switch (xrange) {
+ case XKPRANGE_CC_SEG:
+ offset = XKPRANGE_CC_SHADOW_OFFSET;
+ break;
+ case XKPRANGE_UC_SEG:
+ offset = XKPRANGE_UC_SHADOW_OFFSET;
+ break;
+ case XKVRANGE_VC_SEG:
+ offset = XKVRANGE_VC_SHADOW_OFFSET;
+ break;
+ default:
+ WARN_ON(1);
+ return NULL;
+ }
+
+ return (void *)((maddr >> KASAN_SHADOW_SCALE_SHIFT) + offset);
+ }
+}
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+ unsigned long addr = (unsigned long)shadow_addr;
+
+ if (unlikely(addr > KASAN_SHADOW_END) ||
+ unlikely(addr < KASAN_SHADOW_START)) {
+ WARN_ON(1);
+ return NULL;
+ }
+
+ if (addr >= XKVRANGE_VC_SHADOW_OFFSET)
+ return (void *)(((addr - XKVRANGE_VC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKVRANGE_VC_START);
+ else if (addr >= XKPRANGE_UC_SHADOW_OFFSET)
+ return (void *)(((addr - XKPRANGE_UC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_UC_START);
+ else if (addr >= XKPRANGE_CC_SHADOW_OFFSET)
+ return (void *)(((addr - XKPRANGE_CC_SHADOW_OFFSET) << KASAN_SHADOW_SCALE_SHIFT) + XKPRANGE_CC_START);
+ else {
+ WARN_ON(1);
+ return NULL;
+ }
+}
+
+void kasan_init(void);
+asmlinkage void kasan_early_init(void);
+
+#endif
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KFENCE support for LoongArch.
+ *
+ * Author: Enze Li <lienze@kylinos.cn>
+ * Copyright (C) 2022-2023 KylinSoft Corporation.
+ */
+
+#ifndef _ASM_LOONGARCH_KFENCE_H
+#define _ASM_LOONGARCH_KFENCE_H
+
+#include <linux/kfence.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+
+static inline bool arch_kfence_init_pool(void)
+{
+ int err;
+ char *kfence_pool = __kfence_pool;
+ struct vm_struct *area;
+
+ area = __get_vm_area_caller(KFENCE_POOL_SIZE, VM_IOREMAP,
+ KFENCE_AREA_START, KFENCE_AREA_END,
+ __builtin_return_address(0));
+ if (!area)
+ return false;
+
+ __kfence_pool = (char *)area->addr;
+ err = ioremap_page_range((unsigned long)__kfence_pool,
+ (unsigned long)__kfence_pool + KFENCE_POOL_SIZE,
+ virt_to_phys((void *)kfence_pool), PAGE_KERNEL);
+ if (err) {
+ free_vm_area(area);
+ __kfence_pool = kfence_pool;
+ return false;
+ }
+
+ return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ pte_t *pte = virt_to_kpte(addr);
+
+ if (WARN_ON(!pte) || pte_none(*pte))
+ return false;
+
+ if (protect)
+ set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT)));
+ else
+ set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT)));
+
+ preempt_disable();
+ local_flush_tlb_one(addr);
+ preempt_enable();
+
+ return true;
+}
+
+#endif /* _ASM_LOONGARCH_KFENCE_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_KGDB_H
+#define _ASM_LOONGARCH_KGDB_H
+
+#define GDB_SIZEOF_REG sizeof(u64)
+
+/* gdb remote procotol expects the following register layout. */
+
+/*
+ * General purpose registers:
+ * r0-r31: 64 bit
+ * orig_a0: 64 bit
+ * pc : 64 bit
+ * csr_badvaddr: 64 bit
+ */
+#define DBG_PT_REGS_BASE 0
+#define DBG_PT_REGS_NUM 35
+#define DBG_PT_REGS_END (DBG_PT_REGS_BASE + DBG_PT_REGS_NUM - 1)
+
+/*
+ * Floating point registers:
+ * f0-f31: 64 bit
+ */
+#define DBG_FPR_BASE (DBG_PT_REGS_END + 1)
+#define DBG_FPR_NUM 32
+#define DBG_FPR_END (DBG_FPR_BASE + DBG_FPR_NUM - 1)
+
+/*
+ * Condition Flag registers:
+ * fcc0-fcc8: 8 bit
+ */
+#define DBG_FCC_BASE (DBG_FPR_END + 1)
+#define DBG_FCC_NUM 8
+#define DBG_FCC_END (DBG_FCC_BASE + DBG_FCC_NUM - 1)
+
+/*
+ * Floating-point Control and Status registers:
+ * fcsr: 32 bit
+ */
+#define DBG_FCSR_NUM 1
+#define DBG_FCSR (DBG_FCC_END + 1)
+
+#define DBG_MAX_REG_NUM (DBG_FCSR + 1)
+
+/*
+ * Size of I/O buffer for gdb packet.
+ * considering to hold all register contents, size is set
+ */
+#define BUFMAX 2048
+
+/*
+ * Number of bytes required for gdb_regs buffer.
+ * PT_REGS and FPR: 8 bytes; FCSR: 4 bytes; FCC: 1 bytes.
+ * GDB fails to connect for size beyond this with error
+ * "'g' packet reply is too long"
+ */
+#define NUMREGBYTES ((DBG_PT_REGS_NUM + DBG_FPR_NUM) * GDB_SIZEOF_REG + DBG_FCC_NUM * 1 + DBG_FCSR_NUM * 4)
+
+#define BREAK_INSTR_SIZE 4
+#define CACHE_FLUSH_IS_SAFE 0
+
+/* Register numbers of various important registers. */
+enum dbg_loongarch_regnum {
+ DBG_LOONGARCH_ZERO = 0,
+ DBG_LOONGARCH_RA,
+ DBG_LOONGARCH_TP,
+ DBG_LOONGARCH_SP,
+ DBG_LOONGARCH_A0,
+ DBG_LOONGARCH_FP = 22,
+ DBG_LOONGARCH_S0,
+ DBG_LOONGARCH_S1,
+ DBG_LOONGARCH_S2,
+ DBG_LOONGARCH_S3,
+ DBG_LOONGARCH_S4,
+ DBG_LOONGARCH_S5,
+ DBG_LOONGARCH_S6,
+ DBG_LOONGARCH_S7,
+ DBG_LOONGARCH_S8,
+ DBG_LOONGARCH_ORIG_A0,
+ DBG_LOONGARCH_PC,
+ DBG_LOONGARCH_BADV
+};
+
+void kgdb_breakinst(void);
+void arch_kgdb_breakpoint(void);
+
+#ifdef CONFIG_KGDB
+bool kgdb_breakpoint_handler(struct pt_regs *regs);
+#else /* !CONFIG_KGDB */
+static inline bool kgdb_breakpoint_handler(struct pt_regs *regs) { return false; }
+#endif /* CONFIG_KGDB */
+
+#endif /* __ASM_KGDB_H_ */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ * Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_LBT_H
+#define _ASM_LBT_H
+
+#include <asm/cpu.h>
+#include <asm/current.h>
+#include <asm/loongarch.h>
+#include <asm/processor.h>
+
+extern void _init_lbt(void);
+extern void _save_lbt(struct loongarch_lbt *);
+extern void _restore_lbt(struct loongarch_lbt *);
+
+static inline int is_lbt_enabled(void)
+{
+ if (!cpu_has_lbt)
+ return 0;
+
+ return (csr_read32(LOONGARCH_CSR_EUEN) & CSR_EUEN_LBTEN) ?
+ 1 : 0;
+}
+
+static inline int is_lbt_owner(void)
+{
+ return test_thread_flag(TIF_USEDLBT);
+}
+
+#ifdef CONFIG_CPU_HAS_LBT
+
+static inline void enable_lbt(void)
+{
+ if (cpu_has_lbt)
+ csr_xchg32(CSR_EUEN_LBTEN, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void disable_lbt(void)
+{
+ if (cpu_has_lbt)
+ csr_xchg32(0, CSR_EUEN_LBTEN, LOONGARCH_CSR_EUEN);
+}
+
+static inline void __own_lbt(void)
+{
+ enable_lbt();
+ set_thread_flag(TIF_USEDLBT);
+ KSTK_EUEN(current) |= CSR_EUEN_LBTEN;
+}
+
+static inline void own_lbt_inatomic(int restore)
+{
+ if (cpu_has_lbt && !is_lbt_owner()) {
+ __own_lbt();
+ if (restore)
+ _restore_lbt(¤t->thread.lbt);
+ }
+}
+
+static inline void own_lbt(int restore)
+{
+ preempt_disable();
+ own_lbt_inatomic(restore);
+ preempt_enable();
+}
+
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk)
+{
+ if (cpu_has_lbt && is_lbt_owner()) {
+ if (save)
+ _save_lbt(&tsk->thread.lbt);
+
+ disable_lbt();
+ clear_tsk_thread_flag(tsk, TIF_USEDLBT);
+ }
+ KSTK_EUEN(tsk) &= ~(CSR_EUEN_LBTEN);
+}
+
+static inline void lose_lbt(int save)
+{
+ preempt_disable();
+ lose_lbt_inatomic(save, current);
+ preempt_enable();
+}
+
+static inline void init_lbt(void)
+{
+ __own_lbt();
+ _init_lbt();
+}
+#else
+static inline void own_lbt_inatomic(int restore) {}
+static inline void lose_lbt_inatomic(int save, struct task_struct *tsk) {}
+static inline void init_lbt(void) {}
+static inline void lose_lbt(int save) {}
+#endif
+
+static inline int thread_lbt_context_live(void)
+{
+ if (!cpu_has_lbt)
+ return 0;
+
+ return test_thread_flag(TIF_LBT_CTX_LIVE);
+}
+
+#endif /* _ASM_LBT_H */
#ifndef __ASSEMBLY__
#include <larchintrin.h>
-/*
- * parse_r var, r - Helper assembler macro for parsing register names.
- *
- * This converts the register name in $n form provided in \r to the
- * corresponding register number, which is assigned to the variable \var. It is
- * needed to allow explicit encoding of instructions in inline assembly where
- * registers are chosen by the compiler in $n form, allowing us to avoid using
- * fixed register numbers.
- *
- * It also allows newer instructions (not implemented by the assembler) to be
- * transparently implemented using assembler macros, instead of needing separate
- * cases depending on toolchain support.
- *
- * Simple usage example:
- * __asm__ __volatile__("parse_r addr, %0\n\t"
- * "#invtlb op, 0, %0\n\t"
- * ".word ((0x6498000) | (addr << 10) | (0 << 5) | op)"
- * : "=r" (status);
- */
-
-/* Match an individual register number and assign to \var */
-#define _IFC_REG(n) \
- ".ifc \\r, $r" #n "\n\t" \
- "\\var = " #n "\n\t" \
- ".endif\n\t"
-
-__asm__(".macro parse_r var r\n\t"
- "\\var = -1\n\t"
- _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3)
- _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7)
- _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11)
- _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15)
- _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19)
- _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23)
- _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27)
- _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31)
- ".iflt \\var\n\t"
- ".error \"Unable to parse register name \\r\"\n\t"
- ".endif\n\t"
- ".endm");
-
-#undef _IFC_REG
-
/* CPUCFG */
#define read_cpucfg(reg) __cpucfg(reg)
#define FPU_CSR_RU 0x200 /* towards +Infinity */
#define FPU_CSR_RD 0x300 /* towards -Infinity */
+/* Bit 6 of FPU Status Register specify the LBT TOP simulation mode */
+#define FPU_CSR_TM_SHIFT 0x6
+#define FPU_CSR_TM (_ULCAST_(1) << FPU_CSR_TM_SHIFT)
+
#define read_fcsr(source) \
({ \
unsigned int __res; \
#define NODE_DATA(nid) (node_data[(nid)])
-extern void setup_zero_pages(void);
-
#endif /* _ASM_MMZONE_H_ */
#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
-#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
+
+#define virt_to_page(kaddr) \
+({ \
+ (likely((unsigned long)kaddr < vm_map_base)) ? \
+ dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\
+})
extern int __virt_addr_valid(volatile void *kaddr);
#define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr))
#endif /* __PAGETABLE_PUD_FOLDED */
+extern pte_t * __init populate_kernel_pte(unsigned long addr);
#endif /* _ASM_PGALLOC_H */
* for zero-mapped memory areas etc..
*/
-extern unsigned long empty_zero_page;
-extern unsigned long zero_page_mask;
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) \
- (virt_to_page((void *)(empty_zero_page + (((unsigned long)(vaddr)) & zero_page_mask))))
-#define __HAVE_COLOR_ZERO_PAGE
+#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
/*
* TLB refill handlers may also map the vmalloc area into xkvrange.
#define MODULES_VADDR (vm_map_base + PCI_IOSIZE + (2 * PAGE_SIZE))
#define MODULES_END (MODULES_VADDR + SZ_256M)
+#ifdef CONFIG_KFENCE
+#define KFENCE_AREA_SIZE (((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 + 2) * PAGE_SIZE)
+#else
+#define KFENCE_AREA_SIZE 0
+#endif
+
#define VMALLOC_START MODULES_END
+
+#ifndef CONFIG_KASAN
#define VMALLOC_END \
(vm_map_base + \
- min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#else
+#define VMALLOC_END \
+ (vm_map_base + \
+ min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits) / 2) - PMD_SIZE - VMEMMAP_SIZE - KFENCE_AREA_SIZE)
+#endif
#define vmemmap ((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
#define VMEMMAP_END ((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
+#define KFENCE_AREA_START (VMEMMAP_END + 1)
+#define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1)
+
#define pte_ERROR(e) \
pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
#ifndef __PAGETABLE_PMD_FOLDED
extern pgd_t swapper_pg_dir[];
extern pgd_t invalid_pg_dir[];
+struct page *dmw_virt_to_page(unsigned long kaddr);
+struct page *tlb_virt_to_page(unsigned long kaddr);
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
}
#endif /* CONFIG_NUMA_BALANCING */
+#define pmd_leaf(pmd) ((pmd_val(pmd) & _PAGE_HUGE) != 0)
+#define pud_leaf(pud) ((pud_val(pud) & _PAGE_HUGE) != 0)
+
/*
* We provide our own get_unmapped area to cope with the virtual aliasing
* constraints placed on us by the cache architecture.
BUILD_FPR_ACCESS(64)
struct loongarch_fpu {
- unsigned int fcsr;
uint64_t fcc; /* 8x8 */
+ uint32_t fcsr;
+ uint32_t ftop;
union fpureg fpr[NUM_FPU_REGS];
};
+struct loongarch_lbt {
+ /* Scratch registers */
+ unsigned long scr0;
+ unsigned long scr1;
+ unsigned long scr2;
+ unsigned long scr3;
+ /* Eflags register */
+ unsigned long eflags;
+};
+
#define INIT_CPUMASK { \
{0,} \
}
unsigned long csr_ecfg;
unsigned long csr_badvaddr; /* Last user fault */
- /* Scratch registers */
- unsigned long scr0;
- unsigned long scr1;
- unsigned long scr2;
- unsigned long scr3;
-
- /* Eflags register */
- unsigned long eflags;
-
/* Other stuff associated with the thread. */
unsigned long trap_nr;
unsigned long error_code;
* context because they are conditionally copied at fork().
*/
struct loongarch_fpu fpu FPU_ALIGN;
+ struct loongarch_lbt lbt; /* Also conditionally copied */
/* Hardware breakpoints pinned to this task. */
struct perf_event *hbp_break[LOONGARCH_MAX_BRP];
* FPU & vector registers \
*/ \
.fpu = { \
- .fcsr = 0, \
.fcc = 0, \
+ .fcsr = 0, \
+ .ftop = 0, \
.fpr = {{{0,},},}, \
}, \
.hbp_break = {0}, \
#define _LOONGARCH_SETUP_H
#include <linux/types.h>
+#include <asm/sections.h>
#include <uapi/asm/setup.h>
#define VECSIZE 0x200
extern long __rela_dyn_begin;
extern long __rela_dyn_end;
-extern void * __init relocate_kernel(void);
+extern unsigned long __init relocate_kernel(void);
#endif
+static inline unsigned long kaslr_offset(void)
+{
+ return (unsigned long)&_text - VMLINUX_LOAD_ADDRESS;
+}
+
#endif /* __SETUP_H */
cfi_st u0, PT_R21, \docfi
csrrd u0, PERCPU_BASE_KS
9:
+#ifdef CONFIG_KGDB
+ li.w t0, CSR_CRMD_WE
+ csrxchg t0, t0, LOONGARCH_CSR_CRMD
+#endif
.endm
.macro SAVE_ALL docfi=0
#define __HAVE_ARCH_MEMSET
extern void *memset(void *__s, int __c, size_t __count);
+extern void *__memset(void *__s, int __c, size_t __count);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+extern void *__memcpy(void *__to, __const__ void *__from, size_t __n);
#define __HAVE_ARCH_MEMMOVE
extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
+extern void *__memmove(void *__dest, __const__ void *__src, size_t __n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use not instrumented version of mem* functions.
+ */
+
+#define memset(s, c, n) __memset(s, c, n)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
#endif /* _ASM_STRING_H */
#include <asm/cpu-features.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
struct task_struct;
#define switch_to(prev, next, last) \
do { \
lose_fpu_inatomic(1, prev); \
+ lose_lbt_inatomic(1, prev); \
hw_breakpoint_thread_switch(next); \
(last) = __switch_to(prev, next, task_thread_info(next), \
__builtin_return_address(0), __builtin_frame_address(0)); \
#define TIF_SINGLESTEP 16 /* Single Step */
#define TIF_LSX_CTX_LIVE 17 /* LSX context must be preserved */
#define TIF_LASX_CTX_LIVE 18 /* LASX context must be preserved */
+#define TIF_USEDLBT 19 /* LBT was used by this task this quantum (SMP) */
+#define TIF_LBT_CTX_LIVE 20 /* LBT context must be preserved */
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_LSX_CTX_LIVE (1<<TIF_LSX_CTX_LIVE)
#define _TIF_LASX_CTX_LIVE (1<<TIF_LASX_CTX_LIVE)
+#define _TIF_USEDLBT (1<<TIF_USEDLBT)
+#define _TIF_LBT_CTX_LIVE (1<<TIF_LBT_CTX_LIVE)
#endif /* __KERNEL__ */
#endif /* _ASM_THREAD_INFO_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_H
+#define _ASM_LOONGARCH_XOR_H
+
+#include <asm/cpu-features.h>
+#include <asm/xor_simd.h>
+
+#ifdef CONFIG_CPU_HAS_LSX
+static struct xor_block_template xor_block_lsx = {
+ .name = "lsx",
+ .do_2 = xor_lsx_2,
+ .do_3 = xor_lsx_3,
+ .do_4 = xor_lsx_4,
+ .do_5 = xor_lsx_5,
+};
+
+#define XOR_SPEED_LSX() \
+ do { \
+ if (cpu_has_lsx) \
+ xor_speed(&xor_block_lsx); \
+ } while (0)
+#else /* CONFIG_CPU_HAS_LSX */
+#define XOR_SPEED_LSX()
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static struct xor_block_template xor_block_lasx = {
+ .name = "lasx",
+ .do_2 = xor_lasx_2,
+ .do_3 = xor_lasx_3,
+ .do_4 = xor_lasx_4,
+ .do_5 = xor_lasx_5,
+};
+
+#define XOR_SPEED_LASX() \
+ do { \
+ if (cpu_has_lasx) \
+ xor_speed(&xor_block_lasx); \
+ } while (0)
+#else /* CONFIG_CPU_HAS_LASX */
+#define XOR_SPEED_LASX()
+#endif /* CONFIG_CPU_HAS_LASX */
+
+/*
+ * For grins, also test the generic routines.
+ *
+ * More importantly: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+#include <asm-generic/xor.h>
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_8regs_p); \
+ xor_speed(&xor_block_32regs); \
+ xor_speed(&xor_block_32regs_p); \
+ XOR_SPEED_LSX(); \
+ XOR_SPEED_LASX(); \
+} while (0)
+
+#endif /* _ASM_LOONGARCH_XOR_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+#ifndef _ASM_LOONGARCH_XOR_SIMD_H
+#define _ASM_LOONGARCH_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* _ASM_LOONGARCH_XOR_SIMD_H */
uint64_t vregs[32*4];
};
+struct user_lbt_state {
+ uint64_t scr[4];
+ uint32_t eflags;
+ uint32_t ftop;
+};
+
struct user_watch_state {
uint64_t dbg_info;
struct {
__u32 fcsr;
};
+/* LBT context */
+#define LBT_CTX_MAGIC 0x42540001
+#define LBT_CTX_ALIGN 8
+struct lbt_context {
+ __u64 regs[4];
+ __u32 eflags;
+ __u32 ftop;
+};
+
+
#endif /* _UAPI_ASM_SIGCONTEXT_H */
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o kfpu.o
+obj-$(CONFIG_CPU_HAS_LBT) += lbt.o
+
obj-$(CONFIG_ARCH_STRICT_ALIGN) += unaligned.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE)
endif
+KASAN_SANITIZE_efi.o := n
+KASAN_SANITIZE_cpu-probe.o := n
+KASAN_SANITIZE_traps.o := n
+KASAN_SANITIZE_smp.o := n
+KASAN_SANITIZE_vdso.o := n
+
obj-$(CONFIG_MODULES) += module.o module-sections.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_regs.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o
obj-$(CONFIG_UPROBES) += uprobes.o
OFFSET(THREAD_CSRECFG, task_struct,
thread.csr_ecfg);
- OFFSET(THREAD_SCR0, task_struct, thread.scr0);
- OFFSET(THREAD_SCR1, task_struct, thread.scr1);
- OFFSET(THREAD_SCR2, task_struct, thread.scr2);
- OFFSET(THREAD_SCR3, task_struct, thread.scr3);
-
- OFFSET(THREAD_EFLAGS, task_struct, thread.eflags);
-
OFFSET(THREAD_FPU, task_struct, thread.fpu);
OFFSET(THREAD_BVADDR, task_struct, \
OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
OFFSET(THREAD_FCC, loongarch_fpu, fcc);
+ OFFSET(THREAD_FTOP, loongarch_fpu, ftop);
+ BLANK();
+}
+
+void output_thread_lbt_defines(void)
+{
+ OFFSET(THREAD_SCR0, loongarch_lbt, scr0);
+ OFFSET(THREAD_SCR1, loongarch_lbt, scr1);
+ OFFSET(THREAD_SCR2, loongarch_lbt, scr2);
+ OFFSET(THREAD_SCR3, loongarch_lbt, scr3);
+ OFFSET(THREAD_EFLAGS, loongarch_lbt, eflags);
BLANK();
}
c->options |= LOONGARCH_CPU_LVZ;
elf_hwcap |= HWCAP_LOONGARCH_LVZ;
}
+#ifdef CONFIG_CPU_HAS_LBT
+ if (config & CPUCFG2_X86BT) {
+ c->options |= LOONGARCH_CPU_LBT_X86;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_X86;
+ }
+ if (config & CPUCFG2_ARMBT) {
+ c->options |= LOONGARCH_CPU_LBT_ARM;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_ARM;
+ }
+ if (config & CPUCFG2_MIPSBT) {
+ c->options |= LOONGARCH_CPU_LBT_MIPS;
+ elf_hwcap |= HWCAP_LOONGARCH_LBT_MIPS;
+ }
+#endif
config = read_cpucfg(LOONGARCH_CPUCFG6);
if (config & CPUCFG6_PMP)
SAVE_STATIC
+#ifdef CONFIG_KGDB
+ li.w t1, CSR_CRMD_WE
+ csrxchg t1, t1, LOONGARCH_CSR_CRMD
+#endif
+
move u0, t0
li.d tp, ~_THREAD_MASK
and tp, tp, sp
.macro EX insn, reg, src, offs
.ex\@: \insn \reg, \src, \offs
- _asm_extable .ex\@, fault
+ _asm_extable .ex\@, .L_fpu_fault
.endm
.macro sc_save_fp base
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
EX st.w \tmp0, \base, 0
+#if defined(CONFIG_CPU_HAS_LBT)
+ /* TM bit is always 0 if LBT not supported */
+ andi \tmp0, \tmp0, FPU_CSR_TM
+ beqz \tmp0, 1f
+ x86clrtm
+1:
+#endif
.endm
.macro sc_restore_fcsr base, tmp0
*/
SYM_FUNC_START(_restore_fp)
fpu_restore_double a0 t1 # clobbers t1
- fpu_restore_csr a0 t1
+ fpu_restore_csr a0 t1 t2
fpu_restore_cc a0 t1 t2 # clobbers t1, t2
jr ra
SYM_FUNC_END(_restore_fp)
jr ra
SYM_FUNC_END(_restore_lasx_context)
-SYM_FUNC_START(fault)
+.L_fpu_fault:
li.w a0, -EFAULT # failure
jr ra
-SYM_FUNC_END(fault)
PTR_LI sp, (_THREAD_SIZE - PT_SIZE)
PTR_ADD sp, sp, tp
set_saved_sp sp, t0, t1
-#endif
- /* relocate_kernel() returns the new kernel entry point */
- jr a0
- ASM_BUG()
+ /* Jump to the new kernel: new_pc = current_pc + random_offset */
+ pcaddi t0, 0
+ add.d t0, t0, a0
+ jirl zero, t0, 0xc
+#endif /* CONFIG_RANDOMIZE_BASE */
+
+#endif /* CONFIG_RELOCATABLE */
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
#endif
bl start_kernel
#include <asm/fpu.h>
#include <asm/smp.h>
+static unsigned int euen_mask = CSR_EUEN_FPEN;
+
+/*
+ * The critical section between kernel_fpu_begin() and kernel_fpu_end()
+ * is non-reentrant. It is the caller's responsibility to avoid reentrance.
+ * See drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c as an example.
+ */
static DEFINE_PER_CPU(bool, in_kernel_fpu);
+static DEFINE_PER_CPU(unsigned int, euen_current);
void kernel_fpu_begin(void)
{
+ unsigned int *euen_curr;
+
preempt_disable();
WARN_ON(this_cpu_read(in_kernel_fpu));
this_cpu_write(in_kernel_fpu, true);
+ euen_curr = this_cpu_ptr(&euen_current);
- if (!is_fpu_owner())
- enable_fpu();
+ *euen_curr = csr_xchg32(euen_mask, euen_mask, LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+ if (*euen_curr & CSR_EUEN_LASXEN)
+ _save_lasx(¤t->thread.fpu);
+ else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ if (*euen_curr & CSR_EUEN_LSXEN)
+ _save_lsx(¤t->thread.fpu);
else
+#endif
+ if (*euen_curr & CSR_EUEN_FPEN)
_save_fp(¤t->thread.fpu);
write_fcsr(LOONGARCH_FCSR0, 0);
void kernel_fpu_end(void)
{
+ unsigned int *euen_curr;
+
WARN_ON(!this_cpu_read(in_kernel_fpu));
- if (!is_fpu_owner())
- disable_fpu();
+ euen_curr = this_cpu_ptr(&euen_current);
+
+#ifdef CONFIG_CPU_HAS_LASX
+ if (*euen_curr & CSR_EUEN_LASXEN)
+ _restore_lasx(¤t->thread.fpu);
else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ if (*euen_curr & CSR_EUEN_LSXEN)
+ _restore_lsx(¤t->thread.fpu);
+ else
+#endif
+ if (*euen_curr & CSR_EUEN_FPEN)
_restore_fp(¤t->thread.fpu);
+ *euen_curr = csr_xchg32(*euen_curr, euen_mask, LOONGARCH_CSR_EUEN);
+
this_cpu_write(in_kernel_fpu, false);
preempt_enable();
}
EXPORT_SYMBOL_GPL(kernel_fpu_end);
+
+static int __init init_euen_mask(void)
+{
+ if (cpu_has_lsx)
+ euen_mask |= CSR_EUEN_LSXEN;
+
+ if (cpu_has_lasx)
+ euen_mask |= CSR_EUEN_LASXEN;
+
+ return 0;
+}
+arch_initcall(init_euen_mask);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * LoongArch KGDB support
+ *
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/kdebug.h>
+#include <linux/kgdb.h>
+#include <linux/processor.h>
+#include <linux/ptrace.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fpu.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/inst.h>
+#include <asm/irq_regs.h>
+#include <asm/ptrace.h>
+#include <asm/sigcontext.h>
+
+int kgdb_watch_activated;
+static unsigned int stepped_opcode;
+static unsigned long stepped_address;
+
+struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = {
+ { "r0", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[0]) },
+ { "r1", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[1]) },
+ { "r2", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[2]) },
+ { "r3", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[3]) },
+ { "r4", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[4]) },
+ { "r5", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[5]) },
+ { "r6", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[6]) },
+ { "r7", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[7]) },
+ { "r8", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[8]) },
+ { "r9", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[9]) },
+ { "r10", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[10]) },
+ { "r11", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[11]) },
+ { "r12", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[12]) },
+ { "r13", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[13]) },
+ { "r14", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[14]) },
+ { "r15", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[15]) },
+ { "r16", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[16]) },
+ { "r17", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[17]) },
+ { "r18", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[18]) },
+ { "r19", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[19]) },
+ { "r20", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[20]) },
+ { "r21", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[21]) },
+ { "r22", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[22]) },
+ { "r23", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[23]) },
+ { "r24", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[24]) },
+ { "r25", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[25]) },
+ { "r26", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[26]) },
+ { "r27", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[27]) },
+ { "r28", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[28]) },
+ { "r29", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[29]) },
+ { "r30", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[30]) },
+ { "r31", GDB_SIZEOF_REG, offsetof(struct pt_regs, regs[31]) },
+ { "orig_a0", GDB_SIZEOF_REG, offsetof(struct pt_regs, orig_a0) },
+ { "pc", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_era) },
+ { "badv", GDB_SIZEOF_REG, offsetof(struct pt_regs, csr_badvaddr) },
+ { "f0", GDB_SIZEOF_REG, 0 },
+ { "f1", GDB_SIZEOF_REG, 1 },
+ { "f2", GDB_SIZEOF_REG, 2 },
+ { "f3", GDB_SIZEOF_REG, 3 },
+ { "f4", GDB_SIZEOF_REG, 4 },
+ { "f5", GDB_SIZEOF_REG, 5 },
+ { "f6", GDB_SIZEOF_REG, 6 },
+ { "f7", GDB_SIZEOF_REG, 7 },
+ { "f8", GDB_SIZEOF_REG, 8 },
+ { "f9", GDB_SIZEOF_REG, 9 },
+ { "f10", GDB_SIZEOF_REG, 10 },
+ { "f11", GDB_SIZEOF_REG, 11 },
+ { "f12", GDB_SIZEOF_REG, 12 },
+ { "f13", GDB_SIZEOF_REG, 13 },
+ { "f14", GDB_SIZEOF_REG, 14 },
+ { "f15", GDB_SIZEOF_REG, 15 },
+ { "f16", GDB_SIZEOF_REG, 16 },
+ { "f17", GDB_SIZEOF_REG, 17 },
+ { "f18", GDB_SIZEOF_REG, 18 },
+ { "f19", GDB_SIZEOF_REG, 19 },
+ { "f20", GDB_SIZEOF_REG, 20 },
+ { "f21", GDB_SIZEOF_REG, 21 },
+ { "f22", GDB_SIZEOF_REG, 22 },
+ { "f23", GDB_SIZEOF_REG, 23 },
+ { "f24", GDB_SIZEOF_REG, 24 },
+ { "f25", GDB_SIZEOF_REG, 25 },
+ { "f26", GDB_SIZEOF_REG, 26 },
+ { "f27", GDB_SIZEOF_REG, 27 },
+ { "f28", GDB_SIZEOF_REG, 28 },
+ { "f29", GDB_SIZEOF_REG, 29 },
+ { "f30", GDB_SIZEOF_REG, 30 },
+ { "f31", GDB_SIZEOF_REG, 31 },
+ { "fcc0", 1, 0 },
+ { "fcc1", 1, 1 },
+ { "fcc2", 1, 2 },
+ { "fcc3", 1, 3 },
+ { "fcc4", 1, 4 },
+ { "fcc5", 1, 5 },
+ { "fcc6", 1, 6 },
+ { "fcc7", 1, 7 },
+ { "fcsr", 4, 0 },
+};
+
+char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ int reg_offset, reg_size;
+
+ if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+ return NULL;
+
+ reg_offset = dbg_reg_def[regno].offset;
+ reg_size = dbg_reg_def[regno].size;
+
+ if (reg_offset == -1)
+ goto out;
+
+ /* Handle general-purpose/orig_a0/pc/badv registers */
+ if (regno <= DBG_PT_REGS_END) {
+ memcpy(mem, (void *)regs + reg_offset, reg_size);
+ goto out;
+ }
+
+ if (!(regs->csr_euen & CSR_EUEN_FPEN))
+ goto out;
+
+ save_fp(current);
+
+ /* Handle FP registers */
+ switch (regno) {
+ case DBG_FCSR: /* Process the fcsr */
+ memcpy(mem, (void *)¤t->thread.fpu.fcsr, reg_size);
+ break;
+ case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */
+ memcpy(mem, (void *)¤t->thread.fpu.fcc + reg_offset, reg_size);
+ break;
+ case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */
+ memcpy(mem, (void *)¤t->thread.fpu.fpr[reg_offset], reg_size);
+ break;
+ default:
+ break;
+ }
+
+out:
+ return dbg_reg_def[regno].name;
+}
+
+int dbg_set_reg(int regno, void *mem, struct pt_regs *regs)
+{
+ int reg_offset, reg_size;
+
+ if (regno < 0 || regno >= DBG_MAX_REG_NUM)
+ return -EINVAL;
+
+ reg_offset = dbg_reg_def[regno].offset;
+ reg_size = dbg_reg_def[regno].size;
+
+ if (reg_offset == -1)
+ return 0;
+
+ /* Handle general-purpose/orig_a0/pc/badv registers */
+ if (regno <= DBG_PT_REGS_END) {
+ memcpy((void *)regs + reg_offset, mem, reg_size);
+ return 0;
+ }
+
+ if (!(regs->csr_euen & CSR_EUEN_FPEN))
+ return 0;
+
+ /* Handle FP registers */
+ switch (regno) {
+ case DBG_FCSR: /* Process the fcsr */
+ memcpy((void *)¤t->thread.fpu.fcsr, mem, reg_size);
+ break;
+ case DBG_FCC_BASE ... DBG_FCC_END: /* Process the fcc */
+ memcpy((void *)¤t->thread.fpu.fcc + reg_offset, mem, reg_size);
+ break;
+ case DBG_FPR_BASE ... DBG_FPR_END: /* Process the fpr */
+ memcpy((void *)¤t->thread.fpu.fpr[reg_offset], mem, reg_size);
+ break;
+ default:
+ break;
+ }
+
+ restore_fp(current);
+
+ return 0;
+}
+
+/*
+ * Similar to regs_to_gdb_regs() except that process is sleeping and so
+ * we may not be able to get all the info.
+ */
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
+{
+ /* Initialize to zero */
+ memset((char *)gdb_regs, 0, NUMREGBYTES);
+
+ gdb_regs[DBG_LOONGARCH_RA] = p->thread.reg01;
+ gdb_regs[DBG_LOONGARCH_TP] = (long)p;
+ gdb_regs[DBG_LOONGARCH_SP] = p->thread.reg03;
+
+ /* S0 - S8 */
+ gdb_regs[DBG_LOONGARCH_S0] = p->thread.reg23;
+ gdb_regs[DBG_LOONGARCH_S1] = p->thread.reg24;
+ gdb_regs[DBG_LOONGARCH_S2] = p->thread.reg25;
+ gdb_regs[DBG_LOONGARCH_S3] = p->thread.reg26;
+ gdb_regs[DBG_LOONGARCH_S4] = p->thread.reg27;
+ gdb_regs[DBG_LOONGARCH_S5] = p->thread.reg28;
+ gdb_regs[DBG_LOONGARCH_S6] = p->thread.reg29;
+ gdb_regs[DBG_LOONGARCH_S7] = p->thread.reg30;
+ gdb_regs[DBG_LOONGARCH_S8] = p->thread.reg31;
+
+ /*
+ * PC use return address (RA), i.e. the moment after return from __switch_to()
+ */
+ gdb_regs[DBG_LOONGARCH_PC] = p->thread.reg01;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+ regs->csr_era = pc;
+}
+
+void arch_kgdb_breakpoint(void)
+{
+ __asm__ __volatile__ ( \
+ ".globl kgdb_breakinst\n\t" \
+ "nop\n" \
+ "kgdb_breakinst:\tbreak 2\n\t"); /* BRK_KDB = 2 */
+}
+
+/*
+ * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
+ * then try to fall into the debugger
+ */
+static int kgdb_loongarch_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
+{
+ struct die_args *args = (struct die_args *)ptr;
+ struct pt_regs *regs = args->regs;
+
+ /* Userspace events, ignore. */
+ if (user_mode(regs))
+ return NOTIFY_DONE;
+
+ if (!kgdb_io_module_registered)
+ return NOTIFY_DONE;
+
+ if (atomic_read(&kgdb_active) != -1)
+ kgdb_nmicallback(smp_processor_id(), regs);
+
+ if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+
+ if (atomic_read(&kgdb_setting_breakpoint))
+ if (regs->csr_era == (unsigned long)&kgdb_breakinst)
+ regs->csr_era += LOONGARCH_INSN_SIZE;
+
+ return NOTIFY_STOP;
+}
+
+bool kgdb_breakpoint_handler(struct pt_regs *regs)
+{
+ struct die_args args = {
+ .regs = regs,
+ .str = "Break",
+ .err = BRK_KDB,
+ .trapnr = read_csr_excode(),
+ .signr = SIGTRAP,
+
+ };
+
+ return (kgdb_loongarch_notify(NULL, DIE_TRAP, &args) == NOTIFY_STOP) ? true : false;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_loongarch_notify,
+};
+
+static inline void kgdb_arch_update_addr(struct pt_regs *regs,
+ char *remcom_in_buffer)
+{
+ unsigned long addr;
+ char *ptr;
+
+ ptr = &remcom_in_buffer[1];
+ if (kgdb_hex2long(&ptr, &addr))
+ regs->csr_era = addr;
+}
+
+/* Calculate the new address for after a step */
+static int get_step_address(struct pt_regs *regs, unsigned long *next_addr)
+{
+ char cj_val;
+ unsigned int si, si_l, si_h, rd, rj, cj;
+ unsigned long pc = instruction_pointer(regs);
+ union loongarch_instruction *ip = (union loongarch_instruction *)pc;
+
+ if (pc & 3) {
+ pr_warn("%s: invalid pc 0x%lx\n", __func__, pc);
+ return -EINVAL;
+ }
+
+ *next_addr = pc + LOONGARCH_INSN_SIZE;
+
+ si_h = ip->reg0i26_format.immediate_h;
+ si_l = ip->reg0i26_format.immediate_l;
+ switch (ip->reg0i26_format.opcode) {
+ case b_op:
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+ return 0;
+ case bl_op:
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 27);
+ regs->regs[1] = pc + LOONGARCH_INSN_SIZE;
+ return 0;
+ }
+
+ rj = ip->reg1i21_format.rj;
+ cj = (rj & 0x07) + DBG_FCC_BASE;
+ si_l = ip->reg1i21_format.immediate_l;
+ si_h = ip->reg1i21_format.immediate_h;
+ dbg_get_reg(cj, &cj_val, regs);
+ switch (ip->reg1i21_format.opcode) {
+ case beqz_op:
+ if (regs->regs[rj] == 0)
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ case bnez_op:
+ if (regs->regs[rj] != 0)
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ case bceqz_op: /* bceqz_op = bcnez_op */
+ if (((rj & 0x18) == 0x00) && !cj_val) /* bceqz */
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ if (((rj & 0x18) == 0x08) && cj_val) /* bcnez */
+ *next_addr = pc + sign_extend64((si_h << 16 | si_l) << 2, 22);
+ return 0;
+ }
+
+ rj = ip->reg2i16_format.rj;
+ rd = ip->reg2i16_format.rd;
+ si = ip->reg2i16_format.immediate;
+ switch (ip->reg2i16_format.opcode) {
+ case beq_op:
+ if (regs->regs[rj] == regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bne_op:
+ if (regs->regs[rj] != regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case blt_op:
+ if ((long)regs->regs[rj] < (long)regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bge_op:
+ if ((long)regs->regs[rj] >= (long)regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bltu_op:
+ if (regs->regs[rj] < regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case bgeu_op:
+ if (regs->regs[rj] >= regs->regs[rd])
+ *next_addr = pc + sign_extend64(si << 2, 17);
+ return 0;
+ case jirl_op:
+ regs->regs[rd] = pc + LOONGARCH_INSN_SIZE;
+ *next_addr = regs->regs[rj] + sign_extend64(si << 2, 17);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int do_single_step(struct pt_regs *regs)
+{
+ int error = 0;
+ unsigned long addr = 0; /* Determine where the target instruction will send us to */
+
+ error = get_step_address(regs, &addr);
+ if (error)
+ return error;
+
+ /* Store the opcode in the stepped address */
+ error = get_kernel_nofault(stepped_opcode, (void *)addr);
+ if (error)
+ return error;
+
+ stepped_address = addr;
+
+ /* Replace the opcode with the break instruction */
+ error = copy_to_kernel_nofault((void *)stepped_address,
+ arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE);
+ flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
+
+ if (error) {
+ stepped_opcode = 0;
+ stepped_address = 0;
+ } else {
+ kgdb_single_step = 1;
+ atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id());
+ }
+
+ return error;
+}
+
+/* Undo a single step */
+static void undo_single_step(struct pt_regs *regs)
+{
+ if (stepped_opcode) {
+ copy_to_kernel_nofault((void *)stepped_address,
+ (void *)&stepped_opcode, BREAK_INSTR_SIZE);
+ flush_icache_range(stepped_address, stepped_address + BREAK_INSTR_SIZE);
+ }
+
+ stepped_opcode = 0;
+ stepped_address = 0;
+ kgdb_single_step = 0;
+ atomic_set(&kgdb_cpu_doing_single_step, -1);
+}
+
+int kgdb_arch_handle_exception(int vector, int signo, int err_code,
+ char *remcom_in_buffer, char *remcom_out_buffer,
+ struct pt_regs *regs)
+{
+ int ret = 0;
+
+ undo_single_step(regs);
+ regs->csr_prmd |= CSR_PRMD_PWE;
+
+ switch (remcom_in_buffer[0]) {
+ case 'D':
+ case 'k':
+ regs->csr_prmd &= ~CSR_PRMD_PWE;
+ fallthrough;
+ case 'c':
+ kgdb_arch_update_addr(regs, remcom_in_buffer);
+ break;
+ case 's':
+ kgdb_arch_update_addr(regs, remcom_in_buffer);
+ ret = do_single_step(regs);
+ break;
+ default:
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static struct hw_breakpoint {
+ unsigned int enabled;
+ unsigned long addr;
+ int len;
+ int type;
+ struct perf_event * __percpu *pev;
+} breakinfo[LOONGARCH_MAX_BRP];
+
+static int hw_break_reserve_slot(int breakno)
+{
+ int cpu, cnt = 0;
+ struct perf_event **pevent;
+
+ for_each_online_cpu(cpu) {
+ cnt++;
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ if (dbg_reserve_bp_slot(*pevent))
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ for_each_online_cpu(cpu) {
+ cnt--;
+ if (!cnt)
+ break;
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ dbg_release_bp_slot(*pevent);
+ }
+
+ return -1;
+}
+
+static int hw_break_release_slot(int breakno)
+{
+ int cpu;
+ struct perf_event **pevent;
+
+ if (dbg_is_early)
+ return 0;
+
+ for_each_online_cpu(cpu) {
+ pevent = per_cpu_ptr(breakinfo[breakno].pev, cpu);
+ if (dbg_release_bp_slot(*pevent))
+ /*
+ * The debugger is responsible for handing the retry on
+ * remove failure.
+ */
+ return -1;
+ }
+
+ return 0;
+}
+
+static int kgdb_set_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+ if (!breakinfo[i].enabled)
+ break;
+
+ if (i == LOONGARCH_MAX_BRP)
+ return -1;
+
+ switch (bptype) {
+ case BP_HARDWARE_BREAKPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_X;
+ break;
+ case BP_READ_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_R;
+ break;
+ case BP_WRITE_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_W;
+ break;
+ case BP_ACCESS_WATCHPOINT:
+ breakinfo[i].type = HW_BREAKPOINT_RW;
+ break;
+ default:
+ return -1;
+ }
+
+ switch (len) {
+ case 1:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_1;
+ break;
+ case 2:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_2;
+ break;
+ case 4:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_4;
+ break;
+ case 8:
+ breakinfo[i].len = HW_BREAKPOINT_LEN_8;
+ break;
+ default:
+ return -1;
+ }
+
+ breakinfo[i].addr = addr;
+ if (hw_break_reserve_slot(i)) {
+ breakinfo[i].addr = 0;
+ return -1;
+ }
+ breakinfo[i].enabled = 1;
+
+ return 0;
+}
+
+static int kgdb_remove_hw_break(unsigned long addr, int len, enum kgdb_bptype bptype)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++)
+ if (breakinfo[i].addr == addr && breakinfo[i].enabled)
+ break;
+
+ if (i == LOONGARCH_MAX_BRP)
+ return -1;
+
+ if (hw_break_release_slot(i)) {
+ pr_err("Cannot remove hw breakpoint at %lx\n", addr);
+ return -1;
+ }
+ breakinfo[i].enabled = 0;
+
+ return 0;
+}
+
+static void kgdb_disable_hw_break(struct pt_regs *regs)
+{
+ int i;
+ int cpu = raw_smp_processor_id();
+ struct perf_event *bp;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (bp->attr.disabled == 1)
+ continue;
+
+ arch_uninstall_hw_breakpoint(bp);
+ bp->attr.disabled = 1;
+ }
+
+ /* Disable hardware debugging while we are in kgdb */
+ csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+}
+
+static void kgdb_remove_all_hw_break(void)
+{
+ int i;
+ int cpu = raw_smp_processor_id();
+ struct perf_event *bp;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (!bp->attr.disabled) {
+ arch_uninstall_hw_breakpoint(bp);
+ bp->attr.disabled = 1;
+ continue;
+ }
+
+ if (hw_break_release_slot(i))
+ pr_err("KGDB: hw bpt remove failed %lx\n", breakinfo[i].addr);
+ breakinfo[i].enabled = 0;
+ }
+
+ csr_xchg32(0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+ kgdb_watch_activated = 0;
+}
+
+static void kgdb_correct_hw_break(void)
+{
+ int i, activated = 0;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ struct perf_event *bp;
+ int val;
+ int cpu = raw_smp_processor_id();
+
+ if (!breakinfo[i].enabled)
+ continue;
+
+ bp = *per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (bp->attr.disabled != 1)
+ continue;
+
+ bp->attr.bp_addr = breakinfo[i].addr;
+ bp->attr.bp_len = breakinfo[i].len;
+ bp->attr.bp_type = breakinfo[i].type;
+
+ val = hw_breakpoint_arch_parse(bp, &bp->attr, counter_arch_bp(bp));
+ if (val)
+ return;
+
+ val = arch_install_hw_breakpoint(bp);
+ if (!val)
+ bp->attr.disabled = 0;
+ activated = 1;
+ }
+
+ csr_xchg32(activated ? CSR_CRMD_WE : 0, CSR_CRMD_WE, LOONGARCH_CSR_CRMD);
+ kgdb_watch_activated = activated;
+}
+
+const struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = {0x02, 0x00, break_op >> 1, 0x00}, /* BRK_KDB = 2 */
+ .flags = KGDB_HW_BREAKPOINT,
+ .set_hw_breakpoint = kgdb_set_hw_break,
+ .remove_hw_breakpoint = kgdb_remove_hw_break,
+ .disable_hw_break = kgdb_disable_hw_break,
+ .remove_all_hw_break = kgdb_remove_all_hw_break,
+ .correct_hw_break = kgdb_correct_hw_break,
+};
+
+int kgdb_arch_init(void)
+{
+ return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
+{
+ int i, cpu;
+ struct perf_event_attr attr;
+ struct perf_event **pevent;
+
+ hw_breakpoint_init(&attr);
+
+ attr.bp_addr = (unsigned long)kgdb_arch_init;
+ attr.bp_len = HW_BREAKPOINT_LEN_4;
+ attr.bp_type = HW_BREAKPOINT_W;
+ attr.disabled = 1;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (breakinfo[i].pev)
+ continue;
+
+ breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL, NULL);
+ if (IS_ERR((void * __force)breakinfo[i].pev)) {
+ pr_err("kgdb: Could not allocate hw breakpoints.\n");
+ breakinfo[i].pev = NULL;
+ return;
+ }
+
+ for_each_online_cpu(cpu) {
+ pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
+ if (pevent[0]->destroy) {
+ pevent[0]->destroy = NULL;
+ release_bp_slot(*pevent);
+ }
+ }
+ }
+}
+
+void kgdb_arch_exit(void)
+{
+ int i;
+
+ for (i = 0; i < LOONGARCH_MAX_BRP; i++) {
+ if (breakinfo[i].pev) {
+ unregister_wide_hw_breakpoint(breakinfo[i].pev);
+ breakinfo[i].pev = NULL;
+ }
+ }
+
+ unregister_die_notifier(&kgdb_notifier);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Author: Qi Hu <huqi@loongson.cn>
+ * Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
+ */
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-offsets.h>
+#include <asm/errno.h>
+#include <asm/regdef.h>
+
+#define SCR_REG_WIDTH 8
+
+ .macro EX insn, reg, src, offs
+.ex\@: \insn \reg, \src, \offs
+ _asm_extable .ex\@, .L_lbt_fault
+ .endm
+
+/*
+ * Save a thread's lbt context.
+ */
+SYM_FUNC_START(_save_lbt)
+ movscr2gr t1, $scr0 # save scr
+ stptr.d t1, a0, THREAD_SCR0
+ movscr2gr t1, $scr1
+ stptr.d t1, a0, THREAD_SCR1
+ movscr2gr t1, $scr2
+ stptr.d t1, a0, THREAD_SCR2
+ movscr2gr t1, $scr3
+ stptr.d t1, a0, THREAD_SCR3
+
+ x86mfflag t1, 0x3f # save eflags
+ stptr.d t1, a0, THREAD_EFLAGS
+ jr ra
+SYM_FUNC_END(_save_lbt)
+EXPORT_SYMBOL(_save_lbt)
+
+/*
+ * Restore a thread's lbt context.
+ */
+SYM_FUNC_START(_restore_lbt)
+ ldptr.d t1, a0, THREAD_SCR0 # restore scr
+ movgr2scr $scr0, t1
+ ldptr.d t1, a0, THREAD_SCR1
+ movgr2scr $scr1, t1
+ ldptr.d t1, a0, THREAD_SCR2
+ movgr2scr $scr2, t1
+ ldptr.d t1, a0, THREAD_SCR3
+ movgr2scr $scr3, t1
+
+ ldptr.d t1, a0, THREAD_EFLAGS # restore eflags
+ x86mtflag t1, 0x3f
+ jr ra
+SYM_FUNC_END(_restore_lbt)
+EXPORT_SYMBOL(_restore_lbt)
+
+/*
+ * Load scr/eflag with zero.
+ */
+SYM_FUNC_START(_init_lbt)
+ movgr2scr $scr0, zero
+ movgr2scr $scr1, zero
+ movgr2scr $scr2, zero
+ movgr2scr $scr3, zero
+
+ x86mtflag zero, 0x3f
+ jr ra
+SYM_FUNC_END(_init_lbt)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_save_lbt_context)
+ movscr2gr t1, $scr0 # save scr
+ EX st.d t1, a0, (0 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr1
+ EX st.d t1, a0, (1 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr2
+ EX st.d t1, a0, (2 * SCR_REG_WIDTH)
+ movscr2gr t1, $scr3
+ EX st.d t1, a0, (3 * SCR_REG_WIDTH)
+
+ x86mfflag t1, 0x3f # save eflags
+ EX st.w t1, a1, 0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_lbt_context)
+
+/*
+ * a0: scr
+ * a1: eflag
+ */
+SYM_FUNC_START(_restore_lbt_context)
+ EX ld.d t1, a0, (0 * SCR_REG_WIDTH) # restore scr
+ movgr2scr $scr0, t1
+ EX ld.d t1, a0, (1 * SCR_REG_WIDTH)
+ movgr2scr $scr1, t1
+ EX ld.d t1, a0, (2 * SCR_REG_WIDTH)
+ movgr2scr $scr2, t1
+ EX ld.d t1, a0, (3 * SCR_REG_WIDTH)
+ movgr2scr $scr3, t1
+
+ EX ld.w t1, a1, 0 # restore eflags
+ x86mtflag t1, 0x3f
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_lbt_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_save_ftop_context)
+ x86mftop t1
+ st.w t1, a0, 0
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_save_ftop_context)
+
+/*
+ * a0: ftop
+ */
+SYM_FUNC_START(_restore_ftop_context)
+ ld.w t1, a0, 0
+ andi t1, t1, 0x7
+ la.pcrel a0, 1f
+ alsl.d a0, t1, a0, 3
+ jr a0
+1:
+ x86mttop 0
+ b 2f
+ x86mttop 1
+ b 2f
+ x86mttop 2
+ b 2f
+ x86mttop 3
+ b 2f
+ x86mttop 4
+ b 2f
+ x86mttop 5
+ b 2f
+ x86mttop 6
+ b 2f
+ x86mttop 7
+2:
+ li.w a0, 0 # success
+ jr ra
+SYM_FUNC_END(_restore_ftop_context)
+
+.L_lbt_fault:
+ li.w a0, -EFAULT # failure
+ jr ra
void __init pcpu_populate_pte(unsigned long addr)
{
- pgd_t *pgd = pgd_offset_k(addr);
- p4d_t *p4d = p4d_offset(pgd, addr);
- pud_t *pud;
- pmd_t *pmd;
-
- if (p4d_none(*p4d)) {
- pud_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pgd_populate(&init_mm, pgd, new);
-#ifndef __PAGETABLE_PUD_FOLDED
- pud_init(new);
-#endif
- }
-
- pud = pud_offset(p4d, addr);
- if (pud_none(*pud)) {
- pmd_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pud_populate(&init_mm, pud, new);
-#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init(new);
-#endif
- }
-
- pmd = pmd_offset(pud, addr);
- if (!pmd_present(*pmd)) {
- pte_t *new;
-
- new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, new);
- }
+ populate_kernel_pte(addr);
}
void __init setup_per_cpu_areas(void)
{
high_memory = (void *) __va(get_num_physpages() << PAGE_SHIFT);
memblock_free_all();
- setup_zero_pages(); /* This comes from node 0 */
}
int pcibus_to_node(struct pci_bus *bus)
#include <asm/cpu.h>
#include <asm/elf.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
regs->csr_euen = euen;
lose_fpu(0);
+ lose_lbt(0);
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
+ clear_thread_flag(TIF_LBT_CTX_LIVE);
clear_used_math();
regs->csr_era = pc;
regs->regs[3] = sp;
preempt_enable();
- if (used_math())
- memcpy(dst, src, sizeof(struct task_struct));
- else
+ if (!used_math())
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
+ else
+ memcpy(dst, src, offsetof(struct task_struct, thread.lbt.scr0));
+
+#ifdef CONFIG_CPU_HAS_LBT
+ memcpy(&dst->thread.lbt, &src->thread.lbt, sizeof(struct loongarch_lbt));
+#endif
return 0;
}
ptrace_hw_copy_thread(p);
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDSIMD);
+ clear_tsk_thread_flag(p, TIF_USEDLBT);
clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
+ clear_tsk_thread_flag(p, TIF_LBT_CTX_LIVE);
return 0;
}
#include <asm/cpu.h>
#include <asm/cpu-info.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/loongarch.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#endif /* CONFIG_CPU_HAS_LSX */
+#ifdef CONFIG_CPU_HAS_LBT
+static int lbt_get(struct task_struct *target,
+ const struct user_regset *regset,
+ struct membuf to)
+{
+ int r;
+
+ r = membuf_write(&to, &target->thread.lbt.scr0, sizeof(target->thread.lbt.scr0));
+ r = membuf_write(&to, &target->thread.lbt.scr1, sizeof(target->thread.lbt.scr1));
+ r = membuf_write(&to, &target->thread.lbt.scr2, sizeof(target->thread.lbt.scr2));
+ r = membuf_write(&to, &target->thread.lbt.scr3, sizeof(target->thread.lbt.scr3));
+ r = membuf_write(&to, &target->thread.lbt.eflags, sizeof(u32));
+ r = membuf_write(&to, &target->thread.fpu.ftop, sizeof(u32));
+
+ return r;
+}
+
+static int lbt_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int err = 0;
+ const int eflags_start = 4 * sizeof(target->thread.lbt.scr0);
+ const int ftop_start = eflags_start + sizeof(u32);
+
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.lbt.scr0,
+ 0, 4 * sizeof(target->thread.lbt.scr0));
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.lbt.eflags,
+ eflags_start, ftop_start);
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.ftop,
+ ftop_start, ftop_start + sizeof(u32));
+
+ return err;
+}
+#endif /* CONFIG_CPU_HAS_LBT */
+
#ifdef CONFIG_HAVE_HW_BREAKPOINT
/*
#ifdef CONFIG_CPU_HAS_LASX
REGSET_LASX,
#endif
+#ifdef CONFIG_CPU_HAS_LBT
+ REGSET_LBT,
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
REGSET_HW_BREAK,
REGSET_HW_WATCH,
.set = simd_set,
},
#endif
+#ifdef CONFIG_CPU_HAS_LBT
+ [REGSET_LBT] = {
+ .core_note_type = NT_LOONGARCH_LBT,
+ .n = 5,
+ .size = sizeof(u64),
+ .align = sizeof(u64),
+ .regset_get = lbt_get,
+ .set = lbt_set,
+ },
+#endif
#ifdef CONFIG_HAVE_HW_BREAKPOINT
[REGSET_HW_BREAK] = {
.core_note_type = NT_LOONGARCH_HW_BREAK,
*new_addr = (unsigned long)reloc_offset;
}
-void * __init relocate_kernel(void)
+unsigned long __init relocate_kernel(void)
{
unsigned long kernel_length;
unsigned long random_offset = 0;
void *location_new = _text; /* Default to original kernel start */
- void *kernel_entry = start_kernel; /* Default to original kernel entry point */
char *cmdline = early_ioremap(fw_arg1, COMMAND_LINE_SIZE); /* Boot command line is passed in fw_arg1 */
strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
reloc_offset += random_offset;
- /* Return the new kernel's entry point */
- kernel_entry = RELOCATED_KASLR(start_kernel);
-
/* The current thread is now within the relocated kernel */
__current_thread_info = RELOCATED_KASLR(__current_thread_info);
relocate_absolute(random_offset);
- return kernel_entry;
+ return random_offset;
}
/*
#endif
paging_init();
+
+#ifdef CONFIG_KASAN
+ kasan_init();
+#endif
}
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
/* Make sure we will not lose FPU ownership */
#define lock_fpu_owner() ({ preempt_disable(); pagefault_disable(); })
#define unlock_fpu_owner() ({ pagefault_enable(); preempt_enable(); })
+/* Make sure we will not lose LBT ownership */
+#define lock_lbt_owner() ({ preempt_disable(); pagefault_disable(); })
+#define unlock_lbt_owner() ({ pagefault_enable(); preempt_enable(); })
/* Assembly functions to move context to/from the FPU */
extern asmlinkage int
extern asmlinkage int
_restore_lasx_context(void __user *fpregs, void __user *fcc, void __user *fcsr);
+#ifdef CONFIG_CPU_HAS_LBT
+extern asmlinkage int _save_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _restore_lbt_context(void __user *regs, void __user *eflags);
+extern asmlinkage int _save_ftop_context(void __user *ftop);
+extern asmlinkage int _restore_ftop_context(void __user *ftop);
+#endif
+
struct rt_sigframe {
struct siginfo rs_info;
struct ucontext rs_uctx;
struct _ctx_layout fpu;
struct _ctx_layout lsx;
struct _ctx_layout lasx;
+ struct _ctx_layout lbt;
struct _ctx_layout end;
};
return err;
}
+#ifdef CONFIG_CPU_HAS_LBT
+static int copy_lbt_to_sigcontext(struct lbt_context __user *ctx)
+{
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ err |= __put_user(current->thread.lbt.scr0, ®s[0]);
+ err |= __put_user(current->thread.lbt.scr1, ®s[1]);
+ err |= __put_user(current->thread.lbt.scr2, ®s[2]);
+ err |= __put_user(current->thread.lbt.scr3, ®s[3]);
+ err |= __put_user(current->thread.lbt.eflags, eflags);
+
+ return err;
+}
+
+static int copy_lbt_from_sigcontext(struct lbt_context __user *ctx)
+{
+ int err = 0;
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ err |= __get_user(current->thread.lbt.scr0, ®s[0]);
+ err |= __get_user(current->thread.lbt.scr1, ®s[1]);
+ err |= __get_user(current->thread.lbt.scr2, ®s[2]);
+ err |= __get_user(current->thread.lbt.scr3, ®s[3]);
+ err |= __get_user(current->thread.lbt.eflags, eflags);
+
+ return err;
+}
+
+static int copy_ftop_to_sigcontext(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return __put_user(current->thread.fpu.ftop, ftop);
+}
+
+static int copy_ftop_from_sigcontext(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return __get_user(current->thread.fpu.ftop, ftop);
+}
+#endif
+
/*
* Wrappers for the assembly _{save,restore}_fp_context functions.
*/
return _restore_lasx_context(regs, fcc, fcsr);
}
+/*
+ * Wrappers for the assembly _{save,restore}_lbt_context functions.
+ */
+#ifdef CONFIG_CPU_HAS_LBT
+static int save_hw_lbt_context(struct lbt_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ return _save_lbt_context(regs, eflags);
+}
+
+static int restore_hw_lbt_context(struct lbt_context __user *ctx)
+{
+ uint64_t __user *regs = (uint64_t *)&ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&ctx->eflags;
+
+ return _restore_lbt_context(regs, eflags);
+}
+
+static int save_hw_ftop_context(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return _save_ftop_context(ftop);
+}
+
+static int restore_hw_ftop_context(struct lbt_context __user *ctx)
+{
+ uint32_t __user *ftop = &ctx->ftop;
+
+ return _restore_ftop_context(ftop);
+}
+#endif
+
static int fcsr_pending(unsigned int __user *fcsr)
{
int err, sig = 0;
return err ?: sig;
}
+#ifdef CONFIG_CPU_HAS_LBT
+static int protected_save_lbt_context(struct extctx_layout *extctx)
+{
+ int err = 0;
+ struct sctx_info __user *info = extctx->lbt.addr;
+ struct lbt_context __user *lbt_ctx =
+ (struct lbt_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+ while (1) {
+ lock_lbt_owner();
+ if (is_lbt_owner())
+ err |= save_hw_lbt_context(lbt_ctx);
+ else
+ err |= copy_lbt_to_sigcontext(lbt_ctx);
+ if (is_fpu_owner())
+ err |= save_hw_ftop_context(lbt_ctx);
+ else
+ err |= copy_ftop_to_sigcontext(lbt_ctx);
+ unlock_lbt_owner();
+
+ err |= __put_user(LBT_CTX_MAGIC, &info->magic);
+ err |= __put_user(extctx->lbt.size, &info->size);
+
+ if (likely(!err))
+ break;
+ /* Touch the LBT context and try again */
+ err = __put_user(0, ®s[0]) | __put_user(0, eflags);
+
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+
+static int protected_restore_lbt_context(struct extctx_layout *extctx)
+{
+ int err = 0, tmp __maybe_unused;
+ struct sctx_info __user *info = extctx->lbt.addr;
+ struct lbt_context __user *lbt_ctx =
+ (struct lbt_context *)get_ctx_through_ctxinfo(info);
+ uint64_t __user *regs = (uint64_t *)&lbt_ctx->regs;
+ uint32_t __user *eflags = (uint32_t *)&lbt_ctx->eflags;
+
+ while (1) {
+ lock_lbt_owner();
+ if (is_lbt_owner())
+ err |= restore_hw_lbt_context(lbt_ctx);
+ else
+ err |= copy_lbt_from_sigcontext(lbt_ctx);
+ if (is_fpu_owner())
+ err |= restore_hw_ftop_context(lbt_ctx);
+ else
+ err |= copy_ftop_from_sigcontext(lbt_ctx);
+ unlock_lbt_owner();
+
+ if (likely(!err))
+ break;
+ /* Touch the LBT context and try again */
+ err = __get_user(tmp, ®s[0]) | __get_user(tmp, eflags);
+
+ if (err)
+ return err;
+ }
+
+ return err;
+}
+#endif
+
static int setup_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
struct extctx_layout *extctx)
{
else if (extctx->fpu.addr)
err |= protected_save_fpu_context(extctx);
+#ifdef CONFIG_CPU_HAS_LBT
+ if (extctx->lbt.addr)
+ err |= protected_save_lbt_context(extctx);
+#endif
+
/* Set the "end" magic */
info = (struct sctx_info *)extctx->end.addr;
err |= __put_user(0, &info->magic);
extctx->lasx.addr = info;
break;
+ case LBT_CTX_MAGIC:
+ if (size < (sizeof(struct sctx_info) +
+ sizeof(struct lbt_context)))
+ goto invalid;
+ extctx->lbt.addr = info;
+ break;
+
default:
goto invalid;
}
else if (extctx.fpu.addr)
err |= protected_restore_fpu_context(&extctx);
+#ifdef CONFIG_CPU_HAS_LBT
+ if (extctx.lbt.addr)
+ err |= protected_restore_lbt_context(&extctx);
+#endif
+
bad:
return err;
}
sizeof(struct fpu_context), FPU_CTX_ALIGN, new_sp);
}
+#ifdef CONFIG_CPU_HAS_LBT
+ if (cpu_has_lbt && thread_lbt_context_live()) {
+ new_sp = extframe_alloc(extctx, &extctx->lbt,
+ sizeof(struct lbt_context), LBT_CTX_ALIGN, new_sp);
+ }
+#endif
+
return new_sp;
}
struct pt_regs dummyregs;
struct unwind_state state;
- regs = &dummyregs;
+ if (!regs) {
+ regs = &dummyregs;
- if (task == current) {
- regs->regs[3] = (unsigned long)__builtin_frame_address(0);
- regs->csr_era = (unsigned long)__builtin_return_address(0);
- } else {
- regs->regs[3] = thread_saved_fp(task);
- regs->csr_era = thread_saved_ra(task);
+ if (task == current) {
+ regs->regs[3] = (unsigned long)__builtin_frame_address(0);
+ regs->csr_era = (unsigned long)__builtin_return_address(0);
+ } else {
+ regs->regs[3] = thread_saved_fp(task);
+ regs->csr_era = thread_saved_ra(task);
+ }
+ regs->regs[1] = 0;
}
- regs->regs[1] = 0;
for (unwind_start(&state, task, regs);
!unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
#include <asm/break.h>
#include <asm/cpu.h>
#include <asm/fpu.h>
+#include <asm/lbt.h>
#include <asm/inst.h>
+#include <asm/kgdb.h>
#include <asm/loongarch.h>
#include <asm/mmu_context.h>
#include <asm/pgtable.h>
* pertain to them.
*/
switch (bcode) {
+ case BRK_KDB:
+ if (kgdb_breakpoint_handler(regs))
+ goto out;
+ else
+ break;
case BRK_KPROBE_BP:
if (kprobe_breakpoint_handler(regs))
goto out;
#ifndef CONFIG_HAVE_HW_BREAKPOINT
pr_warn("Hardware watch point handler not implemented!\n");
#else
+ if (kgdb_breakpoint_handler(regs))
+ goto out;
+
if (test_tsk_thread_flag(current, TIF_SINGLESTEP)) {
int llbit = (csr_read32(LOONGARCH_CSR_LLBCTL) & 0x1);
unsigned long pc = instruction_pointer(regs);
irqentry_exit(regs, state);
}
+static void init_restore_lbt(void)
+{
+ if (!thread_lbt_context_live()) {
+ /* First time LBT context user */
+ init_lbt();
+ set_thread_flag(TIF_LBT_CTX_LIVE);
+ } else {
+ if (!is_lbt_owner())
+ own_lbt_inatomic(1);
+ }
+
+ BUG_ON(!is_lbt_enabled());
+}
+
asmlinkage void noinstr do_lbt(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
- local_irq_enable();
- force_sig(SIGILL);
- local_irq_disable();
+ /*
+ * BTD (Binary Translation Disable exception) can be triggered
+ * during FP save/restore if TM (Top Mode) is on, which may
+ * cause irq_enable during 'switch_to'. To avoid this situation
+ * (including the user using 'MOVGR2GCSR' to turn on TM, which
+ * will not trigger the BTE), we need to check PRMD first.
+ */
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_enable();
+
+ if (!cpu_has_lbt) {
+ force_sig(SIGILL);
+ goto out;
+ }
+ BUG_ON(is_lbt_enabled());
+
+ preempt_disable();
+ init_restore_lbt();
+ preempt_enable();
+
+out:
+ if (regs->csr_prmd & CSR_PRMD_PIE)
+ local_irq_disable();
irqentry_exit(regs, state);
}
lib-y += delay.o memset.o memcpy.o memmove.o \
clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
+obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
+
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
#include <asm/cpu.h>
#include <asm/regdef.h>
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
- sub.d a0, a2, a0
- addi.d a0, a0, (\to) * (-8)
- jr ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
- addi.d a0, a1, -\to
- jr ra
-.endr
-
SYM_FUNC_START(__clear_user)
/*
* Some CPUs support hardware unaligned access
2: move a0, a1
jr ra
- _asm_extable 1b, .L_fixup_handle_s0
+ _asm_extable 1b, 2b
SYM_FUNC_END(__clear_user_generic)
/*
jr ra
/* fixup and ex_table */
- _asm_extable 0b, .L_fixup_handle_0
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_1
- _asm_extable 3b, .L_fixup_handle_2
- _asm_extable 4b, .L_fixup_handle_3
- _asm_extable 5b, .L_fixup_handle_4
- _asm_extable 6b, .L_fixup_handle_5
- _asm_extable 7b, .L_fixup_handle_6
- _asm_extable 8b, .L_fixup_handle_7
- _asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_1
- _asm_extable 11b, .L_fixup_handle_2
- _asm_extable 12b, .L_fixup_handle_3
- _asm_extable 13b, .L_fixup_handle_0
- _asm_extable 14b, .L_fixup_handle_1
- _asm_extable 15b, .L_fixup_handle_0
- _asm_extable 16b, .L_fixup_handle_0
- _asm_extable 17b, .L_fixup_handle_s0
- _asm_extable 18b, .L_fixup_handle_s0
- _asm_extable 19b, .L_fixup_handle_s0
- _asm_extable 20b, .L_fixup_handle_s2
- _asm_extable 21b, .L_fixup_handle_s0
- _asm_extable 22b, .L_fixup_handle_s0
- _asm_extable 23b, .L_fixup_handle_s4
- _asm_extable 24b, .L_fixup_handle_s0
- _asm_extable 25b, .L_fixup_handle_s4
- _asm_extable 26b, .L_fixup_handle_s0
- _asm_extable 27b, .L_fixup_handle_s4
- _asm_extable 28b, .L_fixup_handle_s0
+.Llarge_fixup:
+ sub.d a1, a2, a0
+
+.Lsmall_fixup:
+29: st.b zero, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, -1
+ bgt a1, zero, 29b
+
+.Lexit:
+ move a0, a1
+ jr ra
+
+ _asm_extable 0b, .Lsmall_fixup
+ _asm_extable 1b, .Llarge_fixup
+ _asm_extable 2b, .Llarge_fixup
+ _asm_extable 3b, .Llarge_fixup
+ _asm_extable 4b, .Llarge_fixup
+ _asm_extable 5b, .Llarge_fixup
+ _asm_extable 6b, .Llarge_fixup
+ _asm_extable 7b, .Llarge_fixup
+ _asm_extable 8b, .Llarge_fixup
+ _asm_extable 9b, .Llarge_fixup
+ _asm_extable 10b, .Llarge_fixup
+ _asm_extable 11b, .Llarge_fixup
+ _asm_extable 12b, .Llarge_fixup
+ _asm_extable 13b, .Llarge_fixup
+ _asm_extable 14b, .Llarge_fixup
+ _asm_extable 15b, .Llarge_fixup
+ _asm_extable 16b, .Llarge_fixup
+ _asm_extable 17b, .Lexit
+ _asm_extable 18b, .Lsmall_fixup
+ _asm_extable 19b, .Lsmall_fixup
+ _asm_extable 20b, .Lsmall_fixup
+ _asm_extable 21b, .Lsmall_fixup
+ _asm_extable 22b, .Lsmall_fixup
+ _asm_extable 23b, .Lsmall_fixup
+ _asm_extable 24b, .Lsmall_fixup
+ _asm_extable 25b, .Lsmall_fixup
+ _asm_extable 26b, .Lsmall_fixup
+ _asm_extable 27b, .Lsmall_fixup
+ _asm_extable 28b, .Lsmall_fixup
+ _asm_extable 29b, .Lexit
SYM_FUNC_END(__clear_user_fast)
#include <asm/cpu.h>
#include <asm/regdef.h>
-.irp to, 0, 1, 2, 3, 4, 5, 6, 7
-.L_fixup_handle_\to\():
- sub.d a0, a2, a0
- addi.d a0, a0, (\to) * (-8)
- jr ra
-.endr
-
-.irp to, 0, 2, 4
-.L_fixup_handle_s\to\():
- addi.d a0, a2, -\to
- jr ra
-.endr
-
SYM_FUNC_START(__copy_user)
/*
* Some CPUs support hardware unaligned access
3: move a0, a2
jr ra
- _asm_extable 1b, .L_fixup_handle_s0
- _asm_extable 2b, .L_fixup_handle_s0
+ _asm_extable 1b, 3b
+ _asm_extable 2b, 3b
SYM_FUNC_END(__copy_user_generic)
/*
sltui t0, a2, 9
bnez t0, .Lsmall
- add.d a3, a1, a2
- add.d a2, a0, a2
0: ld.d t0, a1, 0
1: st.d t0, a0, 0
+ add.d a3, a1, a2
+ add.d a2, a0, a2
/* align up destination address */
andi t1, a0, 7
7: ld.d t5, a1, 40
8: ld.d t6, a1, 48
9: ld.d t7, a1, 56
- addi.d a1, a1, 64
10: st.d t0, a0, 0
11: st.d t1, a0, 8
12: st.d t2, a0, 16
15: st.d t5, a0, 40
16: st.d t6, a0, 48
17: st.d t7, a0, 56
+ addi.d a1, a1, 64
addi.d a0, a0, 64
bltu a1, a4, .Lloop64
19: ld.d t1, a1, 8
20: ld.d t2, a1, 16
21: ld.d t3, a1, 24
- addi.d a1, a1, 32
22: st.d t0, a0, 0
23: st.d t1, a0, 8
24: st.d t2, a0, 16
25: st.d t3, a0, 24
+ addi.d a1, a1, 32
addi.d a0, a0, 32
.Llt32:
bgeu a1, a4, .Llt16
26: ld.d t0, a1, 0
27: ld.d t1, a1, 8
- addi.d a1, a1, 16
28: st.d t0, a0, 0
29: st.d t1, a0, 8
+ addi.d a1, a1, 16
addi.d a0, a0, 16
.Llt16:
bgeu a1, a4, .Llt8
30: ld.d t0, a1, 0
31: st.d t0, a0, 0
+ addi.d a1, a1, 8
addi.d a0, a0, 8
.Llt8:
jr ra
/* fixup and ex_table */
- _asm_extable 0b, .L_fixup_handle_0
- _asm_extable 1b, .L_fixup_handle_0
- _asm_extable 2b, .L_fixup_handle_0
- _asm_extable 3b, .L_fixup_handle_0
- _asm_extable 4b, .L_fixup_handle_0
- _asm_extable 5b, .L_fixup_handle_0
- _asm_extable 6b, .L_fixup_handle_0
- _asm_extable 7b, .L_fixup_handle_0
- _asm_extable 8b, .L_fixup_handle_0
- _asm_extable 9b, .L_fixup_handle_0
- _asm_extable 10b, .L_fixup_handle_0
- _asm_extable 11b, .L_fixup_handle_1
- _asm_extable 12b, .L_fixup_handle_2
- _asm_extable 13b, .L_fixup_handle_3
- _asm_extable 14b, .L_fixup_handle_4
- _asm_extable 15b, .L_fixup_handle_5
- _asm_extable 16b, .L_fixup_handle_6
- _asm_extable 17b, .L_fixup_handle_7
- _asm_extable 18b, .L_fixup_handle_0
- _asm_extable 19b, .L_fixup_handle_0
- _asm_extable 20b, .L_fixup_handle_0
- _asm_extable 21b, .L_fixup_handle_0
- _asm_extable 22b, .L_fixup_handle_0
- _asm_extable 23b, .L_fixup_handle_1
- _asm_extable 24b, .L_fixup_handle_2
- _asm_extable 25b, .L_fixup_handle_3
- _asm_extable 26b, .L_fixup_handle_0
- _asm_extable 27b, .L_fixup_handle_0
- _asm_extable 28b, .L_fixup_handle_0
- _asm_extable 29b, .L_fixup_handle_1
- _asm_extable 30b, .L_fixup_handle_0
- _asm_extable 31b, .L_fixup_handle_0
- _asm_extable 32b, .L_fixup_handle_0
- _asm_extable 33b, .L_fixup_handle_0
- _asm_extable 34b, .L_fixup_handle_s0
- _asm_extable 35b, .L_fixup_handle_s0
- _asm_extable 36b, .L_fixup_handle_s0
- _asm_extable 37b, .L_fixup_handle_s0
- _asm_extable 38b, .L_fixup_handle_s0
- _asm_extable 39b, .L_fixup_handle_s0
- _asm_extable 40b, .L_fixup_handle_s0
- _asm_extable 41b, .L_fixup_handle_s2
- _asm_extable 42b, .L_fixup_handle_s0
- _asm_extable 43b, .L_fixup_handle_s0
- _asm_extable 44b, .L_fixup_handle_s0
- _asm_extable 45b, .L_fixup_handle_s0
- _asm_extable 46b, .L_fixup_handle_s0
- _asm_extable 47b, .L_fixup_handle_s4
- _asm_extable 48b, .L_fixup_handle_s0
- _asm_extable 49b, .L_fixup_handle_s0
- _asm_extable 50b, .L_fixup_handle_s0
- _asm_extable 51b, .L_fixup_handle_s4
- _asm_extable 52b, .L_fixup_handle_s0
- _asm_extable 53b, .L_fixup_handle_s0
- _asm_extable 54b, .L_fixup_handle_s0
- _asm_extable 55b, .L_fixup_handle_s4
- _asm_extable 56b, .L_fixup_handle_s0
- _asm_extable 57b, .L_fixup_handle_s0
+.Llarge_fixup:
+ sub.d a2, a2, a0
+
+.Lsmall_fixup:
+58: ld.b t0, a1, 0
+59: st.b t0, a0, 0
+ addi.d a0, a0, 1
+ addi.d a1, a1, 1
+ addi.d a2, a2, -1
+ bgt a2, zero, 58b
+
+.Lexit:
+ move a0, a2
+ jr ra
+
+ _asm_extable 0b, .Lsmall_fixup
+ _asm_extable 1b, .Lsmall_fixup
+ _asm_extable 2b, .Llarge_fixup
+ _asm_extable 3b, .Llarge_fixup
+ _asm_extable 4b, .Llarge_fixup
+ _asm_extable 5b, .Llarge_fixup
+ _asm_extable 6b, .Llarge_fixup
+ _asm_extable 7b, .Llarge_fixup
+ _asm_extable 8b, .Llarge_fixup
+ _asm_extable 9b, .Llarge_fixup
+ _asm_extable 10b, .Llarge_fixup
+ _asm_extable 11b, .Llarge_fixup
+ _asm_extable 12b, .Llarge_fixup
+ _asm_extable 13b, .Llarge_fixup
+ _asm_extable 14b, .Llarge_fixup
+ _asm_extable 15b, .Llarge_fixup
+ _asm_extable 16b, .Llarge_fixup
+ _asm_extable 17b, .Llarge_fixup
+ _asm_extable 18b, .Llarge_fixup
+ _asm_extable 19b, .Llarge_fixup
+ _asm_extable 20b, .Llarge_fixup
+ _asm_extable 21b, .Llarge_fixup
+ _asm_extable 22b, .Llarge_fixup
+ _asm_extable 23b, .Llarge_fixup
+ _asm_extable 24b, .Llarge_fixup
+ _asm_extable 25b, .Llarge_fixup
+ _asm_extable 26b, .Llarge_fixup
+ _asm_extable 27b, .Llarge_fixup
+ _asm_extable 28b, .Llarge_fixup
+ _asm_extable 29b, .Llarge_fixup
+ _asm_extable 30b, .Llarge_fixup
+ _asm_extable 31b, .Llarge_fixup
+ _asm_extable 32b, .Llarge_fixup
+ _asm_extable 33b, .Llarge_fixup
+ _asm_extable 34b, .Lexit
+ _asm_extable 35b, .Lexit
+ _asm_extable 36b, .Lsmall_fixup
+ _asm_extable 37b, .Lsmall_fixup
+ _asm_extable 38b, .Lsmall_fixup
+ _asm_extable 39b, .Lsmall_fixup
+ _asm_extable 40b, .Lsmall_fixup
+ _asm_extable 41b, .Lsmall_fixup
+ _asm_extable 42b, .Lsmall_fixup
+ _asm_extable 43b, .Lsmall_fixup
+ _asm_extable 44b, .Lsmall_fixup
+ _asm_extable 45b, .Lsmall_fixup
+ _asm_extable 46b, .Lsmall_fixup
+ _asm_extable 47b, .Lsmall_fixup
+ _asm_extable 48b, .Lsmall_fixup
+ _asm_extable 49b, .Lsmall_fixup
+ _asm_extable 50b, .Lsmall_fixup
+ _asm_extable 51b, .Lsmall_fixup
+ _asm_extable 52b, .Lsmall_fixup
+ _asm_extable 53b, .Lsmall_fixup
+ _asm_extable 54b, .Lsmall_fixup
+ _asm_extable 55b, .Lsmall_fixup
+ _asm_extable 56b, .Lsmall_fixup
+ _asm_extable 57b, .Lsmall_fixup
+ _asm_extable 58b, .Lexit
+ _asm_extable 59b, .Lexit
SYM_FUNC_END(__copy_user_fast)
#include <asm/cpu.h>
#include <asm/regdef.h>
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memcpy)
/*
* Some CPUs support hardware unaligned access
ALTERNATIVE "b __memcpy_generic", \
"b __memcpy_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memcpy)
-_ASM_NOKPROBE(memcpy)
+SYM_FUNC_ALIAS(__memcpy, memcpy)
EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+_ASM_NOKPROBE(memcpy)
+_ASM_NOKPROBE(__memcpy)
/*
* void *__memcpy_generic(void *dst, const void *src, size_t n)
#include <asm/cpu.h>
#include <asm/regdef.h>
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memmove)
- blt a0, a1, memcpy /* dst < src, memcpy */
- blt a1, a0, rmemcpy /* src < dst, rmemcpy */
- jr ra /* dst == src, return */
+ blt a0, a1, __memcpy /* dst < src, memcpy */
+ blt a1, a0, __rmemcpy /* src < dst, rmemcpy */
+ jr ra /* dst == src, return */
SYM_FUNC_END(memmove)
-_ASM_NOKPROBE(memmove)
+SYM_FUNC_ALIAS(__memmove, memmove)
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL(__memmove)
+
+_ASM_NOKPROBE(memmove)
+_ASM_NOKPROBE(__memmove)
-SYM_FUNC_START(rmemcpy)
+SYM_FUNC_START(__rmemcpy)
/*
* Some CPUs support hardware unaligned access
*/
ALTERNATIVE "b __rmemcpy_generic", \
"b __rmemcpy_fast", CPU_FEATURE_UAL
-SYM_FUNC_END(rmemcpy)
-_ASM_NOKPROBE(rmemcpy)
+SYM_FUNC_END(__rmemcpy)
+_ASM_NOKPROBE(__rmemcpy)
/*
* void *__rmemcpy_generic(void *dst, const void *src, size_t n)
bstrins.d \r0, \r0, 63, 32
.endm
+.section .noinstr.text, "ax"
+
SYM_FUNC_START(memset)
/*
* Some CPUs support hardware unaligned access
ALTERNATIVE "b __memset_generic", \
"b __memset_fast", CPU_FEATURE_UAL
SYM_FUNC_END(memset)
-_ASM_NOKPROBE(memset)
+SYM_FUNC_ALIAS(__memset, memset)
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL(__memset)
+
+_ASM_NOKPROBE(memset)
+_ASM_NOKPROBE(__memset)
/*
* void *__memset_generic(void *s, int c, size_t n)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include "xor_simd.h"
+
+/*
+ * Process one cache line (64 bytes) per loop. This is assuming all future
+ * popular LoongArch cores are similar performance-characteristics-wise to the
+ * current models.
+ */
+#define LINE_WIDTH 64
+
+#ifdef CONFIG_CPU_HAS_LSX
+
+#define LD(reg, base, offset) \
+ "vld $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "vst $vr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "vxor.v $vr" #dj ", $vr" #dj ", $vr" #k "\n\t"
+
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 16) \
+ LD(2, base, 32) \
+ LD(3, base, 48)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(4, base, 0) \
+ LD(5, base, 16) \
+ LD(6, base, 32) \
+ LD(7, base, 48) \
+ XOR(0, 4) \
+ XOR(1, 5) \
+ XOR(2, 6) \
+ XOR(3, 7)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 16) \
+ ST(2, base, 32) \
+ ST(3, base, 48)
+
+#define XOR_FUNC_NAME(nr) __xor_lsx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+
+#define LD(reg, base, offset) \
+ "xvld $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define ST(reg, base, offset) \
+ "xvst $xr" #reg ", %[" #base "], " #offset "\n\t"
+#define XOR(dj, k) "xvxor.v $xr" #dj ", $xr" #dj ", $xr" #k "\n\t"
+
+#define LD_INOUT_LINE(base) \
+ LD(0, base, 0) \
+ LD(1, base, 32)
+
+#define LD_AND_XOR_LINE(base) \
+ LD(2, base, 0) \
+ LD(3, base, 32) \
+ XOR(0, 2) \
+ XOR(1, 3)
+
+#define ST_LINE(base) \
+ ST(0, base, 0) \
+ ST(1, base, 32)
+
+#define XOR_FUNC_NAME(nr) __xor_lasx_##nr
+#include "xor_template.c"
+
+#undef LD
+#undef ST
+#undef XOR
+#undef LD_INOUT_LINE
+#undef LD_AND_XOR_LINE
+#undef ST_LINE
+#undef XOR_FUNC_NAME
+
+#endif /* CONFIG_CPU_HAS_LASX */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Simple interface to link xor_simd.c and xor_simd_glue.c
+ *
+ * Separating these files ensures that no SIMD instructions are run outside of
+ * the kfpu critical section.
+ */
+
+#ifndef __LOONGARCH_LIB_XOR_SIMD_H
+#define __LOONGARCH_LIB_XOR_SIMD_H
+
+#ifdef CONFIG_CPU_HAS_LSX
+void __xor_lsx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_lsx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lsx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_lsx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+void __xor_lasx_2(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2);
+void __xor_lasx_3(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3);
+void __xor_lasx_4(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4);
+void __xor_lasx_5(unsigned long bytes, unsigned long * __restrict p1,
+ const unsigned long * __restrict p2, const unsigned long * __restrict p3,
+ const unsigned long * __restrict p4, const unsigned long * __restrict p5);
+#endif /* CONFIG_CPU_HAS_LASX */
+
+#endif /* __LOONGARCH_LIB_XOR_SIMD_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * LoongArch SIMD XOR operations
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <asm/fpu.h>
+#include <asm/xor_simd.h>
+#include "xor_simd.h"
+
+#define MAKE_XOR_GLUE_2(flavor) \
+void xor_##flavor##_2(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_2(bytes, p1, p2); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_2)
+
+#define MAKE_XOR_GLUE_3(flavor) \
+void xor_##flavor##_3(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_3(bytes, p1, p2, p3); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_3)
+
+#define MAKE_XOR_GLUE_4(flavor) \
+void xor_##flavor##_4(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3, \
+ const unsigned long * __restrict p4) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_4(bytes, p1, p2, p3, p4); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_4)
+
+#define MAKE_XOR_GLUE_5(flavor) \
+void xor_##flavor##_5(unsigned long bytes, unsigned long * __restrict p1, \
+ const unsigned long * __restrict p2, \
+ const unsigned long * __restrict p3, \
+ const unsigned long * __restrict p4, \
+ const unsigned long * __restrict p5) \
+{ \
+ kernel_fpu_begin(); \
+ __xor_##flavor##_5(bytes, p1, p2, p3, p4, p5); \
+ kernel_fpu_end(); \
+} \
+EXPORT_SYMBOL_GPL(xor_##flavor##_5)
+
+#define MAKE_XOR_GLUES(flavor) \
+ MAKE_XOR_GLUE_2(flavor); \
+ MAKE_XOR_GLUE_3(flavor); \
+ MAKE_XOR_GLUE_4(flavor); \
+ MAKE_XOR_GLUE_5(flavor)
+
+#ifdef CONFIG_CPU_HAS_LSX
+MAKE_XOR_GLUES(lsx);
+#endif
+
+#ifdef CONFIG_CPU_HAS_LASX
+MAKE_XOR_GLUES(lasx);
+#endif
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Template for XOR operations, instantiated in xor_simd.c.
+ *
+ * Expected preprocessor definitions:
+ *
+ * - LINE_WIDTH
+ * - XOR_FUNC_NAME(nr)
+ * - LD_INOUT_LINE(buf)
+ * - LD_AND_XOR_LINE(buf)
+ * - ST_LINE(buf)
+ */
+
+void XOR_FUNC_NAME(2)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(3)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(4)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3,
+ const unsigned long * __restrict v4)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ LD_AND_XOR_LINE(v4)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4)
+ : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ v4 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
+
+void XOR_FUNC_NAME(5)(unsigned long bytes,
+ unsigned long * __restrict v1,
+ const unsigned long * __restrict v2,
+ const unsigned long * __restrict v3,
+ const unsigned long * __restrict v4,
+ const unsigned long * __restrict v5)
+{
+ unsigned long lines = bytes / LINE_WIDTH;
+
+ do {
+ __asm__ __volatile__ (
+ LD_INOUT_LINE(v1)
+ LD_AND_XOR_LINE(v2)
+ LD_AND_XOR_LINE(v3)
+ LD_AND_XOR_LINE(v4)
+ LD_AND_XOR_LINE(v5)
+ ST_LINE(v1)
+ : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4),
+ [v5] "r"(v5) : "memory"
+ );
+
+ v1 += LINE_WIDTH / sizeof(unsigned long);
+ v2 += LINE_WIDTH / sizeof(unsigned long);
+ v3 += LINE_WIDTH / sizeof(unsigned long);
+ v4 += LINE_WIDTH / sizeof(unsigned long);
+ v5 += LINE_WIDTH / sizeof(unsigned long);
+ } while (--lines > 0);
+}
fault.o ioremap.o maccess.o mmap.o pgtable.o page.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_KASAN) += kasan_init.o
+
+KASAN_SANITIZE_kasan_init.o := n
current_cpu_data.cache_leaves_present = leaf;
current_cpu_data.options |= LOONGARCH_CPU_PREFETCH;
- shm_align_mask = PAGE_SIZE - 1;
}
static const pgprot_t protection_map[16] = {
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <linux/uaccess.h>
+#include <linux/kfence.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
int show_unhandled_signals = 1;
-static void __kprobes no_context(struct pt_regs *regs, unsigned long address)
+static void __kprobes no_context(struct pt_regs *regs,
+ unsigned long write, unsigned long address)
{
const int field = sizeof(unsigned long) * 2;
if (fixup_exception(regs))
return;
+ if (kfence_handle_page_fault(address, write, regs))
+ return;
+
/*
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
die("Oops", regs);
}
-static void __kprobes do_out_of_memory(struct pt_regs *regs, unsigned long address)
+static void __kprobes do_out_of_memory(struct pt_regs *regs,
+ unsigned long write, unsigned long address)
{
/*
* We ran out of memory, call the OOM killer, and return the userspace
* (which will retry the fault, or kill us if we got oom-killed).
*/
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
pagefault_out_of_memory();
{
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
/* Kernel mode? Handle exceptions or die */
if (!user_mode(regs)) {
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
*/
if (address & __UA_LIMIT) {
if (!user_mode(regs))
- no_context(regs, address);
+ no_context(regs, write, address);
else
do_sigsegv(regs, write, address, si_code);
return;
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
- no_context(regs, address);
+ no_context(regs, write, address);
return;
}
if (unlikely(fault & VM_FAULT_ERROR)) {
mmap_read_unlock(mm);
if (fault & VM_FAULT_OOM) {
- do_out_of_memory(regs, address);
+ do_out_of_memory(regs, write, address);
return;
} else if (fault & VM_FAULT_SIGSEGV) {
do_sigsegv(regs, write, address, si_code);
#include <asm/pgalloc.h>
#include <asm/tlb.h>
-/*
- * We have up to 8 empty zeroed pages so we can map one of the right colour
- * when needed. Since page is never written to after the initialization we
- * don't have to care about aliases on other CPUs.
- */
-unsigned long empty_zero_page, zero_page_mask;
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
-EXPORT_SYMBOL(zero_page_mask);
-
-void setup_zero_pages(void)
-{
- unsigned int order, i;
- struct page *page;
-
- order = 0;
-
- empty_zero_page = __get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
- if (!empty_zero_page)
- panic("Oh boy, that early out of memory?");
-
- page = virt_to_page((void *)empty_zero_page);
- split_page(page, order);
- for (i = 0; i < (1 << order); i++, page++)
- mark_page_reserved(page);
-
- zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
-}
void copy_user_highpage(struct page *to, struct page *from,
unsigned long vaddr, struct vm_area_struct *vma)
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
memblock_free_all();
- setup_zero_pages(); /* Setup zeroed pages. */
}
#endif /* !CONFIG_NUMA */
#endif
#endif
-static pte_t *fixmap_pte(unsigned long addr)
+pte_t * __init populate_kernel_pte(unsigned long addr)
{
- pgd_t *pgd;
- p4d_t *p4d;
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
pud_t *pud;
pmd_t *pmd;
- pgd = pgd_offset_k(addr);
- p4d = p4d_offset(pgd, addr);
-
- if (pgd_none(*pgd)) {
- pud_t *new __maybe_unused;
-
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pgd_populate(&init_mm, pgd, new);
+ if (p4d_none(*p4d)) {
+ pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pud)
+ panic("%s: Failed to allocate memory\n", __func__);
+ p4d_populate(&init_mm, p4d, pud);
#ifndef __PAGETABLE_PUD_FOLDED
- pud_init(new);
+ pud_init(pud);
#endif
}
pud = pud_offset(p4d, addr);
if (pud_none(*pud)) {
- pmd_t *new __maybe_unused;
-
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pud_populate(&init_mm, pud, new);
+ pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pmd)
+ panic("%s: Failed to allocate memory\n", __func__);
+ pud_populate(&init_mm, pud, pmd);
#ifndef __PAGETABLE_PMD_FOLDED
- pmd_init(new);
+ pmd_init(pmd);
#endif
}
pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
- pte_t *new __maybe_unused;
+ if (!pmd_present(*pmd)) {
+ pte_t *pte;
- new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pmd, new);
+ pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pte)
+ panic("%s: Failed to allocate memory\n", __func__);
+ pmd_populate_kernel(&init_mm, pmd, pte);
}
return pte_offset_kernel(pmd, addr);
BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
- ptep = fixmap_pte(addr);
+ ptep = populate_kernel_pte(addr);
if (!pte_none(*ptep)) {
pte_ERROR(*ptep);
return;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ */
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+
+#include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
+#include <asm-generic/sections.h>
+
+static pgd_t kasan_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+#ifdef __PAGETABLE_PUD_FOLDED
+#define __p4d_none(early, p4d) (0)
+#else
+#define __p4d_none(early, p4d) (early ? (p4d_val(p4d) == 0) : \
+(__pa(p4d_val(p4d)) == (unsigned long)__pa(kasan_early_shadow_pud)))
+#endif
+
+#ifdef __PAGETABLE_PMD_FOLDED
+#define __pud_none(early, pud) (0)
+#else
+#define __pud_none(early, pud) (early ? (pud_val(pud) == 0) : \
+(__pa(pud_val(pud)) == (unsigned long)__pa(kasan_early_shadow_pmd)))
+#endif
+
+#define __pmd_none(early, pmd) (early ? (pmd_val(pmd) == 0) : \
+(__pa(pmd_val(pmd)) == (unsigned long)__pa(kasan_early_shadow_pte)))
+
+#define __pte_none(early, pte) (early ? pte_none(pte) : \
+((pte_val(pte) & _PFN_MASK) == (unsigned long)__pa(kasan_early_shadow_page)))
+
+bool kasan_early_stage = true;
+
+/*
+ * Alloc memory for shadow memory page table.
+ */
+static phys_addr_t __init kasan_alloc_zeroed_page(int node)
+{
+ void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE,
+ __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+ if (!p)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n",
+ __func__, PAGE_SIZE, PAGE_SIZE, node, __pa(MAX_DMA_ADDRESS));
+
+ return __pa(p);
+}
+
+static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early)
+{
+ if (__pmd_none(early, READ_ONCE(*pmdp))) {
+ phys_addr_t pte_phys = early ?
+ __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pte_phys), kasan_early_shadow_pte, sizeof(kasan_early_shadow_pte));
+ pmd_populate_kernel(NULL, pmdp, (pte_t *)__va(pte_phys));
+ }
+
+ return pte_offset_kernel(pmdp, addr);
+}
+
+static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early)
+{
+ if (__pud_none(early, READ_ONCE(*pudp))) {
+ phys_addr_t pmd_phys = early ?
+ __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pmd_phys), kasan_early_shadow_pmd, sizeof(kasan_early_shadow_pmd));
+ pud_populate(&init_mm, pudp, (pmd_t *)__va(pmd_phys));
+ }
+
+ return pmd_offset(pudp, addr);
+}
+
+static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early)
+{
+ if (__p4d_none(early, READ_ONCE(*p4dp))) {
+ phys_addr_t pud_phys = early ?
+ __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node);
+ if (!early)
+ memcpy(__va(pud_phys), kasan_early_shadow_pud, sizeof(kasan_early_shadow_pud));
+ p4d_populate(&init_mm, p4dp, (pud_t *)__va(pud_phys));
+ }
+
+ return pud_offset(p4dp, addr);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pte_t *ptep = kasan_pte_offset(pmdp, addr, node, early);
+
+ do {
+ phys_addr_t page_phys = early ?
+ __pa_symbol(kasan_early_shadow_page)
+ : kasan_alloc_zeroed_page(node);
+ next = addr + PAGE_SIZE;
+ set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL));
+ } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep)));
+}
+
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pmd_t *pmdp = kasan_pmd_offset(pudp, addr, node, early);
+
+ do {
+ next = pmd_addr_end(addr, end);
+ kasan_pte_populate(pmdp, addr, next, node, early);
+ } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp)));
+}
+
+static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ pud_t *pudp = kasan_pud_offset(p4dp, addr, node, early);
+
+ do {
+ next = pud_addr_end(addr, end);
+ kasan_pmd_populate(pudp, addr, next, node, early);
+ } while (pudp++, addr = next, addr != end);
+}
+
+static void __init kasan_p4d_populate(pgd_t *pgdp, unsigned long addr,
+ unsigned long end, int node, bool early)
+{
+ unsigned long next;
+ p4d_t *p4dp = p4d_offset(pgdp, addr);
+
+ do {
+ next = p4d_addr_end(addr, end);
+ kasan_pud_populate(p4dp, addr, next, node, early);
+ } while (p4dp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+ int node, bool early)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset_k(addr);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ kasan_p4d_populate(pgdp, addr, next, node, early);
+ } while (pgdp++, addr = next, addr != end);
+
+}
+
+/* Set up full kasan mappings, ensuring that the mapped pages are zeroed */
+static void __init kasan_map_populate(unsigned long start, unsigned long end,
+ int node)
+{
+ kasan_pgd_populate(start & PAGE_MASK, PAGE_ALIGN(end), node, false);
+}
+
+asmlinkage void __init kasan_early_init(void)
+{
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, PGDIR_SIZE));
+ BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, PGDIR_SIZE));
+}
+
+static inline void kasan_set_pgd(pgd_t *pgdp, pgd_t pgdval)
+{
+ WRITE_ONCE(*pgdp, pgdval);
+}
+
+static void __init clear_pgds(unsigned long start, unsigned long end)
+{
+ /*
+ * Remove references to kasan page tables from
+ * swapper_pg_dir. pgd_clear() can't be used
+ * here because it's nop on 2,3-level pagetable setups
+ */
+ for (; start < end; start += PGDIR_SIZE)
+ kasan_set_pgd((pgd_t *)pgd_offset_k(start), __pgd(0));
+}
+
+void __init kasan_init(void)
+{
+ u64 i;
+ phys_addr_t pa_start, pa_end;
+
+ /*
+ * PGD was populated as invalid_pmd_table or invalid_pud_table
+ * in pagetable_init() which depends on how many levels of page
+ * table you are using, but we had to clean the gpd of kasan
+ * shadow memory, as the pgd value is none-zero.
+ * The assertion pgd_none is going to be false and the formal populate
+ * afterwards is not going to create any new pgd at all.
+ */
+ memcpy(kasan_pg_dir, swapper_pg_dir, sizeof(kasan_pg_dir));
+ csr_write64(__pa_symbol(kasan_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ /* Maps everything to a single page of zeroes */
+ kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, NUMA_NO_NODE, true);
+
+ kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+ kasan_mem_to_shadow((void *)KFENCE_AREA_END));
+
+ kasan_early_stage = false;
+
+ /* Populate the linear mapping */
+ for_each_mem_range(i, &pa_start, &pa_end) {
+ void *start = (void *)phys_to_virt(pa_start);
+ void *end = (void *)phys_to_virt(pa_end);
+
+ if (start >= end)
+ break;
+
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow(start),
+ (unsigned long)kasan_mem_to_shadow(end), NUMA_NO_NODE);
+ }
+
+ /* Populate modules mapping */
+ kasan_map_populate((unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR),
+ (unsigned long)kasan_mem_to_shadow((void *)MODULES_END), NUMA_NO_NODE);
+ /*
+ * KAsan may reuse the contents of kasan_early_shadow_pte directly, so we
+ * should make sure that it maps the zero page read-only.
+ */
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(&kasan_early_shadow_pte[i],
+ pfn_pte(__phys_to_pfn(__pa_symbol(kasan_early_shadow_page)), PAGE_KERNEL_RO));
+
+ memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ csr_write64(__pa_symbol(swapper_pg_dir), LOONGARCH_CSR_PGDH);
+ local_flush_tlb_all();
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KernelAddressSanitizer initialized.\n");
+}
#include <linux/mm.h>
#include <linux/mman.h>
-unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */
-EXPORT_SYMBOL(shm_align_mask);
+#define SHM_ALIGN_MASK (SHMLBA - 1)
-#define COLOUR_ALIGN(addr, pgoff) \
- ((((addr) + shm_align_mask) & ~shm_align_mask) + \
- (((pgoff) << PAGE_SHIFT) & shm_align_mask))
+#define COLOUR_ALIGN(addr, pgoff) \
+ ((((addr) + SHM_ALIGN_MASK) & ~SHM_ALIGN_MASK) \
+ + (((pgoff) << PAGE_SHIFT) & SHM_ALIGN_MASK))
enum mmap_allocation_direction {UP, DOWN};
* cache aliasing constraints.
*/
if ((flags & MAP_SHARED) &&
- ((addr - (pgoff << PAGE_SHIFT)) & shm_align_mask))
+ ((addr - (pgoff << PAGE_SHIFT)) & SHM_ALIGN_MASK))
return -EINVAL;
return addr;
}
}
info.length = len;
- info.align_mask = do_color_align ? (PAGE_MASK & shm_align_mask) : 0;
+ info.align_mask = do_color_align ? (PAGE_MASK & SHM_ALIGN_MASK) : 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (dir == DOWN) {
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
+struct page *dmw_virt_to_page(unsigned long kaddr)
+{
+ return pfn_to_page(virt_to_pfn(kaddr));
+}
+EXPORT_SYMBOL_GPL(dmw_virt_to_page);
+
+struct page *tlb_virt_to_page(unsigned long kaddr)
+{
+ return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr)));
+}
+EXPORT_SYMBOL_GPL(tlb_virt_to_page);
+
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *init, *ret = NULL;
# SPDX-License-Identifier: GPL-2.0
# Objects to go into the VDSO.
+KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
# Include the generic Makefile to check the built vdso.
include $(srctree)/lib/vdso/Makefile
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-/* Copyright(c) 1996 Kars de Jong */
-/* Based on the ide driver from 1.2.13pl8 */
-
-/*
- * Credits (alphabetical):
- *
- * - Bjoern Brauel
- * - Kars de Jong
- * - Torsten Ebeling
- * - Dwight Engen
- * - Thorsten Floeck
- * - Roman Hodek
- * - Guenther Kelleter
- * - Chris Lawrence
- * - Michael Rausch
- * - Christian Sauer
- * - Michael Schmitz
- * - Jes Soerensen
- * - Michael Thurm
- * - Geert Uytterhoeven
- */
-
-#ifndef _M68K_IDE_H
-#define _M68K_IDE_H
-
-#ifdef __KERNEL__
-#include <asm/setup.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-
-#ifdef CONFIG_MMU
-
-/*
- * Get rid of defs from io.h - ide has its private and conflicting versions
- * Since so far no single m68k platform uses ISA/PCI I/O space for IDE, we
- * always use the `raw' MMIO versions
- */
-#undef readb
-#undef readw
-#undef writeb
-#undef writew
-
-#define readb in_8
-#define readw in_be16
-#define __ide_mm_insw(port, addr, n) raw_insw((u16 *)port, addr, n)
-#define __ide_mm_insl(port, addr, n) raw_insl((u32 *)port, addr, n)
-#define writeb(val, port) out_8(port, val)
-#define writew(val, port) out_be16(port, val)
-#define __ide_mm_outsw(port, addr, n) raw_outsw((u16 *)port, addr, n)
-#define __ide_mm_outsl(port, addr, n) raw_outsl((u32 *)port, addr, n)
-
-#else
-
-#define __ide_mm_insw(port, addr, n) io_insw((unsigned int)port, addr, n)
-#define __ide_mm_insl(port, addr, n) io_insl((unsigned int)port, addr, n)
-#define __ide_mm_outsw(port, addr, n) io_outsw((unsigned int)port, addr, n)
-#define __ide_mm_outsl(port, addr, n) io_outsl((unsigned int)port, addr, n)
-
-#endif /* CONFIG_MMU */
-
-#endif /* __KERNEL__ */
-#endif /* _M68K_IDE_H */
# define phys_to_pfn(phys) (PFN_DOWN(phys))
# define pfn_to_phys(pfn) (PFN_PHYS(pfn))
-# define virt_to_pfn(vaddr) (phys_to_pfn((__pa(vaddr))))
-# define pfn_to_virt(pfn) __va(pfn_to_phys((pfn)))
-
# define virt_to_page(kaddr) (pfn_to_page(__pa(kaddr) >> PAGE_SHIFT))
# define page_to_virt(page) __va(page_to_pfn(page) << PAGE_SHIFT)
# define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT)
# define ARCH_PFN_OFFSET (memory_start >> PAGE_SHIFT)
# endif /* __ASSEMBLY__ */
-#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
-
-# define __pa(x) __virt_to_phys((unsigned long)(x))
-# define __va(x) ((void *)__phys_to_virt((unsigned long)(x)))
-
/* Convert between virtual and physical address for MMU. */
/* Handle MicroBlaze processor with virtual memory. */
#define __virt_to_phys(addr) \
#define tovirt(rd, rs) \
addik rd, rs, (CONFIG_KERNEL_START - CONFIG_KERNEL_BASE_ADDR)
+#ifndef __ASSEMBLY__
+
+# define __pa(x) __virt_to_phys((unsigned long)(x))
+# define __va(x) ((void *)__phys_to_virt((unsigned long)(x)))
+
+static inline unsigned long virt_to_pfn(const void *vaddr)
+{
+ return phys_to_pfn(__pa(vaddr));
+}
+
+static inline const void *pfn_to_virt(unsigned long pfn)
+{
+ return __va(pfn_to_phys((pfn)));
+}
+
+#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
+
+#endif /* __ASSEMBLY__ */
+
#define TOPHYS(addr) __virt_to_phys(addr)
#endif /* __KERNEL__ */
void machine_halt(void);
void machine_power_off(void);
-extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
-
# endif /* __ASSEMBLY__ */
#endif /* _ASM_MICROBLAZE_SETUP_H */
#include <linux/init.h>
#include <linux/delay.h>
-#include <linux/of_platform.h>
#include <linux/reboot.h>
void machine_shutdown(void)
memblock_dump_all();
}
-void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask)
-{
- void *p;
-
- if (mem_init_done) {
- p = kzalloc(size, mask);
- } else {
- p = memblock_alloc(size, SMP_CACHE_BYTES);
- if (!p)
- panic("%s: Failed to allocate %zu bytes\n",
- __func__, size);
- }
-
- return p;
-}
-
static const pgprot_t protection_map[16] = {
[VM_NONE] = PAGE_NONE,
[VM_READ] = PAGE_READONLY_X,
cflags-$(CONFIG_CAVIUM_CN63XXP1) += -Wa,-mfix-cn63xxp1
cflags-$(CONFIG_CPU_BMIPS) += -march=mips32 -Wa,-mips32 -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2E) += $(call cc-option,-march=loongson2e) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON2F) += $(call cc-option,-march=loongson2f) -Wa,--trap
-cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-march=loongson3a,-march=mips64r2) -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2E) += -march=loongson2e -Wa,--trap
+cflags-$(CONFIG_CPU_LOONGSON2F) += -march=loongson2f -Wa,--trap
# Some -march= flags enable MMI instructions, and GCC complains about that
# support being enabled alongside -msoft-float. Thus explicitly disable MMI.
cflags-$(CONFIG_CPU_LOONGSON2EF) += $(call cc-option,-mno-loongson-mmi)
+ifdef CONFIG_CPU_LOONGSON64
+cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap
+cflags-$(CONFIG_CC_IS_GCC) += -march=loongson3a
+cflags-$(CONFIG_CC_IS_CLANG) += -march=mips64r2
+endif
cflags-$(CONFIG_CPU_LOONGSON64) += $(call cc-option,-mno-loongson-mmi)
cflags-$(CONFIG_CPU_R4000_WORKAROUNDS) += $(call cc-option,-mfix-r4000,)
endif
endif
- ifeq ($(KBUILD_SYM32)$(call cc-option-yn,-msym32), yy)
- cflags-y += -msym32 -DKBUILD_64BIT_SYM32
+ ifeq ($(KBUILD_SYM32), y)
+ cflags-$(KBUILD_SYM32) += -msym32 -DKBUILD_64BIT_SYM32
else
ifeq ($(CONFIG_CPU_DADDI_WORKAROUNDS), y)
$(error CONFIG_CPU_DADDI_WORKAROUNDS unsupported without -msym32)
KBUILD_LDFLAGS += -m $(ld-emul)
-ifdef CONFIG_MIPS
+ifdef need-compiler
CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
#include <linux/of.h>
#include <linux/of_clk.h>
#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <linux/libfdt.h>
#include <linux/smp.h>
#include <asm/addrspace.h>
#include <linux/semaphore.h>
#include <linux/mtd/mtd.h>
#include <linux/mtd/map.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/mtd/partitions.h>
#include <asm/octeon/octeon.h>
* Mnemonic names for arguments to memcpy/__copy_user
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#define dst a0
*/
#include <linux/etherdevice.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/of_fdt.h>
+#include <linux/platform_device.h>
#include <linux/libfdt.h>
#include <asm/octeon/octeon.h>
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_DRM_AMD_ACP=y
CONFIG_DRM_AMD_DC=y
CONFIG_DRM_AMD_DC_SI=y
+CONFIG_DRM_AST=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_QXL=y
CONFIG_DRM_VIRTIO_GPU=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
generated-y += unistd_nr_n64.h
generated-y += unistd_nr_o32.h
-generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += parport.h
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-int kvm_arch_flush_remote_tlb(struct kvm *kvm);
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
#endif /* __MIPS_KVM_HOST_H__ */
#define LS1X_NAND_BASE 0x1fe78000
#define LS1X_CLK_BASE 0x1fe78030
-#include <regs-clk.h>
#include <regs-mux.h>
-#include <regs-rtc.h>
#endif /* __ASM_MACH_LOONGSON32_LOONGSON1_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2011 Zhang, Keguang <keguang.zhang@gmail.com>
- *
- * Loongson 1 Clock Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_CLK_H
-#define __ASM_MACH_LOONGSON32_REGS_CLK_H
-
-#define LS1X_CLK_REG(x) \
- ((void __iomem *)KSEG1ADDR(LS1X_CLK_BASE + (x)))
-
-#define LS1X_CLK_PLL_FREQ LS1X_CLK_REG(0x0)
-#define LS1X_CLK_PLL_DIV LS1X_CLK_REG(0x4)
-
-#if defined(CONFIG_LOONGSON1_LS1B)
-/* Clock PLL Divisor Register Bits */
-#define DIV_DC_EN BIT(31)
-#define DIV_DC_RST BIT(30)
-#define DIV_CPU_EN BIT(25)
-#define DIV_CPU_RST BIT(24)
-#define DIV_DDR_EN BIT(19)
-#define DIV_DDR_RST BIT(18)
-#define RST_DC_EN BIT(5)
-#define RST_DC BIT(4)
-#define RST_DDR_EN BIT(3)
-#define RST_DDR BIT(2)
-#define RST_CPU_EN BIT(1)
-#define RST_CPU BIT(0)
-
-#define DIV_DC_SHIFT 26
-#define DIV_CPU_SHIFT 20
-#define DIV_DDR_SHIFT 14
-
-#define DIV_DC_WIDTH 4
-#define DIV_CPU_WIDTH 4
-#define DIV_DDR_WIDTH 4
-
-#define BYPASS_DC_SHIFT 12
-#define BYPASS_DDR_SHIFT 10
-#define BYPASS_CPU_SHIFT 8
-
-#define BYPASS_DC_WIDTH 1
-#define BYPASS_DDR_WIDTH 1
-#define BYPASS_CPU_WIDTH 1
-
-#elif defined(CONFIG_LOONGSON1_LS1C)
-/* PLL/SDRAM Frequency configuration register Bits */
-#define PLL_VALID BIT(31)
-#define FRAC_N GENMASK(23, 16)
-#define RST_TIME GENMASK(3, 2)
-#define SDRAM_DIV GENMASK(1, 0)
-
-/* CPU/CAMERA/DC Frequency configuration register Bits */
-#define DIV_DC_EN BIT(31)
-#define DIV_DC GENMASK(30, 24)
-#define DIV_CAM_EN BIT(23)
-#define DIV_CAM GENMASK(22, 16)
-#define DIV_CPU_EN BIT(15)
-#define DIV_CPU GENMASK(14, 8)
-#define DIV_DC_SEL_EN BIT(5)
-#define DIV_DC_SEL BIT(4)
-#define DIV_CAM_SEL_EN BIT(3)
-#define DIV_CAM_SEL BIT(2)
-#define DIV_CPU_SEL_EN BIT(1)
-#define DIV_CPU_SEL BIT(0)
-
-#define DIV_DC_SHIFT 24
-#define DIV_CAM_SHIFT 16
-#define DIV_CPU_SHIFT 8
-#define DIV_DDR_SHIFT 0
-
-#define DIV_DC_WIDTH 7
-#define DIV_CAM_WIDTH 7
-#define DIV_CPU_WIDTH 7
-#define DIV_DDR_WIDTH 2
-
-#endif
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_CLK_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (c) 2016 Yang Ling <gnaygnil@gmail.com>
- *
- * Loongson 1 RTC timer Register Definitions.
- */
-
-#ifndef __ASM_MACH_LOONGSON32_REGS_RTC_H
-#define __ASM_MACH_LOONGSON32_REGS_RTC_H
-
-#define LS1X_RTC_REG(x) \
- ((void __iomem *)KSEG1ADDR(LS1X_RTC_BASE + (x)))
-
-#define LS1X_RTC_CTRL LS1X_RTC_REG(0x40)
-
-#define RTC_EXTCLK_OK (BIT(5) | BIT(8))
-#define RTC_EXTCLK_EN BIT(8)
-
-#endif /* __ASM_MACH_LOONGSON32_REGS_RTC_H */
* Author: Wu Zhangjin <wuzhangjin@gmail.com>
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>
#include <asm/ftrace.h>
* written by Carsten Langgaard, carstenl@mips.com
*/
#include <asm/asm.h>
-#include <asm/export.h>
#include <asm/asm-offsets.h>
#include <asm/mipsregs.h>
#include <asm/regdef.h>
* Further modifications to make this work:
* Copyright (c) 1998 Harald Koerfgen
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/errno.h>
-#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
#include <asm/asm-offsets.h>
*/
#include <asm/asm.h>
#include <asm/cachectl.h>
-#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
#include <asm/asm-offsets.h>
* Copyright (C) 2000 MIPS Technologies, Inc.
* Copyright (C) 1999, 2001 Silicon Graphics, Inc.
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/errno.h>
-#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/mipsregs.h>
#include <asm/asm-offsets.h>
/* Flush slot from GPA */
kvm_mips_flush_gpa_pt(kvm, slot->base_gfn,
slot->base_gfn + slot->npages - 1);
- kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+ kvm_flush_remote_tlbs_memslot(kvm, slot);
spin_unlock(&kvm->mmu_lock);
}
needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn,
new->base_gfn + new->npages - 1);
if (needs_flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+ kvm_flush_remote_tlbs_memslot(kvm, new);
spin_unlock(&kvm->mmu_lock);
}
}
}
-int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
kvm_mips_callbacks->prepare_flush_shadow(kvm);
return 1;
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
-{
- kvm_flush_remote_tlbs(kvm);
-}
-
int kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
int r;
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
gpa_t gpa = range->start << PAGE_SHIFT;
- pte_t hva_pte = range->pte;
+ pte_t hva_pte = range->arg.pte;
pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa);
pte_t old_pte;
#include <linux/sched.h>
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
*/
#include <linux/ioport.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
#include <lantiq_soc.h>
#include <linux/interrupt.h>
#include <linux/ioport.h>
#include <linux/init.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <lantiq_soc.h>
#include "../clk.h"
#include <linux/clkdev.h>
#include <linux/spinlock.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/of_address.h>
#include <lantiq_soc.h>
#include <linux/err.h>
#include <linux/export.h>
#include <linux/gpio/consumer.h>
-#include <linux/of_platform.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <lantiq_soc.h>
* Copyright (C) 2014 Imagination Technologies Ltd.
*/
#include <linux/errno.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#ifdef CONFIG_64BIT
#undef CONFIG_CPU_HAS_PREFETCH
#endif
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#define dst a0
* Copyright (C) 2007 by Maciej W. Rozycki
* Copyright (C) 2011, 2012 MIPS Technologies, Inc.
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#if LONGSIZE == 4
* Copyright (C) 2011 MIPS Technologies, Inc.
*/
#include <linux/errno.h>
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#define EX(insn,reg,addr,handler) \
* Copyright (c) 1996, 1998, 1999, 2004 by Ralf Baechle
* Copyright (c) 1999 Silicon Graphics, Inc.
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#define EX(insn,reg,addr,handler) \
};
/* Real Time Clock */
-void __init ls1x_rtc_set_extclk(struct platform_device *pdev)
-{
- u32 val = __raw_readl(LS1X_RTC_CTRL);
-
- if (!(val & RTC_EXTCLK_OK))
- __raw_writel(val | RTC_EXTCLK_EN, LS1X_RTC_CTRL);
-}
-
struct platform_device ls1x_rtc_pdev = {
.name = "ls1x-rtc",
.id = -1,
static void ipi_set0_regs_init(void)
{
- ipi_set0_regs[0] = (void *)
+ ipi_set0_regs[0] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + SET0);
- ipi_set0_regs[1] = (void *)
+ ipi_set0_regs[1] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + SET0);
- ipi_set0_regs[2] = (void *)
+ ipi_set0_regs[2] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + SET0);
- ipi_set0_regs[3] = (void *)
+ ipi_set0_regs[3] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + SET0);
- ipi_set0_regs[4] = (void *)
+ ipi_set0_regs[4] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + SET0);
- ipi_set0_regs[5] = (void *)
+ ipi_set0_regs[5] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + SET0);
- ipi_set0_regs[6] = (void *)
+ ipi_set0_regs[6] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + SET0);
- ipi_set0_regs[7] = (void *)
+ ipi_set0_regs[7] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + SET0);
- ipi_set0_regs[8] = (void *)
+ ipi_set0_regs[8] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + SET0);
- ipi_set0_regs[9] = (void *)
+ ipi_set0_regs[9] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + SET0);
- ipi_set0_regs[10] = (void *)
+ ipi_set0_regs[10] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + SET0);
- ipi_set0_regs[11] = (void *)
+ ipi_set0_regs[11] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + SET0);
- ipi_set0_regs[12] = (void *)
+ ipi_set0_regs[12] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + SET0);
- ipi_set0_regs[13] = (void *)
+ ipi_set0_regs[13] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + SET0);
- ipi_set0_regs[14] = (void *)
+ ipi_set0_regs[14] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + SET0);
- ipi_set0_regs[15] = (void *)
+ ipi_set0_regs[15] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + SET0);
}
static void ipi_clear0_regs_init(void)
{
- ipi_clear0_regs[0] = (void *)
+ ipi_clear0_regs[0] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + CLEAR0);
- ipi_clear0_regs[1] = (void *)
+ ipi_clear0_regs[1] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + CLEAR0);
- ipi_clear0_regs[2] = (void *)
+ ipi_clear0_regs[2] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + CLEAR0);
- ipi_clear0_regs[3] = (void *)
+ ipi_clear0_regs[3] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + CLEAR0);
- ipi_clear0_regs[4] = (void *)
+ ipi_clear0_regs[4] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + CLEAR0);
- ipi_clear0_regs[5] = (void *)
+ ipi_clear0_regs[5] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + CLEAR0);
- ipi_clear0_regs[6] = (void *)
+ ipi_clear0_regs[6] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + CLEAR0);
- ipi_clear0_regs[7] = (void *)
+ ipi_clear0_regs[7] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + CLEAR0);
- ipi_clear0_regs[8] = (void *)
+ ipi_clear0_regs[8] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + CLEAR0);
- ipi_clear0_regs[9] = (void *)
+ ipi_clear0_regs[9] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + CLEAR0);
- ipi_clear0_regs[10] = (void *)
+ ipi_clear0_regs[10] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + CLEAR0);
- ipi_clear0_regs[11] = (void *)
+ ipi_clear0_regs[11] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + CLEAR0);
- ipi_clear0_regs[12] = (void *)
+ ipi_clear0_regs[12] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + CLEAR0);
- ipi_clear0_regs[13] = (void *)
+ ipi_clear0_regs[13] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + CLEAR0);
- ipi_clear0_regs[14] = (void *)
+ ipi_clear0_regs[14] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + CLEAR0);
- ipi_clear0_regs[15] = (void *)
+ ipi_clear0_regs[15] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + CLEAR0);
}
static void ipi_status0_regs_init(void)
{
- ipi_status0_regs[0] = (void *)
+ ipi_status0_regs[0] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + STATUS0);
- ipi_status0_regs[1] = (void *)
+ ipi_status0_regs[1] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + STATUS0);
- ipi_status0_regs[2] = (void *)
+ ipi_status0_regs[2] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + STATUS0);
- ipi_status0_regs[3] = (void *)
+ ipi_status0_regs[3] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + STATUS0);
- ipi_status0_regs[4] = (void *)
+ ipi_status0_regs[4] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + STATUS0);
- ipi_status0_regs[5] = (void *)
+ ipi_status0_regs[5] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + STATUS0);
- ipi_status0_regs[6] = (void *)
+ ipi_status0_regs[6] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + STATUS0);
- ipi_status0_regs[7] = (void *)
+ ipi_status0_regs[7] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + STATUS0);
- ipi_status0_regs[8] = (void *)
+ ipi_status0_regs[8] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + STATUS0);
- ipi_status0_regs[9] = (void *)
+ ipi_status0_regs[9] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + STATUS0);
- ipi_status0_regs[10] = (void *)
+ ipi_status0_regs[10] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + STATUS0);
- ipi_status0_regs[11] = (void *)
+ ipi_status0_regs[11] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + STATUS0);
- ipi_status0_regs[12] = (void *)
+ ipi_status0_regs[12] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + STATUS0);
- ipi_status0_regs[13] = (void *)
+ ipi_status0_regs[13] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + STATUS0);
- ipi_status0_regs[14] = (void *)
+ ipi_status0_regs[14] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + STATUS0);
- ipi_status0_regs[15] = (void *)
+ ipi_status0_regs[15] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + STATUS0);
}
static void ipi_en0_regs_init(void)
{
- ipi_en0_regs[0] = (void *)
+ ipi_en0_regs[0] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + EN0);
- ipi_en0_regs[1] = (void *)
+ ipi_en0_regs[1] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + EN0);
- ipi_en0_regs[2] = (void *)
+ ipi_en0_regs[2] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + EN0);
- ipi_en0_regs[3] = (void *)
+ ipi_en0_regs[3] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + EN0);
- ipi_en0_regs[4] = (void *)
+ ipi_en0_regs[4] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + EN0);
- ipi_en0_regs[5] = (void *)
+ ipi_en0_regs[5] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + EN0);
- ipi_en0_regs[6] = (void *)
+ ipi_en0_regs[6] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + EN0);
- ipi_en0_regs[7] = (void *)
+ ipi_en0_regs[7] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + EN0);
- ipi_en0_regs[8] = (void *)
+ ipi_en0_regs[8] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + EN0);
- ipi_en0_regs[9] = (void *)
+ ipi_en0_regs[9] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + EN0);
- ipi_en0_regs[10] = (void *)
+ ipi_en0_regs[10] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + EN0);
- ipi_en0_regs[11] = (void *)
+ ipi_en0_regs[11] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + EN0);
- ipi_en0_regs[12] = (void *)
+ ipi_en0_regs[12] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + EN0);
- ipi_en0_regs[13] = (void *)
+ ipi_en0_regs[13] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + EN0);
- ipi_en0_regs[14] = (void *)
+ ipi_en0_regs[14] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + EN0);
- ipi_en0_regs[15] = (void *)
+ ipi_en0_regs[15] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + EN0);
}
static void ipi_mailbox_buf_init(void)
{
- ipi_mailbox_buf[0] = (void *)
+ ipi_mailbox_buf[0] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE0_OFFSET + BUF);
- ipi_mailbox_buf[1] = (void *)
+ ipi_mailbox_buf[1] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE1_OFFSET + BUF);
- ipi_mailbox_buf[2] = (void *)
+ ipi_mailbox_buf[2] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE2_OFFSET + BUF);
- ipi_mailbox_buf[3] = (void *)
+ ipi_mailbox_buf[3] = (void __iomem *)
(SMP_CORE_GROUP0_BASE + SMP_CORE3_OFFSET + BUF);
- ipi_mailbox_buf[4] = (void *)
+ ipi_mailbox_buf[4] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE0_OFFSET + BUF);
- ipi_mailbox_buf[5] = (void *)
+ ipi_mailbox_buf[5] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE1_OFFSET + BUF);
- ipi_mailbox_buf[6] = (void *)
+ ipi_mailbox_buf[6] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE2_OFFSET + BUF);
- ipi_mailbox_buf[7] = (void *)
+ ipi_mailbox_buf[7] = (void __iomem *)
(SMP_CORE_GROUP1_BASE + SMP_CORE3_OFFSET + BUF);
- ipi_mailbox_buf[8] = (void *)
+ ipi_mailbox_buf[8] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE0_OFFSET + BUF);
- ipi_mailbox_buf[9] = (void *)
+ ipi_mailbox_buf[9] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE1_OFFSET + BUF);
- ipi_mailbox_buf[10] = (void *)
+ ipi_mailbox_buf[10] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE2_OFFSET + BUF);
- ipi_mailbox_buf[11] = (void *)
+ ipi_mailbox_buf[11] = (void __iomem *)
(SMP_CORE_GROUP2_BASE + SMP_CORE3_OFFSET + BUF);
- ipi_mailbox_buf[12] = (void *)
+ ipi_mailbox_buf[12] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE0_OFFSET + BUF);
- ipi_mailbox_buf[13] = (void *)
+ ipi_mailbox_buf[13] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE1_OFFSET + BUF);
- ipi_mailbox_buf[14] = (void *)
+ ipi_mailbox_buf[14] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE2_OFFSET + BUF);
- ipi_mailbox_buf[15] = (void *)
+ ipi_mailbox_buf[15] = (void __iomem *)
(SMP_CORE_GROUP3_BASE + SMP_CORE3_OFFSET + BUF);
}
* Copyright (C) 2012 MIPS Technologies, Inc.
* Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org>
*/
+#include <linux/export.h>
#include <asm/asm.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
* Copyright (C) 2012 MIPS Technologies, Inc.
* Copyright (C) 2012 Ralf Baechle <ralf@linux-mips.org>
*/
+#include <linux/export.h>
#include <asm/asm.h>
-#include <asm/export.h>
#include <asm/regdef.h>
#define FASTPATH_SIZE 128
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/clk.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
#include <linux/of_pci.h>
+#include <linux/platform_device.h>
#include <asm/addrspace.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/init.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
-#include <linux/of_pci.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
#include <asm/mach-ralink/rt288x.h>
*/
#include <linux/init.h>
#include <linux/io.h>
-#include <linux/of_platform.h>
+#include <linux/spinlock.h>
#include <asm/mach-pic32/pic32.h>
*/
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
+#include <linux/platform_device.h>
#include <asm/mach-ralink/ralink_regs.h>
#include <linux/io.h>
#include <linux/bitops.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/irqdomain.h>
#include <linux/of_fdt.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <asm/reboot.h>
*/
#include <linux/string.h>
-#include <linux/of_fdt.h>
-#include <linux/of_platform.h>
#include <asm/bootinfo.h>
#include <asm/addrspace.h>
unsigned short vid;
int cap66 = -1;
u16 stat;
+ int ret;
/* It seems SLC90E66 needs some time after PCI reset... */
mdelay(80);
for (pci_devfn = 0; pci_devfn < 0xff; pci_devfn++) {
if (PCI_FUNC(pci_devfn))
continue;
- if (early_read_config_word(hose, top_bus, current_bus,
- pci_devfn, PCI_VENDOR_ID, &vid) !=
- PCIBIOS_SUCCESSFUL)
+ ret = early_read_config_word(hose, top_bus, current_bus,
+ pci_devfn, PCI_VENDOR_ID, &vid);
+ if (ret != PCIBIOS_SUCCESSFUL)
continue;
if (vid == 0xffff)
continue;
static void final_fixup(struct pci_dev *dev)
{
+ unsigned long timeout;
unsigned char bist;
+ int ret;
/* Do build-in self test */
- if (pci_read_config_byte(dev, PCI_BIST, &bist) == PCIBIOS_SUCCESSFUL &&
- (bist & PCI_BIST_CAPABLE)) {
- unsigned long timeout;
- pci_set_power_state(dev, PCI_D0);
- pr_info("PCI: %s BIST...", pci_name(dev));
- pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
- timeout = jiffies + HZ * 2; /* timeout after 2 sec */
- do {
- pci_read_config_byte(dev, PCI_BIST, &bist);
- if (time_after(jiffies, timeout))
- break;
- } while (bist & PCI_BIST_START);
- if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
- pr_cont("failed. (0x%x)\n", bist);
- else
- pr_cont("OK.\n");
- }
+ ret = pci_read_config_byte(dev, PCI_BIST, &bist);
+ if ((ret != PCIBIOS_SUCCESSFUL) || !(bist & PCI_BIST_CAPABLE))
+ return;
+
+ pci_set_power_state(dev, PCI_D0);
+ pr_info("PCI: %s BIST...", pci_name(dev));
+ pci_write_config_byte(dev, PCI_BIST, PCI_BIST_START);
+ timeout = jiffies + HZ * 2; /* timeout after 2 sec */
+ do {
+ pci_read_config_byte(dev, PCI_BIST, &bist);
+ if (time_after(jiffies, timeout))
+ break;
+ } while (bist & PCI_BIST_START);
+ if (bist & (PCI_BIST_CODE_MASK | PCI_BIST_START))
+ pr_cont("failed. (0x%x)\n", bist);
+ else
+ pr_cont("OK.\n");
}
#ifdef CONFIG_TOSHIBA_FPCIB0
#ifndef CONFIG_MIPS_DISABLE_VDSO
global:
__vdso_clock_gettime;
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
__vdso_gettimeofday;
+#endif
__vdso_clock_getres;
#if _MIPS_SIM != _MIPS_SIM_ABI64
__vdso_clock_gettime64;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_OPENRISC_BUG_H
+#define __ASM_OPENRISC_BUG_H
+
+#include <asm-generic/bug.h>
+
+struct pt_regs;
+
+void __noreturn die(const char *str, struct pt_regs *regs, long err);
+
+#endif /* __ASM_OPENRISC_BUG_H */
#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
#define __pa(x) ((unsigned long) (x) - PAGE_OFFSET)
-#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
-#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT)
+static inline unsigned long virt_to_pfn(const void *kaddr)
+{
+ return __pa(kaddr) >> PAGE_SHIFT;
+}
+
+static inline void * pfn_to_virt(unsigned long pfn)
+{
+ return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT);
+}
#define virt_to_page(addr) \
(mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT))
void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp);
unsigned long __get_wchan(struct task_struct *p);
+void show_registers(struct pt_regs *regs);
#define cpu_relax() barrier()
*/
#define __KERNEL_SYSCALLS__
+#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/spr_defs.h>
+#include <asm/switch_to.h>
#include <linux/smp.h>
void show_regs(struct pt_regs *regs)
{
- extern void show_registers(struct pt_regs *regs);
-
show_regs_print_info(KERN_DEFAULT);
/* __PHX__ cleanup this mess */
show_registers(regs);
#include <asm/thread_info.h>
#include <asm/page.h>
+asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
+
+asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
+
/*
* Copy the thread state to a regset that can be interpreted by userspace.
*
unsigned char retcode[16]; /* trampoline code */
};
+asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs);
+
+asmlinkage int do_work_pending(struct pt_regs *regs, unsigned int thread_flags,
+ int syscall);
+
static int restore_sigcontext(struct pt_regs *regs,
struct sigcontext __user *sc)
{
* mode below.
*/
-int do_signal(struct pt_regs *regs, int syscall)
+static int do_signal(struct pt_regs *regs, int syscall)
{
struct ksignal ksig;
unsigned long continue_addr = 0;
#include <asm/cacheflush.h>
#include <asm/time.h>
+asmlinkage __init void secondary_start_kernel(void);
+
static void (*smp_cross_call)(const struct cpumask *, unsigned int);
unsigned long secondary_release = -1;
#include <asm/cpuinfo.h>
#include <asm/time.h>
+irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs);
+
/* Test the timer ticks to count, used in sync routine */
inline void openrisc_timer_set(unsigned long count)
{
#include <linux/kallsyms.h>
#include <linux/uaccess.h>
+#include <asm/bug.h>
#include <asm/io.h>
+#include <asm/processor.h>
#include <asm/unwinder.h>
#include <asm/sections.h>
-static int kstack_depth_to_print = 0x180;
int lwa_flag;
static unsigned long __user *lwa_addr;
+asmlinkage void unhandled_exception(struct pt_regs *regs, int ea, int vector);
+asmlinkage void do_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_fpe_trap(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address);
+asmlinkage void do_illegal_instruction(struct pt_regs *regs,
+ unsigned long address);
+
static void print_trace(void *data, unsigned long addr, int reliable)
{
const char *loglvl = data;
printk("\n");
}
-void nommu_dump_state(struct pt_regs *regs,
- unsigned long ea, unsigned long vector)
-{
- int i;
- unsigned long addr, stack = regs->sp;
-
- printk("\n\r[nommu_dump_state] :: ea %lx, vector %lx\n\r", ea, vector);
-
- printk("CPU #: %d\n"
- " PC: %08lx SR: %08lx SP: %08lx\n",
- 0, regs->pc, regs->sr, regs->sp);
- printk("GPR00: %08lx GPR01: %08lx GPR02: %08lx GPR03: %08lx\n",
- 0L, regs->gpr[1], regs->gpr[2], regs->gpr[3]);
- printk("GPR04: %08lx GPR05: %08lx GPR06: %08lx GPR07: %08lx\n",
- regs->gpr[4], regs->gpr[5], regs->gpr[6], regs->gpr[7]);
- printk("GPR08: %08lx GPR09: %08lx GPR10: %08lx GPR11: %08lx\n",
- regs->gpr[8], regs->gpr[9], regs->gpr[10], regs->gpr[11]);
- printk("GPR12: %08lx GPR13: %08lx GPR14: %08lx GPR15: %08lx\n",
- regs->gpr[12], regs->gpr[13], regs->gpr[14], regs->gpr[15]);
- printk("GPR16: %08lx GPR17: %08lx GPR18: %08lx GPR19: %08lx\n",
- regs->gpr[16], regs->gpr[17], regs->gpr[18], regs->gpr[19]);
- printk("GPR20: %08lx GPR21: %08lx GPR22: %08lx GPR23: %08lx\n",
- regs->gpr[20], regs->gpr[21], regs->gpr[22], regs->gpr[23]);
- printk("GPR24: %08lx GPR25: %08lx GPR26: %08lx GPR27: %08lx\n",
- regs->gpr[24], regs->gpr[25], regs->gpr[26], regs->gpr[27]);
- printk("GPR28: %08lx GPR29: %08lx GPR30: %08lx GPR31: %08lx\n",
- regs->gpr[28], regs->gpr[29], regs->gpr[30], regs->gpr[31]);
- printk(" RES: %08lx oGPR11: %08lx\n",
- regs->gpr[11], regs->orig_gpr11);
-
- printk("Process %s (pid: %d, stackpage=%08lx)\n",
- ((struct task_struct *)(__pa(current)))->comm,
- ((struct task_struct *)(__pa(current)))->pid,
- (unsigned long)current);
-
- printk("\nStack: ");
- printk("Stack dump [0x%08lx]:\n", (unsigned long)stack);
- for (i = 0; i < kstack_depth_to_print; i++) {
- if (((long)stack & (THREAD_SIZE - 1)) == 0)
- break;
- stack++;
-
- printk("%lx :: sp + %02d: 0x%08lx\n", stack, i * 4,
- *((unsigned long *)(__pa(stack))));
- }
- printk("\n");
-
- printk("Call Trace: ");
- i = 1;
- while (((long)stack & (THREAD_SIZE - 1)) != 0) {
- addr = *((unsigned long *)__pa(stack));
- stack++;
-
- if (kernel_text_address(addr)) {
- if (i && ((i % 6) == 0))
- printk("\n ");
- printk(" [<%08lx>]", addr);
- i++;
- }
- }
- printk("\n");
-
- printk("\nCode: ");
-
- for (i = -24; i < 24; i++) {
- unsigned long word;
-
- word = ((unsigned long *)(__pa(regs->pc)))[i];
-
- print_data(regs->pc, word, i);
- }
- printk("\n");
-}
-
/* This is normally the 'Oops' routine */
void __noreturn die(const char *str, struct pt_regs *regs, long err)
{
#include <linux/perf_event.h>
#include <linux/uaccess.h>
+#include <asm/bug.h>
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/signal.h>
*/
volatile pgd_t *current_pgd[NR_CPUS];
-extern void __noreturn die(char *, struct pt_regs *, long);
+asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long vector, int write_acc);
/*
* This routine handles page faults. It determines the address,
void __init paging_init(void)
{
- extern void tlb_init(void);
-
int i;
printk(KERN_INFO "Setting up paging and PTEs.\n");
extern int mem_init_done;
-/**
+/*
* OK, this one's a bit tricky... ioremap can get called before memory is
* initialized (early serial console does this) and will want to alloc a page
* for its mapping. No userspace pages will ever get allocated before memory
flush_tlb_mm(mm);
}
-
-/* called once during VM initialization, from init.c */
-
-void __init tlb_init(void)
-{
- /* Do nothing... */
- /* invalidate the entire TLB */
- /* flush_tlb_all(); */
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * linux/include/asm-parisc/ide.h
- *
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- */
-
-/*
- * This file contains the PARISC architecture specific IDE code.
- */
-
-#ifndef __ASM_PARISC_IDE_H
-#define __ASM_PARISC_IDE_H
-
-/* Generic I/O and MEMIO string operations. */
-
-#define __ide_insw insw
-#define __ide_insl insl
-#define __ide_outsw outsw
-#define __ide_outsl outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- *(u16 *)addr = __raw_readw(port);
- addr += 2;
- }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- *(u32 *)addr = __raw_readl(port);
- addr += 4;
- }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- __raw_writew(*(u16 *)addr, port);
- addr += 2;
- }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- __raw_writel(*(u32 *)addr, port);
- addr += 4;
- }
-}
-
-#endif /* __ASM_PARISC_IDE_H */
+# Help: Disable -Werror
CONFIG_PPC_DISABLE_WERROR=y
+# Help: Common security options for PowerPC builds
+
# This is the equivalent of booting with lockdown=integrity
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
# UBSAN bounds checking is very cheap and good for hardening
CONFIG_UBSAN=y
-# CONFIG_UBSAN_MISC is not set
\ No newline at end of file
+# CONFIG_UBSAN_MISC is not set
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 1994-1996 Linus Torvalds & authors
- *
- * This file contains the powerpc architecture specific IDE code.
- */
-#ifndef _ASM_POWERPC_IDE_H
-#define _ASM_POWERPC_IDE_H
-
-#include <linux/compiler.h>
-#include <asm/io.h>
-
-#define __ide_mm_insw(p, a, c) readsw((void __iomem *)(p), (a), (c))
-#define __ide_mm_insl(p, a, c) readsl((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsw(p, a, c) writesw((void __iomem *)(p), (a), (c))
-#define __ide_mm_outsl(p, a, c) writesl((void __iomem *)(p), (a), (c))
-
-#endif /* _ASM_POWERPC_IDE_H */
+# Help: Build a 32-bit image
CONFIG_ARCH_RV32I=y
CONFIG_32BIT=y
# CONFIG_PORTABLE is not set
+# Help: Build a 64-bit image
CONFIG_ARCH_RV64I=y
CONFIG_64BIT=y
#ifndef CONFIG_64BIT
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
+#define SATP_MODE_SHIFT 31
#define SATP_ASID_BITS 9
#define SATP_ASID_SHIFT 22
#define SATP_ASID_MASK _AC(0x1FF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
#define SATP_MODE_57 _AC(0xa000000000000000, UL)
+#define SATP_MODE_SHIFT 60
#define SATP_ASID_BITS 16
#define SATP_ASID_SHIFT 44
#define SATP_ASID_MASK _AC(0xFFFF, UL)
void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch);
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu);
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu);
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+ u64 __user *uindices);
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg);
+
int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq);
void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu);
#endif
int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg,
- unsigned long rtype);
+ const struct kvm_one_reg *reg);
int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg,
- unsigned long rtype);
+ const struct kvm_one_reg *reg);
#endif
unsigned long marchid;
unsigned long mimpid;
unsigned long zicboz_block_size;
+ unsigned long satp_mode;
};
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
KVM_RISCV_ISA_EXT_SSAIA,
KVM_RISCV_ISA_EXT_V,
KVM_RISCV_ISA_EXT_SVNAPOT,
+ KVM_RISCV_ISA_EXT_ZBA,
+ KVM_RISCV_ISA_EXT_ZBS,
+ KVM_RISCV_ISA_EXT_ZICNTR,
+ KVM_RISCV_ISA_EXT_ZICSR,
+ KVM_RISCV_ISA_EXT_ZIFENCEI,
+ KVM_RISCV_ISA_EXT_ZIHPM,
KVM_RISCV_ISA_EXT_MAX,
};
/* ISA Extension registers are mapped as type 7 */
#define KVM_REG_RISCV_ISA_EXT (0x07 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_SINGLE (0x0 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_EN (0x1 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_DIS (0x2 << KVM_REG_RISCV_SUBTYPE_SHIFT)
+#define KVM_REG_RISCV_ISA_MULTI_REG(__ext_id) \
+ ((__ext_id) / __BITS_PER_LONG)
+#define KVM_REG_RISCV_ISA_MULTI_MASK(__ext_id) \
+ (1UL << ((__ext_id) % __BITS_PER_LONG))
+#define KVM_REG_RISCV_ISA_MULTI_REG_LAST \
+ KVM_REG_RISCV_ISA_MULTI_REG(KVM_RISCV_ISA_EXT_MAX - 1)
/* SBI extension registers are mapped as type 8 */
#define KVM_REG_RISCV_SBI_EXT (0x08 << KVM_REG_RISCV_TYPE_SHIFT)
kvm-y += vcpu_fp.o
kvm-y += vcpu_vector.o
kvm-y += vcpu_insn.o
+kvm-y += vcpu_onereg.o
kvm-y += vcpu_switch.o
kvm-y += vcpu_sbi.o
kvm-$(CONFIG_RISCV_SBI_V01) += vcpu_sbi_v01.o
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
- return -EINVAL;
+ return -ENOENT;
*out_val = 0;
if (kvm_riscv_aia_available())
struct kvm_vcpu_aia_csr *csr = &vcpu->arch.aia_context.guest_csr;
if (reg_num >= sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long))
- return -EINVAL;
+ return -ENOENT;
if (kvm_riscv_aia_available()) {
((unsigned long *)csr)[reg_num] = val;
{
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
-{
- kvm_flush_remote_tlbs(kvm);
-}
-
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free)
{
}
bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
int ret;
- kvm_pfn_t pfn = pte_pfn(range->pte);
+ kvm_pfn_t pfn = pte_pfn(range->arg.pte);
if (!kvm->arch.pgd)
return false;
#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/percpu.h>
-#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/sched/signal.h>
#include <linux/fs.h>
#include <linux/kvm_host.h>
#include <asm/csr.h>
#include <asm/cacheflush.h>
-#include <asm/hwcap.h>
-#include <asm/sbi.h>
-#include <asm/vector.h>
#include <asm/kvm_vcpu_vector.h>
const struct _kvm_stats_desc kvm_vcpu_stats_desc[] = {
sizeof(kvm_vcpu_stats_desc),
};
-#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
-
-#define KVM_ISA_EXT_ARR(ext) [KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
-
-/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
-static const unsigned long kvm_isa_ext_arr[] = {
- [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
- [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
- [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
- [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
- [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
- [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
- [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
- [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
-
- KVM_ISA_EXT_ARR(SSAIA),
- KVM_ISA_EXT_ARR(SSTC),
- KVM_ISA_EXT_ARR(SVINVAL),
- KVM_ISA_EXT_ARR(SVNAPOT),
- KVM_ISA_EXT_ARR(SVPBMT),
- KVM_ISA_EXT_ARR(ZBB),
- KVM_ISA_EXT_ARR(ZIHINTPAUSE),
- KVM_ISA_EXT_ARR(ZICBOM),
- KVM_ISA_EXT_ARR(ZICBOZ),
-};
-
-static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
-{
- unsigned long i;
-
- for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
- if (kvm_isa_ext_arr[i] == base_ext)
- return i;
- }
-
- return KVM_RISCV_ISA_EXT_MAX;
-}
-
-static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
-{
- switch (ext) {
- case KVM_RISCV_ISA_EXT_H:
- return false;
- case KVM_RISCV_ISA_EXT_V:
- return riscv_v_vstate_ctrl_user_allowed();
- default:
- break;
- }
-
- return true;
-}
-
-static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
-{
- switch (ext) {
- case KVM_RISCV_ISA_EXT_A:
- case KVM_RISCV_ISA_EXT_C:
- case KVM_RISCV_ISA_EXT_I:
- case KVM_RISCV_ISA_EXT_M:
- case KVM_RISCV_ISA_EXT_SSAIA:
- case KVM_RISCV_ISA_EXT_SSTC:
- case KVM_RISCV_ISA_EXT_SVINVAL:
- case KVM_RISCV_ISA_EXT_SVNAPOT:
- case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
- case KVM_RISCV_ISA_EXT_ZBB:
- return false;
- default:
- break;
- }
-
- return true;
-}
-
static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu)
{
struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
int rc;
struct kvm_cpu_context *cntx;
struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr;
- unsigned long host_isa, i;
/* Mark this VCPU never ran */
vcpu->arch.ran_atleast_once = false;
bitmap_zero(vcpu->arch.isa, RISCV_ISA_EXT_MAX);
/* Setup ISA features available to VCPU */
- for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
- host_isa = kvm_isa_ext_arr[i];
- if (__riscv_isa_extension_available(NULL, host_isa) &&
- kvm_riscv_vcpu_isa_enable_allowed(i))
- set_bit(host_isa, vcpu->arch.isa);
- }
+ kvm_riscv_vcpu_setup_isa(vcpu);
/* Setup vendor, arch, and implementation details */
vcpu->arch.mvendorid = sbi_get_mvendorid();
return VM_FAULT_SIGBUS;
}
-static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CONFIG);
- unsigned long reg_val;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- switch (reg_num) {
- case KVM_REG_RISCV_CONFIG_REG(isa):
- reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
- break;
- case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
- if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
- return -EINVAL;
- reg_val = riscv_cbom_block_size;
- break;
- case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
- if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
- return -EINVAL;
- reg_val = riscv_cboz_block_size;
- break;
- case KVM_REG_RISCV_CONFIG_REG(mvendorid):
- reg_val = vcpu->arch.mvendorid;
- break;
- case KVM_REG_RISCV_CONFIG_REG(marchid):
- reg_val = vcpu->arch.marchid;
- break;
- case KVM_REG_RISCV_CONFIG_REG(mimpid):
- reg_val = vcpu->arch.mimpid;
- break;
- default:
- return -EINVAL;
- }
-
- if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CONFIG);
- unsigned long i, isa_ext, reg_val;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- switch (reg_num) {
- case KVM_REG_RISCV_CONFIG_REG(isa):
- /*
- * This ONE REG interface is only defined for
- * single letter extensions.
- */
- if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
- return -EINVAL;
-
- if (!vcpu->arch.ran_atleast_once) {
- /* Ignore the enable/disable request for certain extensions */
- for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
- isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
- if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
- reg_val &= ~BIT(i);
- continue;
- }
- if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
- if (reg_val & BIT(i))
- reg_val &= ~BIT(i);
- if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
- if (!(reg_val & BIT(i)))
- reg_val |= BIT(i);
- }
- reg_val &= riscv_isa_extension_base(NULL);
- /* Do not modify anything beyond single letter extensions */
- reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
- (reg_val & KVM_RISCV_BASE_ISA_MASK);
- vcpu->arch.isa[0] = reg_val;
- kvm_riscv_vcpu_fp_reset(vcpu);
- } else {
- return -EOPNOTSUPP;
- }
- break;
- case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
- return -EOPNOTSUPP;
- case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
- return -EOPNOTSUPP;
- case KVM_REG_RISCV_CONFIG_REG(mvendorid):
- if (!vcpu->arch.ran_atleast_once)
- vcpu->arch.mvendorid = reg_val;
- else
- return -EBUSY;
- break;
- case KVM_REG_RISCV_CONFIG_REG(marchid):
- if (!vcpu->arch.ran_atleast_once)
- vcpu->arch.marchid = reg_val;
- else
- return -EBUSY;
- break;
- case KVM_REG_RISCV_CONFIG_REG(mimpid):
- if (!vcpu->arch.ran_atleast_once)
- vcpu->arch.mimpid = reg_val;
- else
- return -EBUSY;
- break;
- default:
- return -EINVAL;
- }
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CORE);
- unsigned long reg_val;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
- if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
- return -EINVAL;
-
- if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
- reg_val = cntx->sepc;
- else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
- reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
- reg_val = ((unsigned long *)cntx)[reg_num];
- else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
- reg_val = (cntx->sstatus & SR_SPP) ?
- KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
- else
- return -EINVAL;
-
- if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CORE);
- unsigned long reg_val;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
- if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
- return -EINVAL;
-
- if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
- cntx->sepc = reg_val;
- else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
- reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
- ((unsigned long *)cntx)[reg_num] = reg_val;
- else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
- if (reg_val == KVM_RISCV_MODE_S)
- cntx->sstatus |= SR_SPP;
- else
- cntx->sstatus &= ~SR_SPP;
- } else
- return -EINVAL;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
- unsigned long reg_num,
- unsigned long *out_val)
-{
- struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
- if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
- return -EINVAL;
-
- if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
- kvm_riscv_vcpu_flush_interrupts(vcpu);
- *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
- *out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
- } else
- *out_val = ((unsigned long *)csr)[reg_num];
-
- return 0;
-}
-
-static inline int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
- unsigned long reg_num,
- unsigned long reg_val)
-{
- struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
-
- if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
- return -EINVAL;
-
- if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
- reg_val &= VSIP_VALID_MASK;
- reg_val <<= VSIP_TO_HVIP_SHIFT;
- }
-
- ((unsigned long *)csr)[reg_num] = reg_val;
-
- if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
- WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- int rc;
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CSR);
- unsigned long reg_val, reg_subtype;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
- reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
- switch (reg_subtype) {
- case KVM_REG_RISCV_CSR_GENERAL:
- rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, ®_val);
- break;
- case KVM_REG_RISCV_CSR_AIA:
- rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, ®_val);
- break;
- default:
- rc = -EINVAL;
- break;
- }
- if (rc)
- return rc;
-
- if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- int rc;
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_CSR);
- unsigned long reg_val, reg_subtype;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
- reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
- switch (reg_subtype) {
- case KVM_REG_RISCV_CSR_GENERAL:
- rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
- break;
- case KVM_REG_RISCV_CSR_AIA:
- rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
- break;
- default:
- rc = -EINVAL;
- break;
- }
- if (rc)
- return rc;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_ISA_EXT);
- unsigned long reg_val = 0;
- unsigned long host_isa_ext;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
- reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
- return -EINVAL;
-
- host_isa_ext = kvm_isa_ext_arr[reg_num];
- if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
- reg_val = 1; /* Mark the given extension as available */
-
- if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- unsigned long __user *uaddr =
- (unsigned long __user *)(unsigned long)reg->addr;
- unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
- KVM_REG_SIZE_MASK |
- KVM_REG_RISCV_ISA_EXT);
- unsigned long reg_val;
- unsigned long host_isa_ext;
-
- if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
- return -EINVAL;
-
- if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
- reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
- return -EINVAL;
-
- if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
- return -EFAULT;
-
- host_isa_ext = kvm_isa_ext_arr[reg_num];
- if (!__riscv_isa_extension_available(NULL, host_isa_ext))
- return -EOPNOTSUPP;
-
- if (!vcpu->arch.ran_atleast_once) {
- /*
- * All multi-letter extension and a few single letter
- * extension can be disabled
- */
- if (reg_val == 1 &&
- kvm_riscv_vcpu_isa_enable_allowed(reg_num))
- set_bit(host_isa_ext, vcpu->arch.isa);
- else if (!reg_val &&
- kvm_riscv_vcpu_isa_disable_allowed(reg_num))
- clear_bit(host_isa_ext, vcpu->arch.isa);
- else
- return -EINVAL;
- kvm_riscv_vcpu_fp_reset(vcpu);
- } else {
- return -EOPNOTSUPP;
- }
-
- return 0;
-}
-
-static int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
- case KVM_REG_RISCV_CONFIG:
- return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
- case KVM_REG_RISCV_CORE:
- return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
- case KVM_REG_RISCV_CSR:
- return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
- case KVM_REG_RISCV_TIMER:
- return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
- case KVM_REG_RISCV_FP_F:
- return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
- KVM_REG_RISCV_FP_F);
- case KVM_REG_RISCV_FP_D:
- return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
- KVM_REG_RISCV_FP_D);
- case KVM_REG_RISCV_ISA_EXT:
- return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
- case KVM_REG_RISCV_SBI_EXT:
- return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_set_reg_vector(vcpu, reg,
- KVM_REG_RISCV_VECTOR);
- default:
- break;
- }
-
- return -EINVAL;
-}
-
-static int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg)
-{
- switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
- case KVM_REG_RISCV_CONFIG:
- return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
- case KVM_REG_RISCV_CORE:
- return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
- case KVM_REG_RISCV_CSR:
- return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
- case KVM_REG_RISCV_TIMER:
- return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
- case KVM_REG_RISCV_FP_F:
- return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
- KVM_REG_RISCV_FP_F);
- case KVM_REG_RISCV_FP_D:
- return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
- KVM_REG_RISCV_FP_D);
- case KVM_REG_RISCV_ISA_EXT:
- return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
- case KVM_REG_RISCV_SBI_EXT:
- return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
- case KVM_REG_RISCV_VECTOR:
- return kvm_riscv_vcpu_get_reg_vector(vcpu, reg,
- KVM_REG_RISCV_VECTOR);
- default:
- break;
- }
-
- return -EINVAL;
-}
-
long kvm_arch_vcpu_async_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
r = kvm_riscv_vcpu_get_reg(vcpu, ®);
break;
}
+ case KVM_GET_REG_LIST: {
+ struct kvm_reg_list __user *user_list = argp;
+ struct kvm_reg_list reg_list;
+ unsigned int n;
+
+ r = -EFAULT;
+ if (copy_from_user(®_list, user_list, sizeof(reg_list)))
+ break;
+ n = reg_list.n;
+ reg_list.n = kvm_riscv_vcpu_num_regs(vcpu);
+ if (copy_to_user(user_list, ®_list, sizeof(reg_list)))
+ break;
+ r = -E2BIG;
+ if (n < reg_list.n)
+ break;
+ r = kvm_riscv_vcpu_copy_reg_indices(vcpu, user_list->reg);
+ break;
+ }
default:
break;
}
reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
reg_val = &cntx->fp.f.f[reg_num];
else
- return -EINVAL;
+ return -ENOENT;
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
riscv_isa_extension_available(vcpu->arch.isa, d)) {
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
return -EINVAL;
reg_val = &cntx->fp.d.f[reg_num];
} else
- return -EINVAL;
+ return -ENOENT;
} else
- return -EINVAL;
+ return -ENOENT;
if (copy_to_user(uaddr, reg_val, KVM_REG_SIZE(reg->id)))
return -EFAULT;
reg_num <= KVM_REG_RISCV_FP_F_REG(f[31]))
reg_val = &cntx->fp.f.f[reg_num];
else
- return -EINVAL;
+ return -ENOENT;
} else if ((rtype == KVM_REG_RISCV_FP_D) &&
riscv_isa_extension_available(vcpu->arch.isa, d)) {
if (reg_num == KVM_REG_RISCV_FP_D_REG(fcsr)) {
return -EINVAL;
reg_val = &cntx->fp.d.f[reg_num];
} else
- return -EINVAL;
+ return -ENOENT;
} else
- return -EINVAL;
+ return -ENOENT;
if (copy_from_user(reg_val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2023 Ventana Micro Systems Inc.
+ *
+ * Authors:
+ * Anup Patel <apatel@ventanamicro.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/uaccess.h>
+#include <linux/kvm_host.h>
+#include <asm/cacheflush.h>
+#include <asm/hwcap.h>
+#include <asm/kvm_vcpu_vector.h>
+#include <asm/vector.h>
+
+#define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0)
+
+#define KVM_ISA_EXT_ARR(ext) \
+[KVM_RISCV_ISA_EXT_##ext] = RISCV_ISA_EXT_##ext
+
+/* Mapping between KVM ISA Extension ID & Host ISA extension ID */
+static const unsigned long kvm_isa_ext_arr[] = {
+ /* Single letter extensions (alphabetically sorted) */
+ [KVM_RISCV_ISA_EXT_A] = RISCV_ISA_EXT_a,
+ [KVM_RISCV_ISA_EXT_C] = RISCV_ISA_EXT_c,
+ [KVM_RISCV_ISA_EXT_D] = RISCV_ISA_EXT_d,
+ [KVM_RISCV_ISA_EXT_F] = RISCV_ISA_EXT_f,
+ [KVM_RISCV_ISA_EXT_H] = RISCV_ISA_EXT_h,
+ [KVM_RISCV_ISA_EXT_I] = RISCV_ISA_EXT_i,
+ [KVM_RISCV_ISA_EXT_M] = RISCV_ISA_EXT_m,
+ [KVM_RISCV_ISA_EXT_V] = RISCV_ISA_EXT_v,
+ /* Multi letter extensions (alphabetically sorted) */
+ KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(ZBA),
+ KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICNTR),
+ KVM_ISA_EXT_ARR(ZICSR),
+ KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZIHPM),
+};
+
+static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
+{
+ unsigned long i;
+
+ for (i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ if (kvm_isa_ext_arr[i] == base_ext)
+ return i;
+ }
+
+ return KVM_RISCV_ISA_EXT_MAX;
+}
+
+static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ case KVM_RISCV_ISA_EXT_H:
+ return false;
+ case KVM_RISCV_ISA_EXT_V:
+ return riscv_v_vstate_ctrl_user_allowed();
+ default:
+ break;
+ }
+
+ return true;
+}
+
+static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext)
+{
+ switch (ext) {
+ case KVM_RISCV_ISA_EXT_A:
+ case KVM_RISCV_ISA_EXT_C:
+ case KVM_RISCV_ISA_EXT_I:
+ case KVM_RISCV_ISA_EXT_M:
+ case KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_RISCV_ISA_EXT_ZICSR:
+ case KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ case KVM_RISCV_ISA_EXT_ZIHPM:
+ return false;
+ default:
+ break;
+ }
+
+ return true;
+}
+
+void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu)
+{
+ unsigned long host_isa, i;
+
+ for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) {
+ host_isa = kvm_isa_ext_arr[i];
+ if (__riscv_isa_extension_available(NULL, host_isa) &&
+ kvm_riscv_vcpu_isa_enable_allowed(i))
+ set_bit(host_isa, vcpu->arch.isa);
+ }
+}
+
+static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ reg_val = vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+ return -ENOENT;
+ reg_val = riscv_cbom_block_size;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+ return -ENOENT;
+ reg_val = riscv_cboz_block_size;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+ reg_val = vcpu->arch.mvendorid;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(marchid):
+ reg_val = vcpu->arch.marchid;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(mimpid):
+ reg_val = vcpu->arch.mimpid;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+ reg_val = satp_mode >> SATP_MODE_SHIFT;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CONFIG);
+ unsigned long i, isa_ext, reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_num) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ /*
+ * This ONE REG interface is only defined for
+ * single letter extensions.
+ */
+ if (fls(reg_val) >= RISCV_ISA_EXT_BASE)
+ return -EINVAL;
+
+ /*
+ * Return early (i.e. do nothing) if reg_val is the same
+ * value retrievable via kvm_riscv_vcpu_get_reg_config().
+ */
+ if (reg_val == (vcpu->arch.isa[0] & KVM_RISCV_BASE_ISA_MASK))
+ break;
+
+ if (!vcpu->arch.ran_atleast_once) {
+ /* Ignore the enable/disable request for certain extensions */
+ for (i = 0; i < RISCV_ISA_EXT_BASE; i++) {
+ isa_ext = kvm_riscv_vcpu_base2isa_ext(i);
+ if (isa_ext >= KVM_RISCV_ISA_EXT_MAX) {
+ reg_val &= ~BIT(i);
+ continue;
+ }
+ if (!kvm_riscv_vcpu_isa_enable_allowed(isa_ext))
+ if (reg_val & BIT(i))
+ reg_val &= ~BIT(i);
+ if (!kvm_riscv_vcpu_isa_disable_allowed(isa_ext))
+ if (!(reg_val & BIT(i)))
+ reg_val |= BIT(i);
+ }
+ reg_val &= riscv_isa_extension_base(NULL);
+ /* Do not modify anything beyond single letter extensions */
+ reg_val = (vcpu->arch.isa[0] & ~KVM_RISCV_BASE_ISA_MASK) |
+ (reg_val & KVM_RISCV_BASE_ISA_MASK);
+ vcpu->arch.isa[0] = reg_val;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EBUSY;
+ }
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+ return -ENOENT;
+ if (reg_val != riscv_cbom_block_size)
+ return -EINVAL;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+ return -ENOENT;
+ if (reg_val != riscv_cboz_block_size)
+ return -EINVAL;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+ if (reg_val == vcpu->arch.mvendorid)
+ break;
+ if (!vcpu->arch.ran_atleast_once)
+ vcpu->arch.mvendorid = reg_val;
+ else
+ return -EBUSY;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(marchid):
+ if (reg_val == vcpu->arch.marchid)
+ break;
+ if (!vcpu->arch.ran_atleast_once)
+ vcpu->arch.marchid = reg_val;
+ else
+ return -EBUSY;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(mimpid):
+ if (reg_val == vcpu->arch.mimpid)
+ break;
+ if (!vcpu->arch.ran_atleast_once)
+ vcpu->arch.mimpid = reg_val;
+ else
+ return -EBUSY;
+ break;
+ case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+ if (reg_val != (satp_mode >> SATP_MODE_SHIFT))
+ return -EINVAL;
+ break;
+ default:
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -ENOENT;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ reg_val = cntx->sepc;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ reg_val = ((unsigned long *)cntx)[reg_num];
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode))
+ reg_val = (cntx->sstatus & SR_SPP) ?
+ KVM_RISCV_MODE_S : KVM_RISCV_MODE_U;
+ else
+ return -ENOENT;
+
+ if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_core(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CORE);
+ unsigned long reg_val;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+ if (reg_num >= sizeof(struct kvm_riscv_core) / sizeof(unsigned long))
+ return -ENOENT;
+
+ if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ if (reg_num == KVM_REG_RISCV_CORE_REG(regs.pc))
+ cntx->sepc = reg_val;
+ else if (KVM_REG_RISCV_CORE_REG(regs.pc) < reg_num &&
+ reg_num <= KVM_REG_RISCV_CORE_REG(regs.t6))
+ ((unsigned long *)cntx)[reg_num] = reg_val;
+ else if (reg_num == KVM_REG_RISCV_CORE_REG(mode)) {
+ if (reg_val == KVM_RISCV_MODE_S)
+ cntx->sstatus |= SR_SPP;
+ else
+ cntx->sstatus &= ~SR_SPP;
+ } else
+ return -ENOENT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_general_get_csr(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long *out_val)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -ENOENT;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ kvm_riscv_vcpu_flush_interrupts(vcpu);
+ *out_val = (csr->hvip >> VSIP_TO_HVIP_SHIFT) & VSIP_VALID_MASK;
+ *out_val |= csr->hvip & ~IRQ_LOCAL_MASK;
+ } else
+ *out_val = ((unsigned long *)csr)[reg_num];
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_general_set_csr(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long reg_val)
+{
+ struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr;
+
+ if (reg_num >= sizeof(struct kvm_riscv_csr) / sizeof(unsigned long))
+ return -ENOENT;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip)) {
+ reg_val &= VSIP_VALID_MASK;
+ reg_val <<= VSIP_TO_HVIP_SHIFT;
+ }
+
+ ((unsigned long *)csr)[reg_num] = reg_val;
+
+ if (reg_num == KVM_REG_RISCV_CSR_REG(sip))
+ WRITE_ONCE(vcpu->arch.irqs_pending_mask[0], 0);
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ int rc;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val, reg_subtype;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_CSR_GENERAL:
+ rc = kvm_riscv_vcpu_general_get_csr(vcpu, reg_num, ®_val);
+ break;
+ case KVM_REG_RISCV_CSR_AIA:
+ rc = kvm_riscv_vcpu_aia_get_csr(vcpu, reg_num, ®_val);
+ break;
+ default:
+ rc = -ENOENT;
+ break;
+ }
+ if (rc)
+ return rc;
+
+ if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_csr(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ int rc;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_CSR);
+ unsigned long reg_val, reg_subtype;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_CSR_GENERAL:
+ rc = kvm_riscv_vcpu_general_set_csr(vcpu, reg_num, reg_val);
+ break;
+ case KVM_REG_RISCV_CSR_AIA:
+ rc = kvm_riscv_vcpu_aia_set_csr(vcpu, reg_num, reg_val);
+ break;
+ default:
+ rc = -ENOENT;
+ break;
+ }
+ if (rc)
+ return rc;
+
+ return 0;
+}
+
+static int riscv_vcpu_get_isa_ext_single(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long *reg_val)
+{
+ unsigned long host_isa_ext;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+ reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -ENOENT;
+
+ *reg_val = 0;
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (__riscv_isa_extension_available(vcpu->arch.isa, host_isa_ext))
+ *reg_val = 1; /* Mark the given extension as available */
+
+ return 0;
+}
+
+static int riscv_vcpu_set_isa_ext_single(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long reg_val)
+{
+ unsigned long host_isa_ext;
+
+ if (reg_num >= KVM_RISCV_ISA_EXT_MAX ||
+ reg_num >= ARRAY_SIZE(kvm_isa_ext_arr))
+ return -ENOENT;
+
+ host_isa_ext = kvm_isa_ext_arr[reg_num];
+ if (!__riscv_isa_extension_available(NULL, host_isa_ext))
+ return -ENOENT;
+
+ if (reg_val == test_bit(host_isa_ext, vcpu->arch.isa))
+ return 0;
+
+ if (!vcpu->arch.ran_atleast_once) {
+ /*
+ * All multi-letter extension and a few single letter
+ * extension can be disabled
+ */
+ if (reg_val == 1 &&
+ kvm_riscv_vcpu_isa_enable_allowed(reg_num))
+ set_bit(host_isa_ext, vcpu->arch.isa);
+ else if (!reg_val &&
+ kvm_riscv_vcpu_isa_disable_allowed(reg_num))
+ clear_bit(host_isa_ext, vcpu->arch.isa);
+ else
+ return -EINVAL;
+ kvm_riscv_vcpu_fp_reset(vcpu);
+ } else {
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static int riscv_vcpu_get_isa_ext_multi(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long *reg_val)
+{
+ unsigned long i, ext_id, ext_val;
+
+ if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+ return -ENOENT;
+
+ for (i = 0; i < BITS_PER_LONG; i++) {
+ ext_id = i + reg_num * BITS_PER_LONG;
+ if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+ break;
+
+ ext_val = 0;
+ riscv_vcpu_get_isa_ext_single(vcpu, ext_id, &ext_val);
+ if (ext_val)
+ *reg_val |= KVM_REG_RISCV_ISA_MULTI_MASK(ext_id);
+ }
+
+ return 0;
+}
+
+static int riscv_vcpu_set_isa_ext_multi(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ unsigned long reg_val, bool enable)
+{
+ unsigned long i, ext_id;
+
+ if (reg_num > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+ return -ENOENT;
+
+ for_each_set_bit(i, ®_val, BITS_PER_LONG) {
+ ext_id = i + reg_num * BITS_PER_LONG;
+ if (ext_id >= KVM_RISCV_ISA_EXT_MAX)
+ break;
+
+ riscv_vcpu_set_isa_ext_single(vcpu, ext_id, enable);
+ }
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_get_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ int rc;
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val, reg_subtype;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ reg_val = 0;
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_SINGLE:
+ rc = riscv_vcpu_get_isa_ext_single(vcpu, reg_num, ®_val);
+ break;
+ case KVM_REG_RISCV_ISA_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ rc = riscv_vcpu_get_isa_ext_multi(vcpu, reg_num, ®_val);
+ if (!rc && reg_subtype == KVM_REG_RISCV_ISA_MULTI_DIS)
+ reg_val = ~reg_val;
+ break;
+ default:
+ rc = -ENOENT;
+ }
+ if (rc)
+ return rc;
+
+ if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ unsigned long __user *uaddr =
+ (unsigned long __user *)(unsigned long)reg->addr;
+ unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
+ KVM_REG_SIZE_MASK |
+ KVM_REG_RISCV_ISA_EXT);
+ unsigned long reg_val, reg_subtype;
+
+ if (KVM_REG_SIZE(reg->id) != sizeof(unsigned long))
+ return -EINVAL;
+
+ reg_subtype = reg_num & KVM_REG_RISCV_SUBTYPE_MASK;
+ reg_num &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
+ return -EFAULT;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_SINGLE:
+ return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val);
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true);
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false);
+ default:
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int copy_config_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ int n = 0;
+
+ for (int i = 0; i < sizeof(struct kvm_riscv_config)/sizeof(unsigned long);
+ i++) {
+ u64 size;
+ u64 reg;
+
+ /*
+ * Avoid reporting config reg if the corresponding extension
+ * was not available.
+ */
+ if (i == KVM_REG_RISCV_CONFIG_REG(zicbom_block_size) &&
+ !riscv_isa_extension_available(vcpu->arch.isa, ZICBOM))
+ continue;
+ else if (i == KVM_REG_RISCV_CONFIG_REG(zicboz_block_size) &&
+ !riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+ continue;
+
+ size = IS_ENABLED(CONFIG_32BIT) ? KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CONFIG | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ n++;
+ }
+
+ return n;
+}
+
+static unsigned long num_config_regs(const struct kvm_vcpu *vcpu)
+{
+ return copy_config_reg_indices(vcpu, NULL);
+}
+
+static inline unsigned long num_core_regs(void)
+{
+ return sizeof(struct kvm_riscv_core) / sizeof(unsigned long);
+}
+
+static int copy_core_reg_indices(u64 __user *uindices)
+{
+ int n = num_core_regs();
+
+ for (int i = 0; i < n; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CORE | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ return n;
+}
+
+static inline unsigned long num_csr_regs(const struct kvm_vcpu *vcpu)
+{
+ unsigned long n = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+
+ if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA))
+ n += sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+ return n;
+}
+
+static int copy_csr_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ int n1 = sizeof(struct kvm_riscv_csr) / sizeof(unsigned long);
+ int n2 = 0;
+
+ /* copy general csr regs */
+ for (int i = 0; i < n1; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+ KVM_REG_RISCV_CSR_GENERAL | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ /* copy AIA csr regs */
+ if (riscv_isa_extension_available(vcpu->arch.isa, SSAIA)) {
+ n2 = sizeof(struct kvm_riscv_aia_csr) / sizeof(unsigned long);
+
+ for (int i = 0; i < n2; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_CSR |
+ KVM_REG_RISCV_CSR_AIA | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+ }
+
+ return n1 + n2;
+}
+
+static inline unsigned long num_timer_regs(void)
+{
+ return sizeof(struct kvm_riscv_timer) / sizeof(u64);
+}
+
+static int copy_timer_reg_indices(u64 __user *uindices)
+{
+ int n = num_timer_regs();
+
+ for (int i = 0; i < n; i++) {
+ u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+ KVM_REG_RISCV_TIMER | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ return n;
+}
+
+static inline unsigned long num_fp_f_regs(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ if (riscv_isa_extension_available(vcpu->arch.isa, f))
+ return sizeof(cntx->fp.f) / sizeof(u32);
+ else
+ return 0;
+}
+
+static int copy_fp_f_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ int n = num_fp_f_regs(vcpu);
+
+ for (int i = 0; i < n; i++) {
+ u64 reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 |
+ KVM_REG_RISCV_FP_F | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ return n;
+}
+
+static inline unsigned long num_fp_d_regs(const struct kvm_vcpu *vcpu)
+{
+ const struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
+
+ if (riscv_isa_extension_available(vcpu->arch.isa, d))
+ return sizeof(cntx->fp.d.f) / sizeof(u64) + 1;
+ else
+ return 0;
+}
+
+static int copy_fp_d_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ int i;
+ int n = num_fp_d_regs(vcpu);
+ u64 reg;
+
+ /* copy fp.d.f indices */
+ for (i = 0; i < n-1; i++) {
+ reg = KVM_REG_RISCV | KVM_REG_SIZE_U64 |
+ KVM_REG_RISCV_FP_D | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ /* copy fp.d.fcsr indices */
+ reg = KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | i;
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ return n;
+}
+
+static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ unsigned int n = 0;
+ unsigned long isa_ext;
+
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_ISA_EXT | i;
+
+ isa_ext = kvm_isa_ext_arr[i];
+ if (!__riscv_isa_extension_available(vcpu->arch.isa, isa_ext))
+ continue;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ n++;
+ }
+
+ return n;
+}
+
+static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu)
+{
+ return copy_isa_ext_reg_indices(vcpu, NULL);;
+}
+
+static inline unsigned long num_sbi_ext_regs(void)
+{
+ /*
+ * number of KVM_REG_RISCV_SBI_SINGLE +
+ * 2 x (number of KVM_REG_RISCV_SBI_MULTI)
+ */
+ return KVM_RISCV_SBI_EXT_MAX + 2*(KVM_REG_RISCV_SBI_MULTI_REG_LAST+1);
+}
+
+static int copy_sbi_ext_reg_indices(u64 __user *uindices)
+{
+ int n;
+
+ /* copy KVM_REG_RISCV_SBI_SINGLE */
+ n = KVM_RISCV_SBI_EXT_MAX;
+ for (int i = 0; i < n; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+ KVM_REG_RISCV_SBI_SINGLE | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ /* copy KVM_REG_RISCV_SBI_MULTI */
+ n = KVM_REG_RISCV_SBI_MULTI_REG_LAST + 1;
+ for (int i = 0; i < n; i++) {
+ u64 size = IS_ENABLED(CONFIG_32BIT) ?
+ KVM_REG_SIZE_U32 : KVM_REG_SIZE_U64;
+ u64 reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+ KVM_REG_RISCV_SBI_MULTI_EN | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+
+ reg = KVM_REG_RISCV | size | KVM_REG_RISCV_SBI_EXT |
+ KVM_REG_RISCV_SBI_MULTI_DIS | i;
+
+ if (uindices) {
+ if (put_user(reg, uindices))
+ return -EFAULT;
+ uindices++;
+ }
+ }
+
+ return num_sbi_ext_regs();
+}
+
+/*
+ * kvm_riscv_vcpu_num_regs - how many registers do we present via KVM_GET/SET_ONE_REG
+ *
+ * This is for all registers.
+ */
+unsigned long kvm_riscv_vcpu_num_regs(struct kvm_vcpu *vcpu)
+{
+ unsigned long res = 0;
+
+ res += num_config_regs(vcpu);
+ res += num_core_regs();
+ res += num_csr_regs(vcpu);
+ res += num_timer_regs();
+ res += num_fp_f_regs(vcpu);
+ res += num_fp_d_regs(vcpu);
+ res += num_isa_ext_regs(vcpu);
+ res += num_sbi_ext_regs();
+
+ return res;
+}
+
+/*
+ * kvm_riscv_vcpu_copy_reg_indices - get indices of all registers.
+ */
+int kvm_riscv_vcpu_copy_reg_indices(struct kvm_vcpu *vcpu,
+ u64 __user *uindices)
+{
+ int ret;
+
+ ret = copy_config_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_core_reg_indices(uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_csr_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_timer_reg_indices(uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_fp_f_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_fp_d_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_isa_ext_reg_indices(vcpu, uindices);
+ if (ret < 0)
+ return ret;
+ uindices += ret;
+
+ ret = copy_sbi_ext_reg_indices(uindices);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+int kvm_riscv_vcpu_set_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
+ case KVM_REG_RISCV_CONFIG:
+ return kvm_riscv_vcpu_set_reg_config(vcpu, reg);
+ case KVM_REG_RISCV_CORE:
+ return kvm_riscv_vcpu_set_reg_core(vcpu, reg);
+ case KVM_REG_RISCV_CSR:
+ return kvm_riscv_vcpu_set_reg_csr(vcpu, reg);
+ case KVM_REG_RISCV_TIMER:
+ return kvm_riscv_vcpu_set_reg_timer(vcpu, reg);
+ case KVM_REG_RISCV_FP_F:
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ case KVM_REG_RISCV_FP_D:
+ return kvm_riscv_vcpu_set_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_ISA_EXT:
+ return kvm_riscv_vcpu_set_reg_isa_ext(vcpu, reg);
+ case KVM_REG_RISCV_SBI_EXT:
+ return kvm_riscv_vcpu_set_reg_sbi_ext(vcpu, reg);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_set_reg_vector(vcpu, reg);
+ default:
+ break;
+ }
+
+ return -ENOENT;
+}
+
+int kvm_riscv_vcpu_get_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg)
+{
+ switch (reg->id & KVM_REG_RISCV_TYPE_MASK) {
+ case KVM_REG_RISCV_CONFIG:
+ return kvm_riscv_vcpu_get_reg_config(vcpu, reg);
+ case KVM_REG_RISCV_CORE:
+ return kvm_riscv_vcpu_get_reg_core(vcpu, reg);
+ case KVM_REG_RISCV_CSR:
+ return kvm_riscv_vcpu_get_reg_csr(vcpu, reg);
+ case KVM_REG_RISCV_TIMER:
+ return kvm_riscv_vcpu_get_reg_timer(vcpu, reg);
+ case KVM_REG_RISCV_FP_F:
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_F);
+ case KVM_REG_RISCV_FP_D:
+ return kvm_riscv_vcpu_get_reg_fp(vcpu, reg,
+ KVM_REG_RISCV_FP_D);
+ case KVM_REG_RISCV_ISA_EXT:
+ return kvm_riscv_vcpu_get_reg_isa_ext(vcpu, reg);
+ case KVM_REG_RISCV_SBI_EXT:
+ return kvm_riscv_vcpu_get_reg_sbi_ext(vcpu, reg);
+ case KVM_REG_RISCV_VECTOR:
+ return kvm_riscv_vcpu_get_reg_vector(vcpu, reg);
+ default:
+ break;
+ }
+
+ return -ENOENT;
+}
const struct kvm_riscv_sbi_extension_entry *sext = NULL;
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
- if (reg_num >= KVM_RISCV_SBI_EXT_MAX ||
- (reg_val != 1 && reg_val != 0))
+ if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
+ return -ENOENT;
+
+ if (reg_val != 1 && reg_val != 0)
return -EINVAL;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context;
if (reg_num >= KVM_RISCV_SBI_EXT_MAX)
- return -EINVAL;
+ return -ENOENT;
for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) {
if (sbi_ext[i].ext_idx == reg_num) {
unsigned long i, ext_id;
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
- return -EINVAL;
+ return -ENOENT;
for_each_set_bit(i, ®_val, BITS_PER_LONG) {
ext_id = i + reg_num * BITS_PER_LONG;
unsigned long i, ext_id, ext_val;
if (reg_num > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
- return -EINVAL;
+ return -ENOENT;
for (i = 0; i < BITS_PER_LONG; i++) {
ext_id = i + reg_num * BITS_PER_LONG;
case KVM_REG_RISCV_SBI_MULTI_DIS:
return riscv_vcpu_set_sbi_ext_multi(vcpu, reg_num, reg_val, false);
default:
- return -EINVAL;
+ return -ENOENT;
}
return 0;
reg_val = ~reg_val;
break;
default:
- rc = -EINVAL;
+ rc = -ENOENT;
}
if (rc)
return rc;
if (KVM_REG_SIZE(reg->id) != sizeof(u64))
return -EINVAL;
if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
- return -EINVAL;
+ return -ENOENT;
switch (reg_num) {
case KVM_REG_RISCV_TIMER_REG(frequency):
KVM_RISCV_TIMER_STATE_OFF;
break;
default:
- return -EINVAL;
+ return -ENOENT;
}
if (copy_to_user(uaddr, ®_val, KVM_REG_SIZE(reg->id)))
if (KVM_REG_SIZE(reg->id) != sizeof(u64))
return -EINVAL;
if (reg_num >= sizeof(struct kvm_riscv_timer) / sizeof(u64))
- return -EINVAL;
+ return -ENOENT;
if (copy_from_user(®_val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
switch (reg_num) {
case KVM_REG_RISCV_TIMER_REG(frequency):
- ret = -EOPNOTSUPP;
+ if (reg_val != riscv_timebase)
+ return -EINVAL;
break;
case KVM_REG_RISCV_TIMER_REG(time):
gt->time_delta = reg_val - get_cycles64();
ret = kvm_riscv_vcpu_timer_cancel(t);
break;
default:
- ret = -EINVAL;
+ ret = -ENOENT;
break;
}
}
#endif
-static void *kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
- unsigned long reg_num,
- size_t reg_size)
+static int kvm_riscv_vcpu_vreg_addr(struct kvm_vcpu *vcpu,
+ unsigned long reg_num,
+ size_t reg_size,
+ void **reg_addr)
{
struct kvm_cpu_context *cntx = &vcpu->arch.guest_context;
- void *reg_val;
size_t vlenb = riscv_v_vsize / 32;
if (reg_num < KVM_REG_RISCV_VECTOR_REG(0)) {
if (reg_size != sizeof(unsigned long))
- return NULL;
+ return -EINVAL;
switch (reg_num) {
case KVM_REG_RISCV_VECTOR_CSR_REG(vstart):
- reg_val = &cntx->vector.vstart;
+ *reg_addr = &cntx->vector.vstart;
break;
case KVM_REG_RISCV_VECTOR_CSR_REG(vl):
- reg_val = &cntx->vector.vl;
+ *reg_addr = &cntx->vector.vl;
break;
case KVM_REG_RISCV_VECTOR_CSR_REG(vtype):
- reg_val = &cntx->vector.vtype;
+ *reg_addr = &cntx->vector.vtype;
break;
case KVM_REG_RISCV_VECTOR_CSR_REG(vcsr):
- reg_val = &cntx->vector.vcsr;
+ *reg_addr = &cntx->vector.vcsr;
break;
case KVM_REG_RISCV_VECTOR_CSR_REG(datap):
default:
- return NULL;
+ return -ENOENT;
}
} else if (reg_num <= KVM_REG_RISCV_VECTOR_REG(31)) {
if (reg_size != vlenb)
- return NULL;
- reg_val = cntx->vector.datap
- + (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
+ return -EINVAL;
+ *reg_addr = cntx->vector.datap +
+ (reg_num - KVM_REG_RISCV_VECTOR_REG(0)) * vlenb;
} else {
- return NULL;
+ return -ENOENT;
}
- return reg_val;
+ return 0;
}
int kvm_riscv_vcpu_get_reg_vector(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg,
- unsigned long rtype)
+ const struct kvm_one_reg *reg)
{
unsigned long *isa = vcpu->arch.isa;
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK |
- rtype);
- void *reg_val = NULL;
+ KVM_REG_RISCV_VECTOR);
size_t reg_size = KVM_REG_SIZE(reg->id);
+ void *reg_addr;
+ int rc;
- if (rtype == KVM_REG_RISCV_VECTOR &&
- riscv_isa_extension_available(isa, v)) {
- reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
- }
+ if (!riscv_isa_extension_available(isa, v))
+ return -ENOENT;
- if (!reg_val)
- return -EINVAL;
+ rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, ®_addr);
+ if (rc)
+ return rc;
- if (copy_to_user(uaddr, reg_val, reg_size))
+ if (copy_to_user(uaddr, reg_addr, reg_size))
return -EFAULT;
return 0;
}
int kvm_riscv_vcpu_set_reg_vector(struct kvm_vcpu *vcpu,
- const struct kvm_one_reg *reg,
- unsigned long rtype)
+ const struct kvm_one_reg *reg)
{
unsigned long *isa = vcpu->arch.isa;
unsigned long __user *uaddr =
(unsigned long __user *)(unsigned long)reg->addr;
unsigned long reg_num = reg->id & ~(KVM_REG_ARCH_MASK |
KVM_REG_SIZE_MASK |
- rtype);
- void *reg_val = NULL;
+ KVM_REG_RISCV_VECTOR);
size_t reg_size = KVM_REG_SIZE(reg->id);
+ void *reg_addr;
+ int rc;
- if (rtype == KVM_REG_RISCV_VECTOR &&
- riscv_isa_extension_available(isa, v)) {
- reg_val = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size);
- }
+ if (!riscv_isa_extension_available(isa, v))
+ return -ENOENT;
- if (!reg_val)
- return -EINVAL;
+ rc = kvm_riscv_vcpu_vreg_addr(vcpu, reg_num, reg_size, ®_addr);
+ if (rc)
+ return rc;
- if (copy_from_user(reg_val, uaddr, reg_size))
+ if (copy_from_user(reg_addr, uaddr, reg_size))
return -EFAULT;
return 0;
};
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-int __bootdata(noexec_disabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
struct ipl_parameter_block __bootdata_preserved(ipl_block);
zlib_dfltcc_support = ZLIB_DFLTCC_FULL_DEBUG;
}
- if (!strcmp(param, "noexec")) {
- rc = kstrtobool(val, &enabled);
- if (!rc && !enabled)
- noexec_disabled = 1;
- }
-
if (!strcmp(param, "facilities") && val)
modify_fac_list(val);
}
if (test_facility(78))
machine.has_edat2 = 1;
- if (!noexec_disabled && test_facility(130)) {
+ if (test_facility(130))
machine.has_nx = 1;
- __ctl_set_bit(0, 20);
- }
}
static void setup_lpp(void)
if (kasan_pte_populate_zero_shadow(pte, mode))
continue;
entry = __pte(_pa(addr, PAGE_SIZE, mode));
- entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ entry = set_pte_bit(entry, PAGE_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pte_bit(entry, __pgprot(_PAGE_NOEXEC));
set_pte(pte, entry);
pages++;
}
continue;
if (can_large_pmd(pmd, addr, next)) {
entry = __pmd(_pa(addr, _SEGMENT_SIZE, mode));
- entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pmd_bit(entry, __pgprot(_SEGMENT_ENTRY_NOEXEC));
set_pmd(pmd, entry);
pages++;
continue;
continue;
if (can_large_pud(pud, addr, next)) {
entry = __pud(_pa(addr, _REGION3_SIZE, mode));
- entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ entry = set_pud_bit(entry, REGION3_KERNEL);
+ if (!machine.has_nx)
+ entry = clear_pud_bit(entry, __pgprot(_REGION_ENTRY_NOEXEC));
set_pud(pud, entry);
pages++;
continue;
+# Help: Enable BTF debug info
CONFIG_DEBUG_INFO_BTF=y
+# Help: Enable KASan for debugging
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_KASAN_VMALLOC=y
struct hlist_node list; /* Handler queueing. */
void (*handler)(struct airq_struct *airq, struct tpi_info *tpi_info);
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
- u8 lsi_mask; /* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
u8 flags;
};
* to DMA. It _is_ used for the s390 memory zone split at 2GB caused
* by the 31 bit heritage.
*/
-#define MAX_DMA_ADDRESS 0x80000000
+#define MAX_DMA_ADDRESS __va(0x80000000)
#endif /* _ASM_S390_DMA_H */
__u64 *fac_list;
u64 cpuid;
unsigned short ibc;
+ /* subset of available UV-features for pv-guests enabled by user space */
+ struct kvm_s390_vm_cpu_uv_feat uv_feat_guest;
};
typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
*/
#define __bootdata_preserved(var) __section(".boot.preserved.data." #var) var
-extern unsigned long __samode31, __eamode31;
-extern unsigned long __stext_amode31, __etext_amode31;
+extern char *__samode31, *__eamode31;
+extern char *__stext_amode31, *__etext_amode31;
#endif
#define SET_MEMORY_INV BIT(_SET_MEMORY_INV_BIT)
#define SET_MEMORY_DEF BIT(_SET_MEMORY_DEF_BIT)
-int __set_memory(unsigned long addr, int numpages, unsigned long flags);
-
-static inline int set_memory_ro(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_RO);
-}
-
-static inline int set_memory_rw(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_RW);
-}
-
-static inline int set_memory_nx(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_NX);
-}
-
-static inline int set_memory_x(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_X);
-}
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags);
#define set_memory_rox set_memory_rox
-static inline int set_memory_rox(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_RO | SET_MEMORY_X);
-}
-static inline int set_memory_rwnx(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_RW | SET_MEMORY_NX);
+/*
+ * Generate two variants of each set_memory() function:
+ *
+ * set_memory_yy(unsigned long addr, int numpages);
+ * __set_memory_yy(void *start, void *end);
+ *
+ * The second variant exists for both convenience to avoid the usual
+ * (unsigned long) casts, but unlike the first variant it can also be used
+ * for areas larger than 8TB, which may happen at memory initialization.
+ */
+#define __SET_MEMORY_FUNC(fname, flags) \
+static inline int fname(unsigned long addr, int numpages) \
+{ \
+ return __set_memory(addr, numpages, (flags)); \
+} \
+ \
+static inline int __##fname(void *start, void *end) \
+{ \
+ unsigned long numpages; \
+ \
+ numpages = (end - start) >> PAGE_SHIFT; \
+ return __set_memory((unsigned long)start, numpages, (flags)); \
}
-static inline int set_memory_4k(unsigned long addr, int numpages)
-{
- return __set_memory(addr, numpages, SET_MEMORY_4K);
-}
+__SET_MEMORY_FUNC(set_memory_ro, SET_MEMORY_RO)
+__SET_MEMORY_FUNC(set_memory_rw, SET_MEMORY_RW)
+__SET_MEMORY_FUNC(set_memory_nx, SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_x, SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rox, SET_MEMORY_RO | SET_MEMORY_X)
+__SET_MEMORY_FUNC(set_memory_rwnx, SET_MEMORY_RW | SET_MEMORY_NX)
+__SET_MEMORY_FUNC(set_memory_4k, SET_MEMORY_4K)
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
#define ZLIB_DFLTCC_INFLATE_ONLY 3
#define ZLIB_DFLTCC_FULL_DEBUG 4
-extern int noexec_disabled;
extern unsigned long ident_map_size;
extern unsigned long max_mappable;
enum uv_feat_ind {
BIT_UV_FEAT_MISC = 0,
BIT_UV_FEAT_AIV = 1,
+ BIT_UV_FEAT_AP = 4,
+ BIT_UV_FEAT_AP_INTR = 5,
};
struct uv_cb_header {
u64 guest_handle;
u64 conf_base_stor_origin;
u64 conf_virt_stor_origin;
- u64 reserved30;
+ u8 reserved30[6];
+ union {
+ struct {
+ u16 : 14;
+ u16 ap_instr_intr : 1;
+ u16 ap_allow_instr : 1;
+ };
+ u16 raw;
+ } flags;
u64 guest_stor_origin;
u64 guest_stor_len;
u64 guest_sca;
extern struct uv_info uv_info;
+static inline bool uv_has_feature(u8 feature_bit)
+{
+ if (feature_bit >= sizeof(uv_info.uv_feature_indications) * 8)
+ return false;
+ return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
+}
+
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
__u8 reserved[1728];
};
+#define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6
+#define KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST 7
+
+#define KVM_S390_VM_CPU_UV_FEAT_NR_BITS 64
+struct kvm_s390_vm_cpu_uv_feat {
+ union {
+ struct {
+ __u64 : 4;
+ __u64 ap : 1; /* bit 4 */
+ __u64 ap_intr : 1; /* bit 5 */
+ __u64 : 58;
+ };
+ __u64 feat;
+ };
+};
+
/* kvm attributes for crypto */
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
decompressor_handled_param(mem);
decompressor_handled_param(vmalloc);
decompressor_handled_param(dfltcc);
-decompressor_handled_param(noexec);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
#if IS_ENABLED(CONFIG_KVM)
S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
__ctl_set_bit(0, 17);
}
- if (test_facility(130) && !noexec_disabled) {
+ if (test_facility(130))
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
- __ctl_set_bit(0, 20);
- }
if (test_facility(133))
S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
if (test_facility(139) && (tod_clock_base.tod >> 63)) {
VMCOREINFO_SYMBOL(lowcore_ptr);
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
- vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31);
- vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31);
+ vmcoreinfo_append_str("SAMODE31=%lx\n", (unsigned long)__samode31);
+ vmcoreinfo_append_str("EAMODE31=%lx\n", (unsigned long)__eamode31);
vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
abs_lc = get_abs_lowcore();
abs_lc->vmcore_info = paddr_vmcoreinfo_note();
* relocated above 2 GB, because it has to use 31 bit addresses.
* Such code and data is part of the .amode31 section.
*/
-unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
-unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
-unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
-unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
+char __amode31_ref *__samode31 = _samode31;
+char __amode31_ref *__eamode31 = _eamode31;
+char __amode31_ref *__stext_amode31 = _stext_amode31;
+char __amode31_ref *__etext_amode31 = _etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
-int __bootdata(noexec_disabled);
unsigned long __bootdata_preserved(max_mappable);
unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info);
static void __init relocate_amode31_section(void)
{
unsigned long amode31_size = __eamode31 - __samode31;
- long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
- long *ptr;
+ long amode31_offset, *ptr;
+ amode31_offset = physmem_info.reserved[RR_AMODE31].start - (unsigned long)__samode31;
pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
/* Move original AMODE31 section to the new one */
- memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
+ memmove((void *)physmem_info.reserved[RR_AMODE31].start, __samode31, amode31_size);
/* Zero out the old AMODE31 section to catch invalid accesses within it */
- memset((void *)__samode31, 0, amode31_size);
+ memset(__samode31, 0, amode31_size);
/* Update all AMODE31 region references */
for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
* shared page from a different protected VM will automatically also
* transfer its ownership.
*/
- if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
return false;
if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
return false;
#define per_event(vcpu) (vcpu->arch.sie_block->iprcc & PGM_PER)
+static bool should_handle_per_event(const struct kvm_vcpu *vcpu)
+{
+ if (!guestdbg_enabled(vcpu) || !per_event(vcpu))
+ return false;
+ if (guestdbg_sstep_enabled(vcpu) &&
+ vcpu->arch.sie_block->iprcc != PGM_PER) {
+ /*
+ * __vcpu_run() will exit after delivering the concurrently
+ * indicated condition.
+ */
+ return false;
+ }
+ return true;
+}
+
static int handle_prog(struct kvm_vcpu *vcpu)
{
psw_t psw;
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EOPNOTSUPP;
- if (guestdbg_enabled(vcpu) && per_event(vcpu)) {
+ if (should_handle_per_event(vcpu)) {
rc = kvm_s390_handle_per_event(vcpu);
if (rc)
return rc;
return handle_instruction(vcpu);
}
+static bool should_handle_per_ifetch(const struct kvm_vcpu *vcpu, int rc)
+{
+ /* Process PER, also if the instruction is processed in user space. */
+ if (!(vcpu->arch.sie_block->icptstatus & 0x02))
+ return false;
+ if (rc != 0 && rc != -EOPNOTSUPP)
+ return false;
+ if (guestdbg_sstep_enabled(vcpu) && vcpu->arch.local_int.pending_irqs)
+ /* __vcpu_run() will exit after delivering the interrupt. */
+ return false;
+ return true;
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
int rc, per_rc = 0;
rc = handle_partial_execution(vcpu);
break;
case ICPT_KSS:
- rc = kvm_s390_skey_check_enable(vcpu);
- break;
+ /* Instruction will be redriven, skip the PER check. */
+ return kvm_s390_skey_check_enable(vcpu);
case ICPT_MCHKREQ:
case ICPT_INT_ENABLE:
/*
return -EOPNOTSUPP;
}
- /* process PER, also if the instruction is processed in user space */
- if (vcpu->arch.sie_block->icptstatus & 0x02 &&
- (!rc || rc == -EOPNOTSUPP))
+ if (should_handle_per_ifetch(vcpu, rc))
per_rc = kvm_s390_handle_per_ifetch_icpt(vcpu);
return per_rc ? per_rc : rc;
}
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc = 0;
+ bool delivered = false;
unsigned long irq_type;
unsigned long irqs;
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
+ delivered |= !rc;
+ }
+
+ /*
+ * We delivered at least one interrupt and modified the PC. Force a
+ * singlestep event now.
+ */
+ if (delivered && guestdbg_sstep_enabled(vcpu)) {
+ struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
+
+ debug_exit->addr = vcpu->arch.sie_block->gpsw.addr;
+ debug_exit->type = KVM_SINGLESTEP;
+ vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING;
}
set_intercept_indicators(vcpu);
static struct airq_struct gib_alert_irq = {
.handler = gib_alert_irq_handler,
- .lsi_ptr = &gib_alert_irq.lsi_mask,
};
void kvm_s390_gib_destroy(void)
rc = -EIO;
goto out_free_gib;
}
+ /* adapter interrupts used for AP (applicable here) don't use the LSI */
+ *gib_alert_irq.lsi_ptr = 0xff;
gib->nisc = nisc;
gib_origin = virt_to_phys(gib);
return 0;
}
+#define KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK \
+( \
+ ((struct kvm_s390_vm_cpu_uv_feat){ \
+ .ap = 1, \
+ .ap_intr = 1, \
+ }) \
+ .feat \
+)
+
+static int kvm_s390_set_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *ptr = (void __user *)attr->addr;
+ unsigned long data, filter;
+
+ filter = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+ if (get_user(data, &ptr->feat))
+ return -EFAULT;
+ if (!bitmap_subset(&data, &filter, KVM_S390_VM_CPU_UV_FEAT_NR_BITS))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ kvm->arch.model.uv_feat_guest.feat = data;
+ mutex_unlock(&kvm->lock);
+
+ VM_EVENT(kvm, 3, "SET: guest UV-feat: 0x%16.16lx", data);
+
+ return 0;
+}
+
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
ret = kvm_s390_set_processor_subfunc(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+ ret = kvm_s390_set_uv_feat(kvm, attr);
+ break;
}
return ret;
}
return 0;
}
+static int kvm_s390_get_processor_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+ unsigned long feat = kvm->arch.model.uv_feat_guest.feat;
+
+ if (put_user(feat, &dst->feat))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+ return 0;
+}
+
+static int kvm_s390_get_machine_uv_feat(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_uv_feat __user *dst = (void __user *)attr->addr;
+ unsigned long feat;
+
+ BUILD_BUG_ON(sizeof(*dst) != sizeof(uv_info.uv_feature_indications));
+
+ feat = uv_info.uv_feature_indications & KVM_S390_VM_CPU_UV_FEAT_GUEST_MASK;
+ if (put_user(feat, &dst->feat))
+ return -EFAULT;
+ VM_EVENT(kvm, 3, "GET: guest UV-feat: 0x%16.16lx", feat);
+
+ return 0;
+}
+
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
ret = kvm_s390_get_machine_subfunc(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
+ ret = kvm_s390_get_processor_uv_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+ ret = kvm_s390_get_machine_uv_feat(kvm, attr);
+ break;
}
return ret;
}
case KVM_S390_VM_CPU_MACHINE_FEAT:
case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
+ case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = 0;
break;
default:
struct kvm_vcpu *vcpu;
/* Disable the GISA if the ultravisor does not support AIV. */
- if (!test_bit_inv(BIT_UV_FEAT_AIV, &uv_info.uv_feature_indications))
+ if (!uv_has_feature(BIT_UV_FEAT_AIV))
kvm_s390_gisa_disable(kvm);
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
+ kvm->arch.model.uv_feat_guest.feat = 0;
+
kvm_s390_crypto_init(kvm);
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
if (!kvm_is_ucontrol(vcpu->kvm)) {
rc = kvm_s390_deliver_pending_interrupts(vcpu);
- if (rc)
+ if (rc || guestdbg_exit_pending(vcpu))
return rc;
}
do {
rc = vcpu_pre_run(vcpu);
- if (rc)
+ if (rc || guestdbg_exit_pending(vcpu))
break;
kvm_vcpu_srcu_read_unlock(vcpu);
{
struct kvm_vcpu *vcpu = filp->private_data;
void __user *argp = (void __user *)arg;
+ int rc;
switch (ioctl) {
case KVM_S390_IRQ: {
if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
return -EFAULT;
- return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+ break;
}
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
return -EFAULT;
if (s390int_to_s390irq(&s390int, &s390irq))
return -EINVAL;
- return kvm_s390_inject_vcpu(vcpu, &s390irq);
+ rc = kvm_s390_inject_vcpu(vcpu, &s390irq);
+ break;
}
+ default:
+ rc = -ENOIOCTLCMD;
+ break;
}
- return -ENOIOCTLCMD;
+
+ /*
+ * To simplify single stepping of userspace-emulated instructions,
+ * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see
+ * should_handle_per_ifetch()). However, if userspace emulation injects
+ * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens
+ * after (and not before) the interrupt delivery.
+ */
+ if (!rc)
+ vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
+
+ return rc;
}
static int kvm_s390_handle_pv_vcpu_dump(struct kvm_vcpu *vcpu,
WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
uvcb.header.rc, uvcb.header.rrc);
- WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+ WARN_ONCE(cc && uvcb.header.rc != 0x104,
+ "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
/* Intended memory leak on "impossible" error */
if (!cc)
uvcb.conf_base_stor_origin =
virt_to_phys((void *)kvm->arch.pv.stor_base);
uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
+ uvcb.flags.ap_allow_instr = kvm->arch.model.uv_feat_guest.ap;
+ uvcb.flags.ap_instr_intr = kvm->arch.model.uv_feat_guest.ap_intr;
cc = uv_call_sched(0, (u64)&uvcb);
*rc = uvcb.header.rc;
*rrc = uvcb.header.rrc;
- KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x",
- uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc);
+ KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x flags %04x",
+ uvcb.guest_handle, uvcb.guest_stor_len, *rc, *rrc, uvcb.flags.raw);
/* Outputs */
kvm->arch.pv.handle = uvcb.guest_handle;
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size;
- address_markers[AMODE31_START_NR].start_address = __samode31;
- address_markers[AMODE31_END_NR].start_address = __eamode31;
+ address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31;
+ address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31;
address_markers[MODULES_NR].start_address = MODULES_VADDR;
address_markers[MODULES_END_NR].start_address = MODULES_END;
address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore;
* reliable without the misc UV feature so we need to check
* for that as well.
*/
- if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications) &&
+ if (uv_has_feature(BIT_UV_FEAT_MISC) &&
!test_bit_inv(61, ®s->int_parm_long)) {
/*
* When this happens, userspace did something that it
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
- max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+ max_zone_pfns[ZONE_DMA] = virt_to_pfn(MAX_DMA_ADDRESS);
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
free_area_init(max_zone_pfns);
}
{
unsigned long size = __end_ro_after_init - __start_ro_after_init;
- set_memory_ro((unsigned long)__start_ro_after_init, size >> PAGE_SHIFT);
+ __set_memory_ro(__start_ro_after_init, __end_ro_after_init);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
debug_checkwx();
}
return rc;
}
-int __set_memory(unsigned long addr, int numpages, unsigned long flags)
+int __set_memory(unsigned long addr, unsigned long numpages, unsigned long flags)
{
unsigned long end;
int rc;
#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
-#include <linux/kasan.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/init.h>
static void try_free_pmd_table(pud_t *pud, unsigned long start)
{
- const unsigned long end = start + PUD_SIZE;
pmd_t *pmd;
int i;
- /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
- if (end > VMALLOC_START)
- return;
-
pmd = pmd_offset(pud, start);
for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
if (!pmd_none(*pmd))
static void try_free_pud_table(p4d_t *p4d, unsigned long start)
{
- const unsigned long end = start + P4D_SIZE;
pud_t *pud;
int i;
- /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
- if (end > VMALLOC_START)
- return;
-
pud = pud_offset(p4d, start);
for (i = 0; i < PTRS_PER_PUD; i++, pud++) {
if (!pud_none(*pud))
static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
{
- const unsigned long end = start + PGDIR_SIZE;
p4d_t *p4d;
int i;
- /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
- if (end > VMALLOC_START)
- return;
-
p4d = p4d_offset(pgd, start);
for (i = 0; i < PTRS_PER_P4D; i++, p4d++) {
if (!p4d_none(*p4d))
if (WARN_ON_ONCE(!PAGE_ALIGNED(start | end)))
return -EINVAL;
+ /* Don't mess with any tables not fully in 1:1 mapping & vmemmap area */
+ if (WARN_ON_ONCE(end > VMALLOC_START))
+ return -EINVAL;
for (addr = start; addr < end; addr = next) {
next = pgd_addr_end(addr, end);
pgd = pgd_offset_k(addr);
mutex_unlock(&vmem_mutex);
}
-static int __init memblock_region_cmp(const void *a, const void *b)
-{
- const struct memblock_region *r1 = a;
- const struct memblock_region *r2 = b;
-
- if (r1->base < r2->base)
- return -1;
- if (r1->base > r2->base)
- return 1;
- return 0;
-}
-
-static void __init memblock_region_swap(void *a, void *b, int size)
-{
- swap(*(struct memblock_region *)a, *(struct memblock_region *)b);
-}
-
-#ifdef CONFIG_KASAN
-#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-
-static inline int set_memory_kasan(unsigned long start, unsigned long end)
-{
- start = PAGE_ALIGN_DOWN(__sha(start));
- end = PAGE_ALIGN(__sha(end));
- return set_memory_rwnx(start, (end - start) >> PAGE_SHIFT);
-}
-#endif
-
-/*
- * map whole physical memory to virtual memory (identity mapping)
- * we reserve enough space in the vmalloc area for vmemmap to hotplug
- * additional memory segments.
- */
void __init vmem_map_init(void)
{
- struct memblock_region memory_rwx_regions[] = {
- {
- .base = 0,
- .size = sizeof(struct lowcore),
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __pa(_stext),
- .size = _etext - _stext,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __pa(_sinittext),
- .size = _einittext - _sinittext,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- {
- .base = __stext_amode31,
- .size = __etext_amode31 - __stext_amode31,
- .flags = MEMBLOCK_NONE,
-#ifdef CONFIG_NUMA
- .nid = NUMA_NO_NODE,
-#endif
- },
- };
- struct memblock_type memory_rwx = {
- .regions = memory_rwx_regions,
- .cnt = ARRAY_SIZE(memory_rwx_regions),
- .max = ARRAY_SIZE(memory_rwx_regions),
- };
- phys_addr_t base, end;
- u64 i;
-
+ __set_memory_rox(_stext, _etext);
+ __set_memory_ro(_etext, __end_rodata);
+ __set_memory_rox(_sinittext, _einittext);
+ __set_memory_rox(__stext_amode31, __etext_amode31);
/*
- * Set RW+NX attribute on all memory, except regions enumerated with
- * memory_rwx exclude type. These regions need different attributes,
- * which are enforced afterwards.
- *
- * __for_each_mem_range() iterate and exclude types should be sorted.
- * The relative location of _stext and _sinittext is hardcoded in the
- * linker script. However a location of __stext_amode31 and the kernel
- * image itself are chosen dynamically. Thus, sort the exclude type.
+ * If the BEAR-enhancement facility is not installed the first
+ * prefix page is used to return to the previous context with
+ * an LPSWE instruction and therefore must be executable.
*/
- sort(&memory_rwx_regions,
- ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
- memblock_region_cmp, memblock_region_swap);
- __for_each_mem_range(i, &memblock.memory, &memory_rwx,
- NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
- set_memory_rwnx((unsigned long)__va(base),
- (end - base) >> PAGE_SHIFT);
+ if (!static_key_enabled(&cpu_has_bear))
+ set_memory_x(0, 1);
+ if (debug_pagealloc_enabled()) {
+ /*
+ * Use RELOC_HIDE() as long as __va(0) translates to NULL,
+ * since performing pointer arithmetic on a NULL pointer
+ * has undefined behavior and generates compiler warnings.
+ */
+ __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size));
}
-
-#ifdef CONFIG_KASAN
- for_each_mem_range(i, &base, &end)
- set_memory_kasan(base, end);
-#endif
- set_memory_rox((unsigned long)_stext,
- (unsigned long)(_etext - _stext) >> PAGE_SHIFT);
- set_memory_ro((unsigned long)_etext,
- (unsigned long)(__end_rodata - _etext) >> PAGE_SHIFT);
- set_memory_rox((unsigned long)_sinittext,
- (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT);
- set_memory_rox(__stext_amode31,
- (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT);
-
- /* lowcore must be executable for LPSWE */
- if (static_key_enabled(&cpu_has_bear))
- set_memory_nx(0, 1);
- set_memory_nx(PAGE_SIZE, 1);
- if (debug_pagealloc_enabled())
- set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
-
+ if (MACHINE_HAS_NX)
+ ctl_set_bit(0, 20);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
*/
int r14_off; /* Offset of saved %r14 */
int run_ctx_off; /* Offset of struct bpf_tramp_run_ctx */
+ int tccnt_off; /* Offset of saved tailcall counter */
int do_fexit; /* do_fexit: label */
};
tjit->r14_off = alloc_stack(tjit, sizeof(u64));
tjit->run_ctx_off = alloc_stack(tjit,
sizeof(struct bpf_tramp_run_ctx));
+ tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
/* The caller has already reserved STACK_FRAME_OVERHEAD bytes. */
tjit->stack_size -= STACK_FRAME_OVERHEAD;
tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
/* aghi %r15,-stack_size */
EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
+ /* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
+ _EMIT6(0xd203f000 | tjit->tccnt_off,
+ 0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
/* stmg %r2,%rN,fwd_reg_args_off(%r15) */
if (nr_reg_args)
EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
(nr_stack_args * sizeof(u64) - 1) << 16 |
tjit->stack_args_off,
0xf000 | tjit->orig_stack_args_off);
+ /* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
/* lgr %r1,%r8 */
EMIT4(0xb9040000, REG_1, REG_8);
/* %r1() */
if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
tjit->retval_off);
+ /* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
+ _EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
+ 0xf000 | tjit->tccnt_off);
/* aghi %r15,stack_size */
EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
/* Emit an expoline for the following indirect jump. */
generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
generic-y += agp.h
-generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/* ide.h: SPARC PCI specific IDE glue.
- *
- * Copyright (C) 1997 David S. Miller (davem@davemloft.net)
- * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
- * Adaptation from sparc64 version to sparc by Pete Zaitcev.
- */
-
-#ifndef _SPARC_IDE_H
-#define _SPARC_IDE_H
-
-#ifdef __KERNEL__
-
-#include <asm/io.h>
-#ifdef CONFIG_SPARC64
-#include <asm/spitfire.h>
-#include <asm/cacheflush.h>
-#include <asm/page.h>
-#else
-#include <linux/pgtable.h>
-#include <asm/psr.h>
-#endif
-
-#define __ide_insl(data_reg, buffer, wcount) \
- __ide_insw(data_reg, buffer, (wcount)<<1)
-#define __ide_outsl(data_reg, buffer, wcount) \
- __ide_outsw(data_reg, buffer, (wcount)<<1)
-
-/* On sparc, I/O ports and MMIO registers are accessed identically. */
-#define __ide_mm_insw __ide_insw
-#define __ide_mm_insl __ide_insl
-#define __ide_mm_outsw __ide_outsw
-#define __ide_mm_outsl __ide_outsl
-
-static inline void __ide_insw(void __iomem *port, void *dst, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
- unsigned long end = (unsigned long)dst + (count << 1);
-#endif
- u16 *ps = dst;
- u32 *pi;
-
- if(((unsigned long)ps) & 0x2) {
- *ps++ = __raw_readw(port);
- count--;
- }
- pi = (u32 *)ps;
- while(count >= 2) {
- u32 w;
-
- w = __raw_readw(port) << 16;
- w |= __raw_readw(port);
- *pi++ = w;
- count -= 2;
- }
- ps = (u16 *)pi;
- if(count)
- *ps++ = __raw_readw(port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
- __flush_dcache_range((unsigned long)dst, end);
-#endif
-}
-
-static inline void __ide_outsw(void __iomem *port, const void *src, u32 count)
-{
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
- unsigned long end = (unsigned long)src + (count << 1);
-#endif
- const u16 *ps = src;
- const u32 *pi;
-
- if(((unsigned long)src) & 0x2) {
- __raw_writew(*ps++, port);
- count--;
- }
- pi = (const u32 *)ps;
- while(count >= 2) {
- u32 w;
-
- w = *pi++;
- __raw_writew((w >> 16), port);
- __raw_writew(w, port);
- count -= 2;
- }
- ps = (const u16 *)pi;
- if(count)
- __raw_writew(*ps, port);
-
-#if defined(CONFIG_SPARC64) && defined(DCACHE_ALIASING_POSSIBLE)
- __flush_dcache_range((unsigned long)src, end);
-#endif
-}
-
-#endif /* __KERNEL__ */
-
-#endif /* _SPARC_IDE_H */
* Copyright (C) 1997 Anton Blanchard (anton@progsoc.uts.edu.au)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/pgtable.h>
#include <asm/unistd.h>
#include <asm/asmmacro.h>
-#include <asm/export.h>
#define curptr g6
* CompactPCI platform by Eric Brower, 1999.
*/
+#include <linux/export.h>
#include <linux/version.h>
#include <linux/init.h>
#include <asm/thread_info.h> /* TI_UWINMASK */
#include <asm/errno.h>
#include <asm/pgtable.h> /* PGDIR_SHIFT */
-#include <asm/export.h>
.data
/* The following are used with the prom_vector node-ops to figure out
#include <linux/version.h>
#include <linux/errno.h>
+#include <linux/export.h>
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/estate.h>
#include <asm/sfafsr.h>
#include <asm/unistd.h>
-#include <asm/export.h>
/* This section from from _start to sparc64_boot_end should fit into
* 0x0000000000404000 to 0x0000000000408000.
*/
#ifdef __KERNEL__
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
-#include <asm/export.h>
#define GLOBAL_SPARE g7
#else
#define GLOBAL_SPARE g5
* Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/ptrace.h>
#include <asm/visasm.h>
#include <asm/thread_info.h>
-#include <asm/export.h>
/* On entry: %o5=current FPRS value, %g7 is callers address */
/* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */
* Copyright (C) 1999 David S. Miller (davem@redhat.com)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
ENTRY(__ashldi3)
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
ENTRY(__ashrdi3)
* Copyright (C) 1999, 2007 2012 David S. Miller (davem@davemloft.net)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/backoff.h>
-#include <asm/export.h>
.text
* Copyright (C) 2000, 2007 David S. Miller (davem@davemloft.net)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asi.h>
#include <asm/backoff.h>
-#include <asm/export.h>
.text
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/page.h>
-#include <asm/export.h>
/* Zero out 64 bytes of memory at (buf + offset).
* Assumes %g1 contains zero.
* Copyright (C) 2005 David S. Miller <davem@davemloft.net>
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
* BSD4.4 portable checksum routine
*/
+#include <linux/export.h>
#include <asm/errno.h>
-#include <asm/export.h>
#define CSUM_BIGCHUNK(buf, offset, sum, t0, t1, t2, t3, t4, t5) \
ldd [buf + offset + 0x00], t0; \
* BSD4.4 portable checksum routine
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
csum_partial_fix_alignment:
* Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
*/
+#include <linux/export.h>
#include <linux/pgtable.h>
#include <asm/visasm.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/spitfire.h>
#include <asm/head.h>
-#include <asm/export.h>
/* What we used to do was lock a TLB entry into a specific
* TLB slot, clear the page with interrupts disabled, then
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asi.h>
-#include <asm/export.h>
#define XCC xcc
* Copyright (C) 1997 Jakub Jelinek (jakub@redhat.com)
*/
+#include <linux/export.h>
#include <asm/visasm.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <linux/pgtable.h>
#include <asm/spitfire.h>
#include <asm/head.h>
-#include <asm/export.h>
/* What we used to do was lock a TLB entry into a specific
* TLB slot, clear the page with interrupts disabled, then
* Returns 0 if successful, otherwise count of bytes not copied yet
*/
+#include <linux/export.h>
#include <asm/ptrace.h>
#include <asm/asmmacro.h>
#include <asm/page.h>
#include <asm/thread_info.h>
-#include <asm/export.h>
/* Work around cpp -rob */
#define ALLOC #alloc
* Copyright (C) 2005 David S. Miller <davem@davemloft.net>
*/
-#include <asm/export.h>
+#include <linux/export.h>
#ifdef __KERNEL__
#define GLOBAL_SPARE %g7
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.globl __divdi3
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.register %g2,#scratch
* and onward.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
.register %g2, #scratch
* and onward.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
.register %g2, #scratch
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
.align 32
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
ENTRY(ip_fast_csum) /* %o0 = iph, %o1 = ihl */
* Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz)
*/
+#include <linux/export.h>
#include <asm/ptrace.h>
#include <asm/psr.h>
#include <asm/smp.h>
#include <asm/spinlock.h>
-#include <asm/export.h>
.text
.align 4
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
ENTRY(__lshrdi3)
cmp %o2, 0
* This can also be tweaked for kernel stack overflow detection.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
/*
* This is the main variant and is called by C code. GCC's -pg option
* Copyright (C) 2000, 2008 David S. Miller (davem@davemloft.net)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm/export.h>
.text
ENTRY(memcmp)
* Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
-#include <asm/export.h>
+#include <linux/export.h>
+
#define FUNC(x) \
.globl x; \
.type x,@function; \
* Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
* Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu)
*/
-#include <asm/export.h>
+#include <linux/export.h>
/* In essence, this is just a fancy strlen. */
* Copyright (C) 1998 David S. Miller (davem@redhat.com)
*/
- #include <asm/export.h>
+#include <linux/export.h>
#define HI_MAGIC 0x8080808080808080
#define LO_MAGIC 0x0101010101010101
* clear_user.
*/
+#include <linux/export.h>
#include <asm/ptrace.h>
-#include <asm/export.h>
/* Work around cpp -rob */
#define ALLOC #alloc
*/
-#include <asm/export.h>
+#include <linux/export.h>
.text
.align 4
.globl __muldi3
/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
.align 4
* Copyright (C) 1996, 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm/export.h>
#define LO_MAGIC 0x01010101
#define HI_MAGIC 0x80808080
* generic strncmp routine.
*/
+#include <linux/export.h>
#include <linux/linkage.h>
-#include <asm/export.h>
.text
ENTRY(strncmp)
* Copyright (C) 1997 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asi.h>
-#include <asm/export.h>
.text
ENTRY(strncmp)
* Copyright (C) 2006 David S. Miller <davem@davemloft.net>
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/visasm.h>
#include <asm/asi.h>
#include <asm/dcu.h>
#include <asm/spitfire.h>
-#include <asm/export.h>
/*
* Requirements:
goto no_cache_flush;
/* A real file page? */
+ folio = page_folio(page);
mapping = folio_flush_mapping(folio);
if (!mapping)
goto no_cache_flush;
echo ' bzdisk/fdimage*/hdimage/isoimage also accept:'
echo ' FDARGS="..." arguments for the booted kernel'
echo ' FDINITRD=file initrd for the booted kernel'
- echo ''
- echo ' kvm_guest.config - Enable Kconfig items for running this kernel as a KVM guest'
- echo ' xen.config - Enable Kconfig items for running this kernel as a Xen guest'
- echo ' x86_debug.config - Enable tip tree debugging options for testing'
endef
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
#endif
#endif
-typedef void crash_vmclear_fn(void);
-extern crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss;
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
* kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
* also includes TDP pages) to determine whether or not a page can be used in
* the given MMU context. This is a subset of the overall kvm_cpu_role to
- * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
- * 2 bytes per gfn instead of 4 bytes per gfn.
+ * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
+ * allocating 2 bytes per gfn instead of 4 bytes per gfn.
*
* Upper-level shadow pages having gptes are tracked for write-protection via
- * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
- * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
- * gfn_track will overflow and explosions will ensure.
+ * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
+ * not create more than 2^16-1 upper-level shadow pages at a single gfn,
+ * otherwise gfn_write_track will overflow and explosions will ensue.
*
* A unique shadow page (SP) for a gfn is created if and only if an existing SP
* cannot be reused. The ability to reuse a SP is tracked by its role, which
u64 smi_count;
bool at_instruction_boundary;
bool tpr_access_reporting;
- bool xsaves_enabled;
bool xfd_no_write_intercept;
u64 ia32_xss;
u64 microcode_version;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
+ /*
+ * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
+ * when "struct kvm_vcpu_arch" is no longer defined in an
+ * arch/x86/include/asm header. The max is mostly arbitrary, i.e.
+ * can be increased as necessary.
+ */
+#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
+
+ /*
+ * Track whether or not the guest is allowed to use features that are
+ * governed by KVM, where "governed" means KVM needs to manage state
+ * and/or explicitly enable the feature in hardware. Typically, but
+ * not always, governed features can be used by the guest if and only
+ * if both KVM and userspace want to expose the feature to the guest.
+ */
+ struct {
+ DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
+ } governed_features;
+
u64 reserved_gpa_bits;
int maxphyaddr;
struct kvm_arch_memory_slot {
struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
- unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
+ unsigned short *gfn_write_track;
};
/*
* create an NX huge page (without hanging the guest).
*/
struct list_head possible_nx_huge_pages;
- struct kvm_page_track_notifier_node mmu_sp_tracker;
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
struct kvm_page_track_notifier_head track_notifier_head;
+#endif
/*
* Protects marking pages unsync during page faults, as TDP MMU page
* faults only take mmu_lock for read. For simplicity, the unsync
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
- void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
- void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
+ void (*write_tsc_offset)(struct kvm_vcpu *vcpu);
+ void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
* Retrieve somewhat arbitrary exit information. Intended to
#define __KVM_HAVE_ARCH_VM_FREE
void kvm_arch_free_vm(struct kvm *kvm);
-#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
!static_call(kvm_x86_flush_remote_tlbs)(kvm))
return -ENOTSUPP;
}
+#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+
#define kvm_arch_pmi_in_guest(vcpu) \
((vcpu) && (vcpu)->arch.handling_intr_from_guest)
const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
-void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
#ifndef _ASM_X86_KVM_PAGE_TRACK_H
#define _ASM_X86_KVM_PAGE_TRACK_H
-enum kvm_page_track_mode {
- KVM_PAGE_TRACK_WRITE,
- KVM_PAGE_TRACK_MAX,
-};
+#include <linux/kvm_types.h>
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
/*
* The notifier represented by @kvm_page_track_notifier_node is linked into
* the head which will be notified when guest is triggering the track event.
* It is called when guest is writing the write-tracked page
* and write emulation is finished at that time.
*
- * @vcpu: the vcpu where the write access happened.
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
* @node: this node
*/
- void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes, struct kvm_page_track_notifier_node *node);
+ void (*track_write)(gpa_t gpa, const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node);
+
/*
- * It is called when memory slot is being moved or removed
- * users can drop write-protection for the pages in that memory slot
+ * Invoked when a memory region is removed from the guest. Or in KVM
+ * terms, when a memslot is deleted.
*
- * @kvm: the kvm where memory slot being moved or removed
- * @slot: the memory slot being moved or removed
- * @node: this node
+ * @gfn: base gfn of the region being removed
+ * @nr_pages: number of pages in the to-be-removed region
+ * @node: this node
*/
- void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node);
+ void (*track_remove_region)(gfn_t gfn, unsigned long nr_pages,
+ struct kvm_page_track_notifier_node *node);
};
-int kvm_page_track_init(struct kvm *kvm);
-void kvm_page_track_cleanup(struct kvm *kvm);
+int kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n);
-bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
-int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
-
-void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
-int kvm_page_track_create_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages);
-
-void kvm_slot_page_track_add_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode);
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- gfn_t gfn, enum kvm_page_track_mode mode);
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn);
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn);
+#else
+/*
+ * Allow defining a node in a structure even if page tracking is disabled, e.g.
+ * to play nice with testing headers via direct inclusion from the command line.
+ */
+struct kvm_page_track_notifier_node {};
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n);
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
#define MRR_BIOS 0
#define MRR_APM 1
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+typedef void (cpu_emergency_virt_cb)(void);
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
+#else
+static inline void cpu_emergency_disable_virtualization(void) {}
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
struct vmcb_seg {
u16 selector;
u64 last_excp_from;
u64 last_excp_to;
u8 reserved_0x298[72];
- u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
+ u64 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */
} __packed;
/* Save area definition for SEV-ES and SEV-SNP guests */
} __packed;
-#define EXPECTED_VMCB_SAVE_AREA_SIZE 740
+#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
#define EXPECTED_SEV_ES_SAVE_AREA_SIZE 1648
#define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* CPU virtualization extensions handling
- *
- * This should carry the code for handling CPU virtualization extensions
- * that needs to live in the kernel core.
- *
- * Author: Eduardo Habkost <ehabkost@redhat.com>
- *
- * Copyright (C) 2008, Red Hat Inc.
- *
- * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc.
- */
-#ifndef _ASM_X86_VIRTEX_H
-#define _ASM_X86_VIRTEX_H
-
-#include <asm/processor.h>
-
-#include <asm/vmx.h>
-#include <asm/svm.h>
-#include <asm/tlbflush.h>
-
-/*
- * VMX functions:
- */
-
-static inline int cpu_has_vmx(void)
-{
- unsigned long ecx = cpuid_ecx(1);
- return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */
-}
-
-
-/**
- * cpu_vmxoff() - Disable VMX on the current CPU
- *
- * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
- *
- * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
- * atomically track post-VMXON state, e.g. this may be called in NMI context.
- * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
- * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
- * magically in RM, VM86, compat mode, or at CPL>0.
- */
-static inline int cpu_vmxoff(void)
-{
- asm_volatile_goto("1: vmxoff\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "cc", "memory" : fault);
-
- cr4_clear_bits(X86_CR4_VMXE);
- return 0;
-
-fault:
- cr4_clear_bits(X86_CR4_VMXE);
- return -EIO;
-}
-
-static inline int cpu_vmx_enabled(void)
-{
- return __read_cr4() & X86_CR4_VMXE;
-}
-
-/** Disable VMX if it is enabled on the current CPU
- *
- * You shouldn't call this if cpu_has_vmx() returns 0.
- */
-static inline void __cpu_emergency_vmxoff(void)
-{
- if (cpu_vmx_enabled())
- cpu_vmxoff();
-}
-
-/** Disable VMX if it is supported and enabled on the current CPU
- */
-static inline void cpu_emergency_vmxoff(void)
-{
- if (cpu_has_vmx())
- __cpu_emergency_vmxoff();
-}
-
-
-
-
-/*
- * SVM functions:
- */
-
-/** Check if the CPU has SVM support
- *
- * You can use the 'msg' arg to get a message describing the problem,
- * if the function returns zero. Simply pass NULL if you are not interested
- * on the messages; gcc should take care of not generating code for
- * the messages on this case.
- */
-static inline int cpu_has_svm(const char **msg)
-{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
- boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) {
- if (msg)
- *msg = "not amd or hygon";
- return 0;
- }
-
- if (boot_cpu_data.extended_cpuid_level < SVM_CPUID_FUNC) {
- if (msg)
- *msg = "can't execute cpuid_8000000a";
- return 0;
- }
-
- if (!boot_cpu_has(X86_FEATURE_SVM)) {
- if (msg)
- *msg = "svm not available";
- return 0;
- }
- return 1;
-}
-
-
-/** Disable SVM on the current CPU
- *
- * You should call this only if cpu_has_svm() returned true.
- */
-static inline void cpu_svm_disable(void)
-{
- uint64_t efer;
-
- wrmsrl(MSR_VM_HSAVE_PA, 0);
- rdmsrl(MSR_EFER, efer);
- if (efer & EFER_SVME) {
- /*
- * Force GIF=1 prior to disabling SVM to ensure INIT and NMI
- * aren't blocked, e.g. if a fatal error occurred between CLGI
- * and STGI. Note, STGI may #UD if SVM is disabled from NMI
- * context between reading EFER and executing STGI. In that
- * case, GIF must already be set, otherwise the NMI would have
- * been blocked, so just eat the fault.
- */
- asm_volatile_goto("1: stgi\n\t"
- _ASM_EXTABLE(1b, %l[fault])
- ::: "memory" : fault);
-fault:
- wrmsrl(MSR_EFER, efer & ~EFER_SVME);
- }
-}
-
-/** Makes sure SVM is disabled, if it is supported on the CPU
- */
-static inline void cpu_emergency_svm_disable(void)
-{
- if (cpu_has_svm(NULL))
- cpu_svm_disable();
-}
-
-#endif /* _ASM_X86_VIRTEX_H */
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
-#define SECONDARY_EXEC_XSAVES VMCS_CONTROL_BIT(XSAVES)
+#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
#define SECONDARY_EXEC_PT_USE_GPA VMCS_CONTROL_BIT(PT_USE_GPA)
#define SECONDARY_EXEC_TSC_SCALING VMCS_CONTROL_BIT(TSC_SCALING)
unsigned int type;
};
-/*
- * This is used to VMCLEAR all VMCSs loaded on the
- * processor. And when loading kvm_intel module, the
- * callback function pointer will be assigned.
- *
- * protected by rcu.
- */
-crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
-EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
-
-static inline void cpu_crash_vmclear_loaded_vmcss(void)
-{
- crash_vmclear_fn *do_vmclear_operation = NULL;
-
- rcu_read_lock();
- do_vmclear_operation = rcu_dereference(crash_vmclear_loaded_vmcss);
- if (do_vmclear_operation)
- do_vmclear_operation();
- rcu_read_unlock();
-}
-
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
crash_save_cpu(regs, cpu);
/*
- * VMCLEAR VMCSs loaded on all cpus if needed.
- */
- cpu_crash_vmclear_loaded_vmcss();
-
- /*
* Disable Intel PT to stop its logging
*/
cpu_emergency_stop_pt();
crash_smp_send_stop();
- /*
- * VMCLEAR VMCSs loaded on this cpu if needed.
- */
- cpu_crash_vmclear_loaded_vmcss();
-
cpu_emergency_disable_virtualization();
/*
#include <asm/reboot_fixups.h>
#include <asm/reboot.h>
#include <asm/pci_x86.h>
-#include <asm/virtext.h>
#include <asm/cpu.h>
#include <asm/nmi.h>
#include <asm/smp.h>
static inline void nmi_shootdown_cpus_on_restart(void);
+#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
+/* RCU-protected callback to disable virtualization prior to reboot. */
+static cpu_emergency_virt_cb __rcu *cpu_emergency_virt_callback;
+
+void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback)
+{
+ if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback)))
+ return;
+
+ rcu_assign_pointer(cpu_emergency_virt_callback, callback);
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_register_virt_callback);
+
+void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback)
+{
+ if (WARN_ON_ONCE(rcu_access_pointer(cpu_emergency_virt_callback) != callback))
+ return;
+
+ rcu_assign_pointer(cpu_emergency_virt_callback, NULL);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cpu_emergency_unregister_virt_callback);
+
+/*
+ * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
+ * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
+ * GIF=0, i.e. if the crash occurred between CLGI and STGI.
+ */
+void cpu_emergency_disable_virtualization(void)
+{
+ cpu_emergency_virt_cb *callback;
+
+ /*
+ * IRQs must be disabled as KVM enables virtualization in hardware via
+ * function call IPIs, i.e. IRQs need to be disabled to guarantee
+ * virtualization stays disabled.
+ */
+ lockdep_assert_irqs_disabled();
+
+ rcu_read_lock();
+ callback = rcu_dereference(cpu_emergency_virt_callback);
+ if (callback)
+ callback();
+ rcu_read_unlock();
+}
+
static void emergency_reboot_disable_virtualization(void)
{
- /* Just make sure we won't change CPUs while doing this */
local_irq_disable();
/*
* Do the NMI shootdown even if virtualization is off on _this_ CPU, as
* other CPUs may have virtualization enabled.
*/
- if (cpu_has_vmx() || cpu_has_svm(NULL)) {
+ if (rcu_access_pointer(cpu_emergency_virt_callback)) {
/* Safely force _this_ CPU out of VMX/SVM operation. */
cpu_emergency_disable_virtualization();
nmi_shootdown_cpus_on_restart();
}
}
-
+#else
+static void emergency_reboot_disable_virtualization(void) { }
+#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
void __attribute__((weak)) mach_reboot_fixups(void)
{
}
#endif
-
/* This is the CPU performing the emergency shutdown work. */
int crashing_cpu = -1;
-/*
- * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
- * reboot. VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
- * GIF=0, i.e. if the crash occurred between CLGI and STGI.
- */
-void cpu_emergency_disable_virtualization(void)
-{
- cpu_emergency_vmxoff();
- cpu_emergency_svm_disable();
-}
-
#if defined(CONFIG_SMP)
static nmi_shootdown_cb shootdown_callback;
config KVM_AMD
tristate "KVM for AMD processors support"
- depends on KVM
+ depends on KVM && (CPU_SUP_AMD || CPU_SUP_HYGON)
help
Provides support for KVM on AMD processors equipped with the AMD-V
(SVM) extensions.
If in doubt, say "N".
+config KVM_PROVE_MMU
+ bool "Prove KVM MMU correctness"
+ depends on DEBUG_KERNEL
+ depends on KVM
+ depends on EXPERT
+ help
+ Enables runtime assertions in KVM's MMU that are too costly to enable
+ in anything remotely resembling a production environment, e.g. this
+ gates code that verifies a to-be-freed page table doesn't have any
+ present SPTEs.
+
+ If in doubt, say "N".
+
config KVM_EXTERNAL_WRITE_TRACKING
bool
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kvm_host.h>
+#include "linux/lockdep.h"
#include <linux/export.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
struct kvm_cpuid_entry2 *e;
int i;
+ /*
+ * KVM has a semi-arbitrary rule that querying the guest's CPUID model
+ * with IRQs disabled is disallowed. The CPUID model can legitimately
+ * have over one hundred entries, i.e. the lookup is slow, and IRQs are
+ * typically disabled in KVM only when KVM is in a performance critical
+ * path, e.g. the core VM-Enter/VM-Exit run loop. Nothing will break
+ * if this rule is violated, this assertion is purely to flag potential
+ * performance issues. If this fires, consider moving the lookup out
+ * of the hotpath, e.g. by caching information during CPUID updates.
+ */
+ lockdep_assert_irqs_enabled();
+
for (i = 0; i < nent; i++) {
e = &entries[i];
{
struct kvm_lapic *apic = vcpu->arch.apic;
struct kvm_cpuid_entry2 *best;
+ bool allow_gbpages;
+
+ BUILD_BUG_ON(KVM_NR_GOVERNED_FEATURES > KVM_MAX_NR_GOVERNED_FEATURES);
+ bitmap_zero(vcpu->arch.governed_features.enabled,
+ KVM_MAX_NR_GOVERNED_FEATURES);
+
+ /*
+ * If TDP is enabled, let the guest use GBPAGES if they're supported in
+ * hardware. The hardware page walker doesn't let KVM disable GBPAGES,
+ * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
+ * walk for performance and complexity reasons. Not to mention KVM
+ * _can't_ solve the problem because GVA->GPA walks aren't visible to
+ * KVM once a TDP translation is installed. Mimic hardware behavior so
+ * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
+ * If TDP is disabled, honor *only* guest CPUID as KVM has full control
+ * and can install smaller shadow pages if the host lacks 1GiB support.
+ */
+ allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
+ guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
+ if (allow_gbpages)
+ kvm_governed_feature_set(vcpu, X86_FEATURE_GBPAGES);
best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
);
kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
- F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
+ F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) |
+ F(AMX_COMPLEX)
);
kvm_cpu_cap_mask(CPUID_D_1_EAX,
cpuid_entry_override(entry, CPUID_8000_0001_EDX);
cpuid_entry_override(entry, CPUID_8000_0001_ECX);
break;
+ case 0x80000005:
+ /* Pass host L1 cache and TLB info. */
+ break;
case 0x80000006:
/* Drop reserved bits, pass host L2 cache and TLB info. */
entry->edx &= ~GENMASK(17, 16);
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}
+enum kvm_governed_features {
+#define KVM_GOVERNED_FEATURE(x) KVM_GOVERNED_##x,
+#include "governed_features.h"
+ KVM_NR_GOVERNED_FEATURES
+};
+
+static __always_inline int kvm_governed_feature_index(unsigned int x86_feature)
+{
+ switch (x86_feature) {
+#define KVM_GOVERNED_FEATURE(x) case x: return KVM_GOVERNED_##x;
+#include "governed_features.h"
+ default:
+ return -1;
+ }
+}
+
+static __always_inline bool kvm_is_governed_feature(unsigned int x86_feature)
+{
+ return kvm_governed_feature_index(x86_feature) >= 0;
+}
+
+static __always_inline void kvm_governed_feature_set(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
+{
+ BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+ __set_bit(kvm_governed_feature_index(x86_feature),
+ vcpu->arch.governed_features.enabled);
+}
+
+static __always_inline void kvm_governed_feature_check_and_set(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
+{
+ if (kvm_cpu_cap_has(x86_feature) && guest_cpuid_has(vcpu, x86_feature))
+ kvm_governed_feature_set(vcpu, x86_feature);
+}
+
+static __always_inline bool guest_can_use(struct kvm_vcpu *vcpu,
+ unsigned int x86_feature)
+{
+ BUILD_BUG_ON(!kvm_is_governed_feature(x86_feature));
+
+ return test_bit(kvm_governed_feature_index(x86_feature),
+ vcpu->arch.governed_features.enabled);
+}
+
#endif
op->addr.mem,
&op->val,
op->bytes);
- break;
case OP_MEM_STR:
return segmented_write(ctxt,
op->addr.mem,
op->data,
op->bytes * op->count);
- break;
case OP_XMM:
kvm_write_sse_reg(op->addr.xmm, &op->vec_val);
break;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(KVM_GOVERNED_FEATURE) || defined(KVM_GOVERNED_X86_FEATURE)
+BUILD_BUG()
+#endif
+
+#define KVM_GOVERNED_X86_FEATURE(x) KVM_GOVERNED_FEATURE(X86_FEATURE_##x)
+
+KVM_GOVERNED_X86_FEATURE(GBPAGES)
+KVM_GOVERNED_X86_FEATURE(XSAVES)
+KVM_GOVERNED_X86_FEATURE(VMX)
+KVM_GOVERNED_X86_FEATURE(NRIPS)
+KVM_GOVERNED_X86_FEATURE(TSCRATEMSR)
+KVM_GOVERNED_X86_FEATURE(V_VMSAVE_VMLOAD)
+KVM_GOVERNED_X86_FEATURE(LBRV)
+KVM_GOVERNED_X86_FEATURE(PAUSEFILTER)
+KVM_GOVERNED_X86_FEATURE(PFTHRESHOLD)
+KVM_GOVERNED_X86_FEATURE(VGIF)
+KVM_GOVERNED_X86_FEATURE(VNMI)
+
+#undef KVM_GOVERNED_X86_FEATURE
+#undef KVM_GOVERNED_FEATURE
case HV_X64_MSR_VP_ASSIST_PAGE:
return hv_vcpu->cpuid_cache.features_eax &
HV_MSR_APIC_ACCESS_AVAILABLE;
- break;
case HV_X64_MSR_TSC_FREQUENCY:
case HV_X64_MSR_APIC_FREQUENCY:
return hv_vcpu->cpuid_cache.features_eax &
bool (*get_cpuid)(struct x86_emulate_ctxt *ctxt, u32 *eax, u32 *ebx,
u32 *ecx, u32 *edx, bool exact_only);
- bool (*guest_has_long_mode)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_movbe)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_fxsr)(struct x86_emulate_ctxt *ctxt);
bool (*guest_has_rdpid)(struct x86_emulate_ctxt *ctxt);
struct kvm_vcpu *vcpu;
unsigned long i;
u32 max_id = 255; /* enough space for any xAPIC ID */
- bool xapic_id_mismatch = false;
+ bool xapic_id_mismatch;
+ int r;
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
"Dirty APIC map without an in-kernel local APIC");
mutex_lock(&kvm->arch.apic_map_lock);
+
+retry:
/*
- * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
- * (if clean) or the APIC registers (if dirty).
+ * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map (if clean)
+ * or the APIC registers (if dirty). Note, on retry the map may have
+ * not yet been marked dirty by whatever task changed a vCPU's x2APIC
+ * ID, i.e. the map may still show up as in-progress. In that case
+ * this task still needs to retry and complete its calculation.
*/
if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
return;
}
+ /*
+ * Reset the mismatch flag between attempts so that KVM does the right
+ * thing if a vCPU changes its xAPIC ID, but do NOT reset max_id, i.e.
+ * keep max_id strictly increasing. Disallowing max_id from shrinking
+ * ensures KVM won't get stuck in an infinite loop, e.g. if the vCPU
+ * with the highest x2APIC ID is toggling its APIC on and off.
+ */
+ xapic_id_mismatch = false;
+
kvm_for_each_vcpu(i, vcpu, kvm)
if (kvm_apic_present(vcpu))
max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
if (!kvm_apic_present(vcpu))
continue;
- if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+ r = kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch);
+ if (r) {
kvfree(new);
new = NULL;
+ if (r == -E2BIG) {
+ cond_resched();
+ goto retry;
+ }
+
goto out;
}
void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes);
static inline int kvm_mmu_reload(struct kvm_vcpu *vcpu)
{
#include "kvm_cache_regs.h"
#include "smm.h"
#include "kvm_emulate.h"
+#include "page_track.h"
#include "cpuid.h"
#include "spte.h"
#include <asm/io.h>
#include <asm/set_memory.h>
#include <asm/vmx.h>
-#include <asm/kvm_page_track.h>
+
#include "trace.h"
extern bool itlb_multihit_kvm_mitigation;
static int tdp_root_level __read_mostly;
static int max_tdp_level __read_mostly;
-#ifdef MMU_DEBUG
-bool dbg = 0;
-module_param(dbg, bool, 0644);
-#endif
-
#define PTE_PREFETCH_NUM 8
#include <trace/events/kvm.h>
return kvm_x86_ops.flush_remote_tlbs_range;
}
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
- gfn_t nr_pages)
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
{
- int ret = -EOPNOTSUPP;
+ if (!kvm_x86_ops.flush_remote_tlbs_range)
+ return -EOPNOTSUPP;
- if (kvm_x86_ops.flush_remote_tlbs_range)
- ret = static_call(kvm_x86_flush_remote_tlbs_range)(kvm, start_gfn,
- nr_pages);
- if (ret)
- kvm_flush_remote_tlbs(kvm);
+ return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
}
static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index);
*/
static void mmu_spte_set(u64 *sptep, u64 new_spte)
{
- WARN_ON(is_shadow_present_pte(*sptep));
+ WARN_ON_ONCE(is_shadow_present_pte(*sptep));
__set_spte(sptep, new_spte);
}
{
u64 old_spte = *sptep;
- WARN_ON(!is_shadow_present_pte(new_spte));
+ WARN_ON_ONCE(!is_shadow_present_pte(new_spte));
check_spte_writable_invariants(new_spte);
if (!is_shadow_present_pte(old_spte)) {
else
old_spte = __update_clear_spte_slow(sptep, new_spte);
- WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
+ WARN_ON_ONCE(spte_to_pfn(old_spte) != spte_to_pfn(new_spte));
return old_spte;
}
* by a refcounted page, the refcount is elevated.
*/
page = kvm_pfn_to_refcounted_page(pfn);
- WARN_ON(page && !page_count(page));
+ WARN_ON_ONCE(page && !page_count(page));
if (is_accessed_spte(old_spte))
kvm_set_pfn_accessed(pfn);
for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) {
linfo = lpage_info_slot(gfn, slot, i);
linfo->disallow_lpage += count;
- WARN_ON(linfo->disallow_lpage < 0);
+ WARN_ON_ONCE(linfo->disallow_lpage < 0);
}
}
/* the non-leaf shadow pages are keeping readonly. */
if (sp->role.level > PG_LEVEL_4K)
- return kvm_slot_page_track_add_page(kvm, slot, gfn,
- KVM_PAGE_TRACK_WRITE);
+ return __kvm_write_track_add_gfn(kvm, slot, gfn);
kvm_mmu_gfn_disallow_lpage(slot, gfn);
slots = kvm_memslots_for_spte_role(kvm, sp->role);
slot = __gfn_to_memslot(slots, gfn);
if (sp->role.level > PG_LEVEL_4K)
- return kvm_slot_page_track_remove_page(kvm, slot, gfn,
- KVM_PAGE_TRACK_WRITE);
+ return __kvm_write_track_remove_gfn(kvm, slot, gfn);
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
int count = 0;
if (!rmap_head->val) {
- rmap_printk("%p %llx 0->1\n", spte, *spte);
rmap_head->val = (unsigned long)spte;
} else if (!(rmap_head->val & 1)) {
- rmap_printk("%p %llx 1->many\n", spte, *spte);
desc = kvm_mmu_memory_cache_alloc(cache);
desc->sptes[0] = (u64 *)rmap_head->val;
desc->sptes[1] = spte;
rmap_head->val = (unsigned long)desc | 1;
++count;
} else {
- rmap_printk("%p %llx many->many\n", spte, *spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
count = desc->tail_count + desc->spte_count;
return count;
}
-static void pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head,
+static void pte_list_desc_remove_entry(struct kvm *kvm,
+ struct kvm_rmap_head *rmap_head,
struct pte_list_desc *desc, int i)
{
struct pte_list_desc *head_desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
* when adding an entry and the previous head is full, and heads are
* removed (this flow) when they become empty.
*/
- BUG_ON(j < 0);
+ KVM_BUG_ON_DATA_CORRUPTION(j < 0, kvm);
/*
* Replace the to-be-freed SPTE with the last valid entry from the head
mmu_free_pte_list_desc(head_desc);
}
-static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head)
+static void pte_list_remove(struct kvm *kvm, u64 *spte,
+ struct kvm_rmap_head *rmap_head)
{
struct pte_list_desc *desc;
int i;
- if (!rmap_head->val) {
- pr_err("%s: %p 0->BUG\n", __func__, spte);
- BUG();
- } else if (!(rmap_head->val & 1)) {
- rmap_printk("%p 1->0\n", spte);
- if ((u64 *)rmap_head->val != spte) {
- pr_err("%s: %p 1->BUG\n", __func__, spte);
- BUG();
- }
+ if (KVM_BUG_ON_DATA_CORRUPTION(!rmap_head->val, kvm))
+ return;
+
+ if (!(rmap_head->val & 1)) {
+ if (KVM_BUG_ON_DATA_CORRUPTION((u64 *)rmap_head->val != spte, kvm))
+ return;
+
rmap_head->val = 0;
} else {
- rmap_printk("%p many->many\n", spte);
desc = (struct pte_list_desc *)(rmap_head->val & ~1ul);
while (desc) {
for (i = 0; i < desc->spte_count; ++i) {
if (desc->sptes[i] == spte) {
- pte_list_desc_remove_entry(rmap_head, desc, i);
+ pte_list_desc_remove_entry(kvm, rmap_head,
+ desc, i);
return;
}
}
desc = desc->more;
}
- pr_err("%s: %p many->many\n", __func__, spte);
- BUG();
+
+ KVM_BUG_ON_DATA_CORRUPTION(true, kvm);
}
}
struct kvm_rmap_head *rmap_head, u64 *sptep)
{
mmu_spte_clear_track_bits(kvm, sptep);
- pte_list_remove(sptep, rmap_head);
+ pte_list_remove(kvm, sptep, rmap_head);
}
/* Return true if at least one SPTE was zapped, false otherwise */
slot = __gfn_to_memslot(slots, gfn);
rmap_head = gfn_to_rmap(gfn, sp->role.level, slot);
- pte_list_remove(spte, rmap_head);
+ pte_list_remove(kvm, spte, rmap_head);
}
/*
struct kvm_mmu_page *sp;
sp = sptep_to_sp(sptep);
- WARN_ON(sp->role.level == PG_LEVEL_4K);
+ WARN_ON_ONCE(sp->role.level == PG_LEVEL_4K);
drop_spte(kvm, sptep);
!(pt_protect && is_mmu_writable_spte(spte)))
return false;
- rmap_printk("spte %p %llx\n", sptep, *sptep);
-
if (pt_protect)
spte &= ~shadow_mmu_writable_mask;
spte = spte & ~PT_WRITABLE_MASK;
{
u64 spte = *sptep;
- rmap_printk("spte %p %llx\n", sptep, *sptep);
-
- MMU_WARN_ON(!spte_ad_enabled(spte));
+ KVM_MMU_WARN_ON(!spte_ad_enabled(spte));
spte &= ~shadow_dirty_mask;
return mmu_spte_update(sptep, spte);
}
u64 new_spte;
kvm_pfn_t new_pfn;
- WARN_ON(pte_huge(pte));
+ WARN_ON_ONCE(pte_huge(pte));
new_pfn = pte_pfn(pte);
restart:
for_each_rmap_spte(rmap_head, &iter, sptep) {
- rmap_printk("spte %p %llx gfn %llx (%d)\n",
- sptep, *sptep, gfn, level);
-
need_flush = true;
if (pte_write(pte)) {
for_each_slot_rmap_range(range->slot, PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL,
range->start, range->end - 1, &iterator)
ret |= handler(kvm, iterator.rmap, range->slot, iterator.gfn,
- iterator.level, range->pte);
+ iterator.level, range->arg.pte);
return ret;
}
return young;
}
-#ifdef MMU_DEBUG
-static int is_empty_shadow_page(u64 *spt)
+static void kvm_mmu_check_sptes_at_free(struct kvm_mmu_page *sp)
{
- u64 *pos;
- u64 *end;
+#ifdef CONFIG_KVM_PROVE_MMU
+ int i;
- for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
- if (is_shadow_present_pte(*pos)) {
- printk(KERN_ERR "%s: %p %llx\n", __func__,
- pos, *pos);
- return 0;
- }
- return 1;
-}
+ for (i = 0; i < SPTE_ENT_PER_PAGE; i++) {
+ if (KVM_MMU_WARN_ON(is_shadow_present_pte(sp->spt[i])))
+ pr_err_ratelimited("SPTE %llx (@ %p) for gfn %llx shadow-present at free",
+ sp->spt[i], &sp->spt[i],
+ kvm_mmu_page_get_gfn(sp, i));
+ }
#endif
+}
/*
* This value is the sum of all of the kvm instances's
static void kvm_mmu_free_shadow_page(struct kvm_mmu_page *sp)
{
- MMU_WARN_ON(!is_empty_shadow_page(sp->spt));
+ kvm_mmu_check_sptes_at_free(sp);
+
hlist_del(&sp->hash_link);
list_del(&sp->link);
free_page((unsigned long)sp->spt);
pte_list_add(cache, parent_pte, &sp->parent_ptes);
}
-static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
+static void mmu_page_remove_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- pte_list_remove(parent_pte, &sp->parent_ptes);
+ pte_list_remove(kvm, parent_pte, &sp->parent_ptes);
}
-static void drop_parent_pte(struct kvm_mmu_page *sp,
+static void drop_parent_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
u64 *parent_pte)
{
- mmu_page_remove_parent_pte(sp, parent_pte);
+ mmu_page_remove_parent_pte(kvm, sp, parent_pte);
mmu_spte_clear_no_track(parent_pte);
}
static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx)
{
--sp->unsync_children;
- WARN_ON((int)sp->unsync_children < 0);
+ WARN_ON_ONCE((int)sp->unsync_children < 0);
__clear_bit(idx, sp->unsync_child_bitmap);
}
static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
- WARN_ON(!sp->unsync);
+ WARN_ON_ONCE(!sp->unsync);
trace_kvm_mmu_sync_page(sp);
sp->unsync = 0;
--kvm->stat.mmu_unsync;
if (pvec->nr == 0)
return 0;
- WARN_ON(pvec->page[0].idx != INVALID_INDEX);
+ WARN_ON_ONCE(pvec->page[0].idx != INVALID_INDEX);
sp = pvec->page[0].sp;
level = sp->role.level;
- WARN_ON(level == PG_LEVEL_4K);
+ WARN_ON_ONCE(level == PG_LEVEL_4K);
parents->parent[level-2] = sp;
if (!sp)
return;
- WARN_ON(idx == INVALID_INDEX);
+ WARN_ON_ONCE(idx == INVALID_INDEX);
clear_unsync_child_bit(sp, idx);
level++;
} while (!sp->unsync_children);
if (ret < 0)
break;
- WARN_ON(!list_empty(&invalid_list));
+ WARN_ON_ONCE(!list_empty(&invalid_list));
if (ret > 0)
kvm_flush_remote_tlbs(kvm);
}
if (child->role.access == direct_access)
return;
- drop_parent_pte(child, sptep);
+ drop_parent_pte(vcpu->kvm, child, sptep);
kvm_flush_remote_tlbs_sptep(vcpu->kvm, sptep);
}
}
drop_spte(kvm, spte);
} else {
child = spte_to_child_sp(pte);
- drop_parent_pte(child, spte);
+ drop_parent_pte(kvm, child, spte);
/*
* Recursively zap nested TDP SPs, parentless SPs are
return zapped;
}
-static void kvm_mmu_unlink_parents(struct kvm_mmu_page *sp)
+static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp)
{
u64 *sptep;
struct rmap_iterator iter;
while ((sptep = rmap_get_first(&sp->parent_ptes, &iter)))
- drop_parent_pte(sp, sptep);
+ drop_parent_pte(kvm, sp, sptep);
}
static int mmu_zap_unsync_children(struct kvm *kvm,
++kvm->stat.mmu_shadow_zapped;
*nr_zapped = mmu_zap_unsync_children(kvm, sp, invalid_list);
*nr_zapped += kvm_mmu_page_unlink_children(kvm, sp, invalid_list);
- kvm_mmu_unlink_parents(sp);
+ kvm_mmu_unlink_parents(kvm, sp);
/* Zapping children means active_mmu_pages has become unstable. */
list_unstable = *nr_zapped;
kvm_flush_remote_tlbs(kvm);
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
- WARN_ON(!sp->role.invalid || sp->root_count);
+ WARN_ON_ONCE(!sp->role.invalid || sp->root_count);
kvm_mmu_free_shadow_page(sp);
}
}
LIST_HEAD(invalid_list);
int r;
- pgprintk("%s: looking for gfn %llx\n", __func__, gfn);
r = 0;
write_lock(&kvm->mmu_lock);
for_each_gfn_valid_sp_with_gptes(kvm, sp, gfn) {
- pgprintk("%s: gfn %llx role %x\n", __func__, gfn,
- sp->role.word);
r = 1;
kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
}
* track machinery is used to write-protect upper-level shadow pages,
* i.e. this guards the role.level == 4K assertion below!
*/
- if (kvm_slot_page_track_is_active(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_gfn_is_write_tracked(kvm, slot, gfn))
return -EPERM;
/*
continue;
}
- WARN_ON(sp->role.level != PG_LEVEL_4K);
+ WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
kvm_unsync_page(kvm, sp);
}
if (locked)
bool prefetch = !fault || fault->prefetch;
bool write_fault = fault && fault->write;
- pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
- *sptep, write_fault, gfn);
-
if (unlikely(is_noslot_pfn(pfn))) {
vcpu->stat.pf_mmio_spte_created++;
mark_mmio_spte(vcpu, sptep, gfn, pte_access);
u64 pte = *sptep;
child = spte_to_child_sp(pte);
- drop_parent_pte(child, sptep);
+ drop_parent_pte(vcpu->kvm, child, sptep);
flush = true;
} else if (pfn != spte_to_pfn(*sptep)) {
- pgprintk("hfn old %llx new %llx\n",
- spte_to_pfn(*sptep), pfn);
drop_spte(vcpu->kvm, sptep);
flush = true;
} else
if (flush)
kvm_flush_remote_tlbs_gfn(vcpu->kvm, gfn, level);
- pgprintk("%s: setting spte %llx\n", __func__, *sptep);
-
if (!was_rmapped) {
WARN_ON_ONCE(ret == RET_PF_SPURIOUS);
rmap_add(vcpu, slot, sptep, gfn, pte_access);
u64 *spte, *start = NULL;
int i;
- WARN_ON(!sp->role.direct);
+ WARN_ON_ONCE(!sp->role.direct);
i = spte_index(sptep) & ~(PTE_PREFETCH_NUM - 1);
spte = sp->spt + i;
if (!VALID_PAGE(*root_hpa))
return;
- /*
- * The "root" may be a special root, e.g. a PAE entry, treat it as a
- * SPTE to ensure any non-PA bits are dropped.
- */
- sp = spte_to_child_sp(*root_hpa);
- if (WARN_ON(!sp))
+ sp = root_to_sp(*root_hpa);
+ if (WARN_ON_ONCE(!sp))
return;
if (is_tdp_mmu_page(sp))
&invalid_list);
if (free_active_root) {
- if (to_shadow_page(mmu->root.hpa)) {
+ if (kvm_mmu_is_dummy_root(mmu->root.hpa)) {
+ /* Nothing to cleanup for dummy roots. */
+ } else if (root_to_sp(mmu->root.hpa)) {
mmu_free_root_page(kvm, &mmu->root.hpa, &invalid_list);
} else if (mmu->pae_root) {
for (i = 0; i < 4; ++i) {
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu)
{
unsigned long roots_to_free = 0;
+ struct kvm_mmu_page *sp;
hpa_t root_hpa;
int i;
if (!VALID_PAGE(root_hpa))
continue;
- if (!to_shadow_page(root_hpa) ||
- to_shadow_page(root_hpa)->role.guest_mode)
+ sp = root_to_sp(root_hpa);
+ if (!sp || sp->role.guest_mode)
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
}
}
EXPORT_SYMBOL_GPL(kvm_mmu_free_guest_mode_roots);
-
-static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn)
-{
- int ret = 0;
-
- if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
- kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
- ret = 1;
- }
-
- return ret;
-}
-
static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, int quadrant,
u8 level)
{
root_pgd = kvm_mmu_get_guest_pgd(vcpu, mmu);
root_gfn = root_pgd >> PAGE_SHIFT;
- if (mmu_check_root(vcpu, root_gfn))
- return 1;
+ if (!kvm_vcpu_is_visible_gfn(vcpu, root_gfn)) {
+ mmu->root.hpa = kvm_mmu_get_dummy_root();
+ return 0;
+ }
/*
* On SVM, reading PDPTRs might access guest memory, which might fault
if (!(pdptrs[i] & PT_PRESENT_MASK))
continue;
- if (mmu_check_root(vcpu, pdptrs[i] >> PAGE_SHIFT))
- return 1;
+ if (!kvm_vcpu_is_visible_gfn(vcpu, pdptrs[i] >> PAGE_SHIFT))
+ pdptrs[i] = 0;
}
}
{
struct kvm_mmu_page *sp;
- if (!VALID_PAGE(root))
+ if (!VALID_PAGE(root) || kvm_mmu_is_dummy_root(root))
return false;
/*
* requirement isn't satisfied.
*/
smp_rmb();
- sp = to_shadow_page(root);
+ sp = root_to_sp(root);
/*
* PAE roots (somewhat arbitrarily) aren't backed by shadow pages, the
if (vcpu->arch.mmu->cpu_role.base.level >= PT64_ROOT_4LEVEL) {
hpa_t root = vcpu->arch.mmu->root.hpa;
- sp = to_shadow_page(root);
if (!is_unsync_root(root))
return;
+ sp = root_to_sp(root);
+
write_lock(&vcpu->kvm->mmu_lock);
mmu_sync_children(vcpu, sp, true);
write_unlock(&vcpu->kvm->mmu_lock);
return RET_PF_EMULATE;
reserved = get_mmio_spte(vcpu, addr, &spte);
- if (WARN_ON(reserved))
+ if (WARN_ON_ONCE(reserved))
return -EINVAL;
if (is_mmio_spte(spte)) {
* guest is writing the page which is write tracked which can
* not be fixed by page fault handler.
*/
- if (kvm_slot_page_track_is_active(vcpu->kvm, fault->slot, fault->gfn, KVM_PAGE_TRACK_WRITE))
+ if (kvm_gfn_is_write_tracked(vcpu->kvm, fault->slot, fault->gfn))
return true;
return false;
static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
- struct kvm_mmu_page *sp = to_shadow_page(vcpu->arch.mmu->root.hpa);
+ struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
/* Special roots, e.g. pae_root, are not backed by shadow pages. */
if (sp && is_obsolete_sp(vcpu->kvm, sp))
{
int r;
+ /* Dummy roots are used only for shadowing bad guest roots. */
+ if (WARN_ON_ONCE(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa)))
+ return RET_PF_RETRY;
+
if (page_fault_handle_page_track(vcpu, fault))
return RET_PF_EMULATE;
static int nonpaging_page_fault(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault)
{
- pgprintk("%s: gva %lx error %x\n", __func__, fault->addr, fault->error_code);
-
/* This path builds a PAE pagetable, we can map 2mb pages at maximum. */
fault->max_level = PG_LEVEL_2M;
return direct_page_fault(vcpu, fault);
static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd,
union kvm_mmu_page_role role)
{
- return (role.direct || pgd == root->pgd) &&
- VALID_PAGE(root->hpa) &&
- role.word == to_shadow_page(root->hpa)->role.word;
+ struct kvm_mmu_page *sp;
+
+ if (!VALID_PAGE(root->hpa))
+ return false;
+
+ if (!role.direct && pgd != root->pgd)
+ return false;
+
+ sp = root_to_sp(root->hpa);
+ if (WARN_ON_ONCE(!sp))
+ return false;
+
+ return role.word == sp->role.word;
}
/*
gpa_t new_pgd, union kvm_mmu_page_role new_role)
{
/*
- * For now, limit the caching to 64-bit hosts+VMs in order to avoid
- * having to deal with PDPTEs. We may add support for 32-bit hosts/VMs
- * later if necessary.
+ * Limit reuse to 64-bit hosts+VMs without "special" roots in order to
+ * avoid having to deal with PDPTEs and other complexities.
*/
- if (VALID_PAGE(mmu->root.hpa) && !to_shadow_page(mmu->root.hpa))
+ if (VALID_PAGE(mmu->root.hpa) && !root_to_sp(mmu->root.hpa))
kvm_mmu_free_roots(kvm, mmu, KVM_MMU_ROOT_CURRENT);
if (VALID_PAGE(mmu->root.hpa))
* If this is a direct root page, it doesn't have a write flooding
* count. Otherwise, clear the write flooding count.
*/
- if (!new_role.direct)
- __clear_sp_write_flooding_count(
- to_shadow_page(vcpu->arch.mmu->root.hpa));
+ if (!new_role.direct) {
+ struct kvm_mmu_page *sp = root_to_sp(vcpu->arch.mmu->root.hpa);
+
+ if (!WARN_ON_ONCE(!sp))
+ __clear_sp_write_flooding_count(sp);
+ }
}
EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd);
}
}
-static bool guest_can_use_gbpages(struct kvm_vcpu *vcpu)
-{
- /*
- * If TDP is enabled, let the guest use GBPAGES if they're supported in
- * hardware. The hardware page walker doesn't let KVM disable GBPAGES,
- * i.e. won't treat them as reserved, and KVM doesn't redo the GVA->GPA
- * walk for performance and complexity reasons. Not to mention KVM
- * _can't_ solve the problem because GVA->GPA walks aren't visible to
- * KVM once a TDP translation is installed. Mimic hardware behavior so
- * that KVM's is at least consistent, i.e. doesn't randomly inject #PF.
- */
- return tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
- guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES);
-}
-
static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context)
{
__reset_rsvds_bits_mask(&context->guest_rsvd_check,
vcpu->arch.reserved_gpa_bits,
context->cpu_role.base.level, is_efer_nx(context),
- guest_can_use_gbpages(vcpu),
+ guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
guest_cpuid_is_amd_or_hygon(vcpu));
}
__reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
context->root_role.level,
context->root_role.efer_nx,
- guest_can_use_gbpages(vcpu), is_pse, is_amd);
+ guest_can_use(vcpu, X86_FEATURE_GBPAGES),
+ is_pse, is_amd);
if (!shadow_me_mask)
return;
* physical address properties) in a single VM would require tracking
* all relevant CPUID information in kvm_mmu_page_role. That is very
* undesirable as it would increase the memory requirements for
- * gfn_track (see struct kvm_mmu_page_role comments). For now that
- * problem is swept under the rug; KVM's CPUID API is horrific and
+ * gfn_write_track (see struct kvm_mmu_page_role comments). For now
+ * that problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
vcpu->arch.root_mmu.root_role.word = 0;
struct kvm *kvm = vcpu->kvm;
kvm_mmu_free_roots(kvm, &vcpu->arch.root_mmu, KVM_MMU_ROOTS_ALL);
- WARN_ON(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
+ WARN_ON_ONCE(VALID_PAGE(vcpu->arch.root_mmu.root.hpa));
kvm_mmu_free_roots(kvm, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
- WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
+ WARN_ON_ONCE(VALID_PAGE(vcpu->arch.guest_mmu.root.hpa));
vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
}
/*
* When freeing obsolete roots, treat roots as obsolete if they don't
- * have an associated shadow page. This does mean KVM will get false
+ * have an associated shadow page, as it's impossible to determine if
+ * such roots are fresh or stale. This does mean KVM will get false
* positives and free roots that don't strictly need to be freed, but
* such false positives are relatively rare:
*
- * (a) only PAE paging and nested NPT has roots without shadow pages
+ * (a) only PAE paging and nested NPT have roots without shadow pages
+ * (or any shadow paging flavor with a dummy root, see note below)
* (b) remote reloads due to a memslot update obsoletes _all_ roots
* (c) KVM doesn't track previous roots for PAE paging, and the guest
* is unlikely to zap an in-use PGD.
+ *
+ * Note! Dummy roots are unique in that they are obsoleted by memslot
+ * _creation_! See also FNAME(fetch).
*/
- sp = to_shadow_page(root_hpa);
+ sp = root_to_sp(root_hpa);
return !sp || is_obsolete_sp(kvm, sp);
}
{
unsigned offset, pte_size, misaligned;
- pgprintk("misaligned: gpa %llx bytes %d role %x\n",
- gpa, bytes, sp->role.word);
-
offset = offset_in_page(gpa);
pte_size = sp->role.has_4_byte_gpte ? 4 : 8;
return spte;
}
-static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes,
- struct kvm_page_track_notifier_node *node)
+void kvm_mmu_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
+ int bytes)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
if (!READ_ONCE(vcpu->kvm->arch.indirect_shadow_pages))
return;
- pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
-
write_lock(&vcpu->kvm->mmu_lock);
gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes);
int r, emulation_type = EMULTYPE_PF;
bool direct = vcpu->arch.mmu->root_role.direct;
- if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+ /*
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP
+ * checks when emulating instructions that triggers implicit access.
+ * WARN if hardware generates a fault with an error code that collides
+ * with the KVM-defined value. Clear the flag and continue on, i.e.
+ * don't terminate the VM, as KVM can't possibly be relying on a flag
+ * that KVM doesn't know about.
+ */
+ if (WARN_ON_ONCE(error_code & PFERR_IMPLICIT_ACCESS))
+ error_code &= ~PFERR_IMPLICIT_ACCESS;
+
+ if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
return RET_PF_RETRY;
r = RET_PF_INVALID;
* pages. Skip the bogus page, otherwise we'll get stuck in an
* infinite loop if the page gets put back on the list (again).
*/
- if (WARN_ON(sp->role.invalid))
+ if (WARN_ON_ONCE(sp->role.invalid))
continue;
/*
return unlikely(!list_empty_careful(&kvm->arch.zapped_obsolete_pages));
}
-static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node)
-{
- kvm_mmu_zap_all_fast(kvm);
-}
-
int kvm_mmu_init_vm(struct kvm *kvm)
{
- struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
int r;
INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
return r;
}
- node->track_write = kvm_mmu_pte_write;
- node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
- kvm_page_track_register_notifier(kvm, node);
-
kvm->arch.split_page_header_cache.kmem_cache = mmu_page_header_cache;
kvm->arch.split_page_header_cache.gfp_zero = __GFP_ZERO;
void kvm_mmu_uninit_vm(struct kvm *kvm)
{
- struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
-
- kvm_page_track_unregister_notifier(kvm, node);
-
if (tdp_mmu_enabled)
kvm_mmu_uninit_tdp_mmu(kvm);
*/
if (walk_slot_rmaps(kvm, slot, kvm_mmu_zap_collapsible_spte,
PG_LEVEL_4K, KVM_MAX_HUGEPAGE_LEVEL - 1, true))
- kvm_arch_flush_remote_tlbs_memslot(kvm, slot);
+ kvm_flush_remote_tlbs_memslot(kvm, slot);
}
void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
}
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
-{
- /*
- * All current use cases for flushing the TLBs for a specific memslot
- * related to dirty logging, and many do the TLB flush out of mmu_lock.
- * The interaction between the various operations on memslot must be
- * serialized by slots_locks to ensure the TLB flush from one operation
- * is observed by any other operation on the same memslot.
- */
- lockdep_assert_held(&kvm->slots_lock);
- kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
-}
-
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
*/
}
-void kvm_mmu_zap_all(struct kvm *kvm)
+static void kvm_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *sp, *node;
LIST_HEAD(invalid_list);
write_lock(&kvm->mmu_lock);
restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link) {
- if (WARN_ON(sp->role.invalid))
+ if (WARN_ON_ONCE(sp->role.invalid))
continue;
if (__kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list, &ign))
goto restart;
write_unlock(&kvm->mmu_lock);
}
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+ kvm_mmu_zap_all(kvm);
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot)
+{
+ kvm_mmu_zap_all_fast(kvm);
+}
+
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen)
{
- WARN_ON(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
+ WARN_ON_ONCE(gen & KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS);
gen &= MMIO_SPTE_GEN_MASK;
static int get_nx_huge_pages(char *buffer, const struct kernel_param *kp)
{
if (nx_hugepage_mitigation_hard_disabled)
- return sprintf(buffer, "never\n");
+ return sysfs_emit(buffer, "never\n");
return param_get_bool(buffer, kp);
}
#include <linux/kvm_host.h>
#include <asm/kvm_host.h>
-#undef MMU_DEBUG
-
-#ifdef MMU_DEBUG
-extern bool dbg;
-
-#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
-#define rmap_printk(fmt, args...) do { if (dbg) printk("%s: " fmt, __func__, ## args); } while (0)
-#define MMU_WARN_ON(x) WARN_ON(x)
+#ifdef CONFIG_KVM_PROVE_MMU
+#define KVM_MMU_WARN_ON(x) WARN_ON_ONCE(x)
#else
-#define pgprintk(x...) do { } while (0)
-#define rmap_printk(x...) do { } while (0)
-#define MMU_WARN_ON(x) do { } while (0)
+#define KVM_MMU_WARN_ON(x) BUILD_BUG_ON_INVALID(x)
#endif
/* Page table builder macros common to shadow (host) PTEs and guest PTEs. */
#define INVALID_PAE_ROOT 0
#define IS_VALID_PAE_ROOT(x) (!!(x))
+static inline hpa_t kvm_mmu_get_dummy_root(void)
+{
+ return my_zero_pfn(0) << PAGE_SHIFT;
+}
+
+static inline bool kvm_mmu_is_dummy_root(hpa_t shadow_page)
+{
+ return is_zero_pfn(shadow_page >> PAGE_SHIFT);
+}
+
typedef u64 __rcu *tdp_ptep_t;
struct kvm_mmu_page {
struct kvm_memory_slot *slot, u64 gfn,
int min_level);
-void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t start_gfn,
- gfn_t nr_pages);
-
/* Flush the given page (huge or not) of guest memory. */
static inline void kvm_flush_remote_tlbs_gfn(struct kvm *kvm, gfn_t gfn, int level)
{
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/lockdep.h>
#include <linux/kvm_host.h>
#include <linux/rculist.h>
-#include <asm/kvm_page_track.h>
-
#include "mmu.h"
#include "mmu_internal.h"
+#include "page_track.h"
bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
{
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
{
- int i;
-
- for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
- kvfree(slot->arch.gfn_track[i]);
- slot->arch.gfn_track[i] = NULL;
- }
+ kvfree(slot->arch.gfn_write_track);
+ slot->arch.gfn_write_track = NULL;
}
-int kvm_page_track_create_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- unsigned long npages)
+static int __kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- int i;
-
- for (i = 0; i < KVM_PAGE_TRACK_MAX; i++) {
- if (i == KVM_PAGE_TRACK_WRITE &&
- !kvm_page_track_write_tracking_enabled(kvm))
- continue;
-
- slot->arch.gfn_track[i] =
- __vcalloc(npages, sizeof(*slot->arch.gfn_track[i]),
- GFP_KERNEL_ACCOUNT);
- if (!slot->arch.gfn_track[i])
- goto track_free;
- }
+ const size_t size = sizeof(*slot->arch.gfn_write_track);
- return 0;
+ if (!slot->arch.gfn_write_track)
+ slot->arch.gfn_write_track = __vcalloc(npages, size,
+ GFP_KERNEL_ACCOUNT);
-track_free:
- kvm_page_track_free_memslot(slot);
- return -ENOMEM;
+ return slot->arch.gfn_write_track ? 0 : -ENOMEM;
}
-static inline bool page_track_mode_is_valid(enum kvm_page_track_mode mode)
+int kvm_page_track_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ unsigned long npages)
{
- if (mode < 0 || mode >= KVM_PAGE_TRACK_MAX)
- return false;
+ if (!kvm_page_track_write_tracking_enabled(kvm))
+ return 0;
- return true;
+ return __kvm_page_track_write_tracking_alloc(slot, npages);
}
int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot)
{
- unsigned short *gfn_track;
-
- if (slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE])
- return 0;
-
- gfn_track = __vcalloc(slot->npages, sizeof(*gfn_track),
- GFP_KERNEL_ACCOUNT);
- if (gfn_track == NULL)
- return -ENOMEM;
-
- slot->arch.gfn_track[KVM_PAGE_TRACK_WRITE] = gfn_track;
- return 0;
+ return __kvm_page_track_write_tracking_alloc(slot, slot->npages);
}
-static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode, short count)
+static void update_gfn_write_track(struct kvm_memory_slot *slot, gfn_t gfn,
+ short count)
{
int index, val;
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
- val = slot->arch.gfn_track[mode][index];
+ val = slot->arch.gfn_write_track[index];
- if (WARN_ON(val + count < 0 || val + count > USHRT_MAX))
+ if (WARN_ON_ONCE(val + count < 0 || val + count > USHRT_MAX))
return;
- slot->arch.gfn_track[mode][index] += count;
+ slot->arch.gfn_write_track[index] += count;
}
-/*
- * add guest page to the tracking pool so that corresponding access on that
- * page will be intercepted.
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_add_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode)
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn)
{
+ lockdep_assert_held_write(&kvm->mmu_lock);
- if (WARN_ON(!page_track_mode_is_valid(mode)))
- return;
+ lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+ srcu_read_lock_held(&kvm->srcu));
- if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
- !kvm_page_track_write_tracking_enabled(kvm)))
+ if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
return;
- update_gfn_track(slot, gfn, mode, 1);
+ update_gfn_write_track(slot, gfn, 1);
/*
* new track stops large page mapping for the
*/
kvm_mmu_gfn_disallow_lpage(slot, gfn);
- if (mode == KVM_PAGE_TRACK_WRITE)
- if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
- kvm_flush_remote_tlbs(kvm);
+ if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn, PG_LEVEL_4K))
+ kvm_flush_remote_tlbs(kvm);
}
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
-/*
- * remove the guest page from the tracking pool which stops the interception
- * of corresponding access on that page. It is the opposed operation of
- * kvm_slot_page_track_add_page().
- *
- * It should be called under the protection both of mmu-lock and kvm->srcu
- * or kvm->slots_lock.
- *
- * @kvm: the guest instance we are interested in.
- * @slot: the @gfn belongs to.
- * @gfn: the guest page.
- * @mode: tracking mode, currently only write track is supported.
- */
-void kvm_slot_page_track_remove_page(struct kvm *kvm,
- struct kvm_memory_slot *slot, gfn_t gfn,
- enum kvm_page_track_mode mode)
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn)
{
- if (WARN_ON(!page_track_mode_is_valid(mode)))
- return;
+ lockdep_assert_held_write(&kvm->mmu_lock);
- if (WARN_ON(mode == KVM_PAGE_TRACK_WRITE &&
- !kvm_page_track_write_tracking_enabled(kvm)))
+ lockdep_assert_once(lockdep_is_held(&kvm->slots_lock) ||
+ srcu_read_lock_held(&kvm->srcu));
+
+ if (KVM_BUG_ON(!kvm_page_track_write_tracking_enabled(kvm), kvm))
return;
- update_gfn_track(slot, gfn, mode, -1);
+ update_gfn_write_track(slot, gfn, -1);
/*
* allow large page mapping for the tracked page
*/
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
-EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
/*
* check if the corresponding access on the specified guest page is tracked.
*/
-bool kvm_slot_page_track_is_active(struct kvm *kvm,
- const struct kvm_memory_slot *slot,
- gfn_t gfn, enum kvm_page_track_mode mode)
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+ const struct kvm_memory_slot *slot, gfn_t gfn)
{
int index;
- if (WARN_ON(!page_track_mode_is_valid(mode)))
- return false;
-
if (!slot)
return false;
- if (mode == KVM_PAGE_TRACK_WRITE &&
- !kvm_page_track_write_tracking_enabled(kvm))
+ if (!kvm_page_track_write_tracking_enabled(kvm))
return false;
index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K);
- return !!READ_ONCE(slot->arch.gfn_track[mode][index]);
+ return !!READ_ONCE(slot->arch.gfn_write_track[index]);
}
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
void kvm_page_track_cleanup(struct kvm *kvm)
{
struct kvm_page_track_notifier_head *head;
* register the notifier so that event interception for the tracked guest
* pages can be received.
*/
-void
-kvm_page_track_register_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n)
+int kvm_page_track_register_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n)
{
struct kvm_page_track_notifier_head *head;
+ if (!kvm || kvm->mm != current->mm)
+ return -ESRCH;
+
+ kvm_get_kvm(kvm);
+
head = &kvm->arch.track_notifier_head;
write_lock(&kvm->mmu_lock);
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
write_unlock(&kvm->mmu_lock);
+ return 0;
}
EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
* stop receiving the event interception. It is the opposed operation of
* kvm_page_track_register_notifier().
*/
-void
-kvm_page_track_unregister_notifier(struct kvm *kvm,
- struct kvm_page_track_notifier_node *n)
+void kvm_page_track_unregister_notifier(struct kvm *kvm,
+ struct kvm_page_track_notifier_node *n)
{
struct kvm_page_track_notifier_head *head;
hlist_del_rcu(&n->node);
write_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
+
+ kvm_put_kvm(kvm);
}
EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
* The node should figure out if the written page is the one that node is
* interested in by itself.
*/
-void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes)
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes)
{
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
int idx;
- head = &vcpu->kvm->arch.track_notifier_head;
+ head = &kvm->arch.track_notifier_head;
if (hlist_empty(&head->track_notifier_list))
return;
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
- srcu_read_lock_held(&head->track_srcu))
+ srcu_read_lock_held(&head->track_srcu))
if (n->track_write)
- n->track_write(vcpu, gpa, new, bytes, n);
+ n->track_write(gpa, new, bytes, n);
srcu_read_unlock(&head->track_srcu, idx);
}
/*
- * Notify the node that memory slot is being removed or moved so that it can
- * drop write-protection for the pages in the memory slot.
- *
- * The node should figure out it has any write-protected pages in this slot
- * by itself.
+ * Notify external page track nodes that a memory region is being removed from
+ * the VM, e.g. so that users can free any associated metadata.
*/
-void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
{
struct kvm_page_track_notifier_head *head;
struct kvm_page_track_notifier_node *n;
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_srcu(n, &head->track_notifier_list, node,
- srcu_read_lock_held(&head->track_srcu))
- if (n->track_flush_slot)
- n->track_flush_slot(kvm, slot, n);
+ srcu_read_lock_held(&head->track_srcu))
+ if (n->track_remove_region)
+ n->track_remove_region(slot->base_gfn, slot->npages, n);
srcu_read_unlock(&head->track_srcu, idx);
}
+
+/*
+ * add guest page to the tracking pool so that corresponding access on that
+ * page will be intercepted.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_add_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+
+ slot = gfn_to_memslot(kvm, gfn);
+ if (!slot) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -EINVAL;
+ }
+
+ write_lock(&kvm->mmu_lock);
+ __kvm_write_track_add_gfn(kvm, slot, gfn);
+ write_unlock(&kvm->mmu_lock);
+
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_add_gfn);
+
+/*
+ * remove the guest page from the tracking pool which stops the interception
+ * of corresponding access on that page.
+ *
+ * @kvm: the guest instance we are interested in.
+ * @gfn: the guest page.
+ */
+int kvm_write_track_remove_gfn(struct kvm *kvm, gfn_t gfn)
+{
+ struct kvm_memory_slot *slot;
+ int idx;
+
+ idx = srcu_read_lock(&kvm->srcu);
+
+ slot = gfn_to_memslot(kvm, gfn);
+ if (!slot) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ return -EINVAL;
+ }
+
+ write_lock(&kvm->mmu_lock);
+ __kvm_write_track_remove_gfn(kvm, slot, gfn);
+ write_unlock(&kvm->mmu_lock);
+
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_track_remove_gfn);
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_PAGE_TRACK_H
+#define __KVM_X86_PAGE_TRACK_H
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_page_track.h>
+
+
+bool kvm_page_track_write_tracking_enabled(struct kvm *kvm);
+int kvm_page_track_write_tracking_alloc(struct kvm_memory_slot *slot);
+
+void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
+int kvm_page_track_create_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ unsigned long npages);
+
+void __kvm_write_track_add_gfn(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn);
+void __kvm_write_track_remove_gfn(struct kvm *kvm,
+ struct kvm_memory_slot *slot, gfn_t gfn);
+
+bool kvm_gfn_is_write_tracked(struct kvm *kvm,
+ const struct kvm_memory_slot *slot, gfn_t gfn);
+
+#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
+int kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
+
+void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa, const u8 *new, int bytes);
+void kvm_page_track_delete_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm)
+{
+ return !hlist_empty(&kvm->arch.track_notifier_head.track_notifier_list);
+}
+#else
+static inline int kvm_page_track_init(struct kvm *kvm) { return 0; }
+static inline void kvm_page_track_cleanup(struct kvm *kvm) { }
+
+static inline void __kvm_page_track_write(struct kvm *kvm, gpa_t gpa,
+ const u8 *new, int bytes) { }
+static inline void kvm_page_track_delete_slot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) { }
+
+static inline bool kvm_page_track_has_external_user(struct kvm *kvm) { return false; }
+
+#endif /* CONFIG_KVM_EXTERNAL_WRITE_TRACKING */
+
+static inline void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
+ const u8 *new, int bytes)
+{
+ __kvm_page_track_write(vcpu->kvm, gpa, new, bytes);
+
+ kvm_mmu_track_write(vcpu, gpa, new, bytes);
+}
+
+#endif /* __KVM_X86_PAGE_TRACK_H */
}
#endif
walker->max_level = walker->level;
- ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
/*
* FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
pte_access = ~0;
+
+ /*
+ * Queue a page fault for injection if this assertion fails, as callers
+ * assume that walker.fault contains sane info on a walk failure. I.e.
+ * avoid making the situation worse by inducing even worse badness
+ * between when the assertion fails and when KVM kicks the vCPU out to
+ * userspace (because the VM is bugged).
+ */
+ if (KVM_BUG_ON(is_long_mode(vcpu) && !is_pae(vcpu), vcpu->kvm))
+ goto error;
+
++walker->level;
do {
+ struct kvm_memory_slot *slot;
unsigned long host_addr;
pt_access = pte_access;
if (unlikely(real_gpa == INVALID_GPA))
return 0;
- host_addr = kvm_vcpu_gfn_to_hva_prot(vcpu, gpa_to_gfn(real_gpa),
+ slot = kvm_vcpu_gfn_to_memslot(vcpu, gpa_to_gfn(real_gpa));
+ if (!kvm_is_visible_memslot(slot))
+ goto error;
+
+ host_addr = gfn_to_hva_memslot_prot(slot, gpa_to_gfn(real_gpa),
&walker->pte_writable[walker->level - 1]);
if (unlikely(kvm_is_error_hva(host_addr)))
goto error;
goto retry_walk;
}
- pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __func__, (u64)pte, walker->pte_access,
- walker->pt_access[walker->level - 1]);
return 1;
error:
if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
return false;
- pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(gpte);
FNAME(protect_clean_gpte)(vcpu->arch.mmu, &pte_access, gpte);
if (FNAME(gpte_changed)(vcpu, gw, top_level))
goto out_gpte_changed;
- if (WARN_ON(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+ if (WARN_ON_ONCE(!VALID_PAGE(vcpu->arch.mmu->root.hpa)))
+ goto out_gpte_changed;
+
+ /*
+ * Load a new root and retry the faulting instruction in the extremely
+ * unlikely scenario that the guest root gfn became visible between
+ * loading a dummy root and handling the resulting page fault, e.g. if
+ * userspace create a memslot in the interim.
+ */
+ if (unlikely(kvm_mmu_is_dummy_root(vcpu->arch.mmu->root.hpa))) {
+ kvm_make_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu);
goto out_gpte_changed;
+ }
for_each_shadow_entry(vcpu, fault->addr, it) {
gfn_t table_gfn;
struct guest_walker walker;
int r;
- pgprintk("%s: addr %lx err %x\n", __func__, fault->addr, fault->error_code);
WARN_ON_ONCE(fault->is_tdp);
/*
* The page is not mapped by the guest. Let the guest handle it.
*/
if (!r) {
- pgprintk("%s: guest page fault\n", __func__);
if (!fault->prefetch)
kvm_inject_emulated_page_fault(vcpu, &walker.fault);
{
int offset = 0;
- WARN_ON(sp->role.level != PG_LEVEL_4K);
+ WARN_ON_ONCE(sp->role.level != PG_LEVEL_4K);
if (PTTYPE == 32)
offset = sp->role.quadrant << SPTE_LEVEL_BITS;
{
u64 mask;
- WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
+ WARN_ON_ONCE(gen & ~MMIO_SPTE_GEN_MASK);
mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
* shadow pages and unsync'ing pages is not allowed.
*/
if (mmu_try_to_unsync_pages(vcpu->kvm, slot, gfn, can_unsync, prefetch)) {
- pgprintk("%s: found shadow page for %llx, marking ro\n",
- __func__, gfn);
wrprot = true;
pte_access &= ~ACC_WRITE_MASK;
spte &= ~(PT_WRITABLE_MASK | shadow_mmu_writable_mask);
if ((spte & PT_WRITABLE_MASK) && kvm_slot_dirty_track_enabled(slot)) {
/* Enforced by kvm_mmu_hugepage_adjust. */
- WARN_ON(level > PG_LEVEL_4K);
+ WARN_ON_ONCE(level > PG_LEVEL_4K);
mark_page_dirty_in_slot(vcpu->kvm, slot, gfn);
}
#ifndef KVM_X86_MMU_SPTE_H
#define KVM_X86_MMU_SPTE_H
+#include "mmu.h"
#include "mmu_internal.h"
/*
return to_shadow_page(__pa(sptep));
}
+static inline struct kvm_mmu_page *root_to_sp(hpa_t root)
+{
+ if (kvm_mmu_is_dummy_root(root))
+ return NULL;
+
+ /*
+ * The "root" may be a special root, e.g. a PAE entry, treat it as a
+ * SPTE to ensure any non-PA bits are dropped.
+ */
+ return spte_to_child_sp(root);
+}
+
static inline bool is_mmio_spte(u64 spte)
{
return (spte & shadow_mmio_mask) == shadow_mmio_value &&
static inline bool spte_ad_enabled(u64 spte)
{
- MMU_WARN_ON(!is_shadow_present_pte(spte));
+ KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return (spte & SPTE_TDP_AD_MASK) != SPTE_TDP_AD_DISABLED;
}
static inline bool spte_ad_need_write_protect(u64 spte)
{
- MMU_WARN_ON(!is_shadow_present_pte(spte));
+ KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
/*
* This is benign for non-TDP SPTEs as SPTE_TDP_AD_ENABLED is '0',
* and non-TDP SPTEs will never set these bits. Optimize for 64-bit
static inline u64 spte_shadow_accessed_mask(u64 spte)
{
- MMU_WARN_ON(!is_shadow_present_pte(spte));
+ KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return spte_ad_enabled(spte) ? shadow_accessed_mask : 0;
}
static inline u64 spte_shadow_dirty_mask(u64 spte)
{
- MMU_WARN_ON(!is_shadow_present_pte(spte));
+ KVM_MMU_WARN_ON(!is_shadow_present_pte(spte));
return spte_ad_enabled(spte) ? shadow_dirty_mask : 0;
}
void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
int min_level, gfn_t next_last_level_gfn)
{
- int root_level = root->role.level;
-
- WARN_ON(root_level < 1);
- WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
+ if (WARN_ON_ONCE(!root || (root->role.level < 1) ||
+ (root->role.level > PT64_ROOT_MAX_LEVEL))) {
+ iter->valid = false;
+ return;
+ }
iter->next_last_level_gfn = next_last_level_gfn;
- iter->root_level = root_level;
+ iter->root_level = root->role.level;
iter->min_level = min_level;
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root->spt;
iter->as_id = kvm_mmu_page_as_id(root);
bool is_leaf = is_present && is_last_spte(new_spte, level);
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
- WARN_ON(level > PT64_ROOT_MAX_LEVEL);
- WARN_ON(level < PG_LEVEL_4K);
- WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+ WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
+ WARN_ON_ONCE(level < PG_LEVEL_4K);
+ WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
/*
* If this warning were to trigger it would indicate that there was a
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
- if (WARN_ON(!is_mmio_spte(old_spte) &&
- !is_mmio_spte(new_spte) &&
- !is_removed_spte(new_spte)))
+ if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
+ !is_mmio_spte(new_spte) &&
+ !is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
"different nonpresent SPTE, unless one or both\n"
* should be used. If operating under the MMU lock in write mode, the
* use of the removed SPTE should not be necessary.
*/
- WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
+ WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
else
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
- for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
+ for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
/*
* Yield if the MMU lock is contended or this thread needs to return control
struct tdp_iter *iter,
bool flush, bool shared)
{
- WARN_ON(iter->yielded);
+ WARN_ON_ONCE(iter->yielded);
/* Ensure forward progress has been made before yielding. */
if (iter->next_last_level_gfn == iter->yielded_gfn)
rcu_read_lock();
- WARN_ON(iter->gfn > iter->next_last_level_gfn);
+ WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
iter->yielded = true;
}
u64 new_spte;
/* Huge pages aren't expected to be modified without first being zapped. */
- WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+ WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
if (iter->level != PG_LEVEL_4K ||
!is_shadow_present_pte(iter->old_spte))
*/
tdp_mmu_iter_set_spte(kvm, iter, 0);
- if (!pte_write(range->pte)) {
+ if (!pte_write(range->arg.pte)) {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
- pte_pfn(range->pte));
+ pte_pfn(range->arg.pte));
tdp_mmu_iter_set_spte(kvm, iter, new_spte);
}
if (!is_shadow_present_pte(iter.old_spte))
continue;
- MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
if (!(iter.old_spte & dbit))
continue;
shadow_dirty_mask;
struct tdp_iter iter;
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
rcu_read_lock();
tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
if (!mask)
break;
- MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
if (iter.level > PG_LEVEL_4K ||
!(mask & (1UL << (iter.gfn - gfn))))
{
struct kvm_mmu_page *root;
- lockdep_assert_held_write(&kvm->mmu_lock);
for_each_tdp_mmu_root(kvm, root, slot->as_id)
clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
}
struct kvm_x86_pmu_event_filter *filter;
struct kvm *kvm = pmc->vcpu->kvm;
- if (!static_call(kvm_x86_pmu_hw_event_available)(pmc))
- return false;
-
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
if (!filter)
return true;
static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
{
return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
+ static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
check_pmu_event_filter(pmc);
}
/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define X86_FEATURE_AMX_COMPLEX KVM_X86_FEATURE(CPUID_7_1_EDX, 8)
#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
/* CPUID level 0x80000007 (EDX). */
int ret = 0;
unsigned long flags;
struct amd_svm_iommu_ir *ir;
+ u64 entry;
/**
* In some cases, the existing irte is updated and re-set,
ir->data = pi->ir_data;
spin_lock_irqsave(&svm->ir_list_lock, flags);
+
+ /*
+ * Update the target pCPU for IOMMU doorbells if the vCPU is running.
+ * If the vCPU is NOT running, i.e. is blocking or scheduled out, KVM
+ * will update the pCPU info when the vCPU awkened and/or scheduled in.
+ * See also avic_vcpu_load().
+ */
+ entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
+ amd_iommu_update_ga(entry & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK,
+ true, pi->ir_data);
+
list_add(&ir->node, &svm->ir_list);
spin_unlock_irqrestore(&svm->ir_list_lock, flags);
out:
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
int ret = 0;
- unsigned long flags;
struct amd_svm_iommu_ir *ir;
struct vcpu_svm *svm = to_svm(vcpu);
+ lockdep_assert_held(&svm->ir_list_lock);
+
if (!kvm_arch_has_assigned_device(vcpu->kvm))
return 0;
* Here, we go through the per-vcpu ir_list to update all existing
* interrupt remapping table entry targeting this vcpu.
*/
- spin_lock_irqsave(&svm->ir_list_lock, flags);
-
if (list_empty(&svm->ir_list))
- goto out;
+ return 0;
list_for_each_entry(ir, &svm->ir_list, node) {
ret = amd_iommu_update_ga(cpu, r, ir->data);
if (ret)
- break;
+ return ret;
}
-out:
- spin_unlock_irqrestore(&svm->ir_list_lock, flags);
- return ret;
+ return 0;
}
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long flags;
lockdep_assert_preemption_disabled();
if (kvm_vcpu_is_blocking(vcpu))
return;
+ /*
+ * Grab the per-vCPU interrupt remapping lock even if the VM doesn't
+ * _currently_ have assigned devices, as that can change. Holding
+ * ir_list_lock ensures that either svm_ir_list_add() will consume
+ * up-to-date entry information, or that this task will wait until
+ * svm_ir_list_add() completes to set the new target pCPU.
+ */
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+
entry = READ_ONCE(*(svm->avic_physical_id_cache));
WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
+
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
}
void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
+ unsigned long flags;
lockdep_assert_preemption_disabled();
+ /*
+ * Note, reading the Physical ID entry outside of ir_list_lock is safe
+ * as only the pCPU that has loaded (or is loading) the vCPU is allowed
+ * to modify the entry, and preemption is disabled. I.e. the vCPU
+ * can't be scheduled out and thus avic_vcpu_{put,load}() can't run
+ * recursively.
+ */
entry = READ_ONCE(*(svm->avic_physical_id_cache));
/* Nothing to do if IsRunning == '0' due to vCPU blocking. */
if (!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK))
return;
+ /*
+ * Take and hold the per-vCPU interrupt remapping lock while updating
+ * the Physical ID entry even though the lock doesn't protect against
+ * multiple writers (see above). Holding ir_list_lock ensures that
+ * either svm_ir_list_add() will consume up-to-date entry information,
+ * or that this task will wait until svm_ir_list_add() completes to
+ * mark the vCPU as not running.
+ */
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
+
avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
+
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
+
}
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
- if (!svm->v_vmload_vmsave_enabled)
+ if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
return true;
if (!nested_npt_enabled(svm))
bool new_vmcb12 = false;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
nested_vmcb02_compute_g_pat(svm);
vmcb_mark_dirty(vmcb02, VMCB_DT);
}
- kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+ kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
- svm_set_efer(&svm->vcpu, svm->nested.save.efer);
+ svm_set_efer(vcpu, svm->nested.save.efer);
- svm_set_cr0(&svm->vcpu, svm->nested.save.cr0);
- svm_set_cr4(&svm->vcpu, svm->nested.save.cr4);
+ svm_set_cr0(vcpu, svm->nested.save.cr0);
+ svm_set_cr4(vcpu, svm->nested.save.cr4);
svm->vcpu.arch.cr2 = vmcb12->save.cr2;
- kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
- kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
- kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
+ kvm_rax_write(vcpu, vmcb12->save.rax);
+ kvm_rsp_write(vcpu, vmcb12->save.rsp);
+ kvm_rip_write(vcpu, vmcb12->save.rip);
/* In case we don't even reach vcpu_run, the fields are not updated */
vmcb02->save.rax = vmcb12->save.rax;
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
- if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
/*
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
* exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
*/
- if (svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
+ if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
+ (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
else
int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
- if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
- WARN_ON(!svm->tsc_scaling_enabled);
+ if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+ svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
nested_svm_update_tsc_ratio_msr(vcpu);
- }
vmcb02->control.int_ctl =
(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
* what a nrips=0 CPU would do (L1 is responsible for advancing RIP
* prior to injecting the event).
*/
- if (svm->nrips_enabled)
+ if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
vmcb02->control.next_rip = svm->nested.ctl.next_rip;
else if (boot_cpu_has(X86_FEATURE_NRIPS))
vmcb02->control.next_rip = vmcb12_rip;
svm->soft_int_injected = true;
svm->soft_int_csbase = vmcb12_csbase;
svm->soft_int_old_rip = vmcb12_rip;
- if (svm->nrips_enabled)
+ if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
svm->soft_int_next_rip = svm->nested.ctl.next_rip;
else
svm->soft_int_next_rip = vmcb12_rip;
vmcb02->control.virt_ext = vmcb01->control.virt_ext &
LBR_CTL_ENABLE_MASK;
- if (svm->lbrv_enabled)
+ if (guest_can_use(vcpu, X86_FEATURE_LBRV))
vmcb02->control.virt_ext |=
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
- pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
- pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
+ if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
+ pause_count12 = svm->nested.ctl.pause_filter_count;
+ else
+ pause_count12 = 0;
+ if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
+ pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
+ else
+ pause_thresh12 = 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
/* use guest values since host doesn't intercept PAUSE */
vmcb02->control.pause_filter_count = pause_count12;
if (vmcb12->control.exit_code != SVM_EXIT_ERR)
nested_save_pending_event_to_vmcb12(svm, vmcb12);
- if (svm->nrips_enabled)
+ if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
vmcb12->control.next_rip = vmcb02->control.next_rip;
vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
if (!nested_exit_on_intr(svm))
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
- if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
svm_copy_lbrs(vmcb12, vmcb02);
svm_update_lbrv(vcpu);
} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
}
- if (svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio) {
- WARN_ON(!svm->tsc_scaling_enabled);
+ if (kvm_caps.has_tsc_control &&
+ vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
- __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+ svm_write_tsc_multiplier(vcpu);
}
svm->nested.ctl.nested_cr3 = 0;
vcpu->arch.tsc_scaling_ratio =
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
svm->tsc_ratio_msr);
- __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+ svm_write_tsc_multiplier(vcpu);
}
/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>
+#include <asm/debugreg.h>
#include "mmu.h"
#include "x86.h"
/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
+
+/* enable/disable SEV-ES DebugSwap support */
+static bool sev_es_debug_swap_enabled = true;
+module_param_named(debug_swap, sev_es_debug_swap_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
+#define sev_es_debug_swap_enabled false
#endif /* CONFIG_KVM_AMD_SEV */
static u8 sev_enc_bit;
save->xss = svm->vcpu.arch.ia32_xss;
save->dr6 = svm->vcpu.arch.dr6;
+ if (sev_es_debug_swap_enabled)
+ save->sev_features |= SVM_SEV_FEAT_DEBUG_SWAP;
+
pr_debug("Virtual Machine Save Area (VMSA):\n");
print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
struct vcpu_svm *svm = to_svm(vcpu);
int ret;
+ if (vcpu->guest_debug) {
+ pr_warn_once("KVM_SET_GUEST_DEBUG for SEV-ES guest is not supported");
+ return -EINVAL;
+ }
+
/* Perform some pre-encryption checks against the VMSA */
ret = sev_es_sync_vmsa(svm);
if (ret)
* Note, the source is not required to have the same number of
* vCPUs as the destination when migrating a vanilla SEV VM.
*/
- src_vcpu = kvm_get_vcpu(dst_kvm, i);
+ src_vcpu = kvm_get_vcpu(src_kvm, i);
src_svm = to_svm(src_vcpu);
/*
bool sev_es_supported = false;
bool sev_supported = false;
- if (!sev_enabled || !npt_enabled)
+ if (!sev_enabled || !npt_enabled || !nrips)
goto out;
/*
sev_enabled = sev_supported;
sev_es_enabled = sev_es_supported;
+ if (!sev_es_enabled || !cpu_feature_enabled(X86_FEATURE_DEBUG_SWAP) ||
+ !cpu_feature_enabled(X86_FEATURE_NO_NESTED_DATA_BP))
+ sev_es_debug_swap_enabled = false;
#endif
}
svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
- ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
+ ++vcpu->stat.nmi_window_exits;
+ svm->nmi_masked = false;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ ret = 1;
break;
case SVM_VMGEXIT_AP_HLT_LOOP:
ret = kvm_emulate_ap_reset_hold(vcpu);
static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
+ struct vmcb *vmcb = svm->vmcb01.ptr;
struct kvm_vcpu *vcpu = &svm->vcpu;
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
/*
* An SEV-ES guest requires a VMSA area that is a separate from the
* VMCB page. Do not include the encryption mask on the VMSA physical
- * address since hardware will access it using the guest key.
+ * address since hardware will access it using the guest key. Note,
+ * the VMSA will be NULL if this vCPU is the destination for intrahost
+ * migration, and will be copied later.
*/
- svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
+ if (svm->sev_es.vmsa)
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
svm_set_intercept(svm, TRAP_CR4_WRITE);
svm_set_intercept(svm, TRAP_CR8_WRITE);
- /* No support for enable_vmware_backdoor */
- clr_exception_intercept(svm, GP_VECTOR);
+ vmcb->control.intercepts[INTERCEPT_DR] = 0;
+ if (!sev_es_debug_swap_enabled) {
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+ recalc_intercepts(svm);
+ } else {
+ /*
+ * Disable #DB intercept iff DebugSwap is enabled. KVM doesn't
+ * allow debugging SEV-ES guests, and enables DebugSwap iff
+ * NO_NESTED_DATA_BP is supported, so there's no reason to
+ * intercept #DB when DebugSwap is enabled. For simplicity
+ * with respect to guest debug, intercept #DB for other VMs
+ * even if NO_NESTED_DATA_BP is supported, i.e. even if the
+ * guest can't DoS the CPU with infinite #DB vectoring.
+ */
+ clr_exception_intercept(svm, DB_VECTOR);
+ }
/* Can't intercept XSETBV, HV can't modify XCR0 directly */
svm_clr_intercept(svm, INTERCEPT_XSETBV);
svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
clr_exception_intercept(svm, UD_VECTOR);
+ /*
+ * Don't intercept #GP for SEV guests, e.g. for the VMware backdoor, as
+ * KVM can't decrypt guest memory to decode the faulting instruction.
+ */
+ clr_exception_intercept(svm, GP_VECTOR);
+
if (sev_es_guest(svm->vcpu.kvm))
sev_es_init_vmcb(svm);
}
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa)
{
/*
- * As an SEV-ES guest, hardware will restore the host state on VMEXIT,
- * of which one step is to perform a VMLOAD. KVM performs the
- * corresponding VMSAVE in svm_prepare_guest_switch for both
- * traditional and SEV-ES guests.
+ * All host state for SEV-ES guests is categorized into three swap types
+ * based on how it is handled by hardware during a world switch:
+ *
+ * A: VMRUN: Host state saved in host save area
+ * VMEXIT: Host state loaded from host save area
+ *
+ * B: VMRUN: Host state _NOT_ saved in host save area
+ * VMEXIT: Host state loaded from host save area
+ *
+ * C: VMRUN: Host state _NOT_ saved in host save area
+ * VMEXIT: Host state initialized to default(reset) values
+ *
+ * Manually save type-B state, i.e. state that is loaded by VMEXIT but
+ * isn't saved by VMRUN, that isn't already saved by VMSAVE (performed
+ * by common SVM code).
*/
-
- /* XCR0 is restored on VMEXIT, save the current host value */
hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
-
- /* PKRU is restored on VMEXIT, save the current host value */
hostsa->pkru = read_pkru();
-
- /* MSR_IA32_XSS is restored on VMEXIT, save the currnet host value */
hostsa->xss = host_xss;
+
+ /*
+ * If DebugSwap is enabled, debug registers are loaded but NOT saved by
+ * the CPU (Type-B). If DebugSwap is disabled/unsupported, the CPU both
+ * saves and loads debug registers (Type-A).
+ */
+ if (sev_es_debug_swap_enabled) {
+ hostsa->dr0 = native_get_debugreg(0);
+ hostsa->dr1 = native_get_debugreg(1);
+ hostsa->dr2 = native_get_debugreg(2);
+ hostsa->dr3 = native_get_debugreg(3);
+ hostsa->dr0_addr_mask = amd_get_dr_addr_mask(0);
+ hostsa->dr1_addr_mask = amd_get_dr_addr_mask(1);
+ hostsa->dr2_addr_mask = amd_get_dr_addr_mask(2);
+ hostsa->dr3_addr_mask = amd_get_dr_addr_mask(3);
+ }
}
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
#include <asm/spec-ctrl.h>
#include <asm/cpu_device_id.h>
#include <asm/traps.h>
+#include <asm/reboot.h>
#include <asm/fpu/api.h>
-#include <asm/virtext.h>
-
#include <trace/events/ipi.h>
#include "trace.h"
module_param(nested, int, S_IRUGO);
/* enable/disable Next RIP Save */
-static int nrips = true;
+int nrips = true;
module_param(nrips, int, 0444);
/* enable/disable Virtual VMLOAD VMSAVE */
svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
}
+static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
+ void *insn, int insn_len);
static int __svm_skip_emulated_instruction(struct kvm_vcpu *vcpu,
bool commit_side_effects)
}
if (!svm->next_rip) {
+ /*
+ * FIXME: Drop this when kvm_emulate_instruction() does the
+ * right thing and treats "can't emulate" as outright failure
+ * for EMULTYPE_SKIP.
+ */
+ if (!svm_can_emulate_instruction(vcpu, EMULTYPE_SKIP, NULL, 0))
+ return 0;
+
if (unlikely(!commit_side_effects))
old_rflags = svm->vmcb->save.rflags;
vcpu->arch.osvw.status |= 1;
}
-static bool kvm_is_svm_supported(void)
+static bool __kvm_is_svm_supported(void)
{
- int cpu = raw_smp_processor_id();
- const char *msg;
+ int cpu = smp_processor_id();
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
u64 vm_cr;
- if (!cpu_has_svm(&msg)) {
- pr_err("SVM not supported by CPU %d, %s\n", cpu, msg);
+ if (c->x86_vendor != X86_VENDOR_AMD &&
+ c->x86_vendor != X86_VENDOR_HYGON) {
+ pr_err("CPU %d isn't AMD or Hygon\n", cpu);
+ return false;
+ }
+
+ if (!cpu_has(c, X86_FEATURE_SVM)) {
+ pr_err("SVM not supported by CPU %d\n", cpu);
return false;
}
return true;
}
+static bool kvm_is_svm_supported(void)
+{
+ bool supported;
+
+ migrate_disable();
+ supported = __kvm_is_svm_supported();
+ migrate_enable();
+
+ return supported;
+}
+
static int svm_check_processor_compat(void)
{
- if (!kvm_is_svm_supported())
+ if (!__kvm_is_svm_supported())
return -EIO;
return 0;
}
-void __svm_write_tsc_multiplier(u64 multiplier)
+static void __svm_write_tsc_multiplier(u64 multiplier)
{
- preempt_disable();
-
if (multiplier == __this_cpu_read(current_tsc_ratio))
- goto out;
+ return;
wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
__this_cpu_write(current_tsc_ratio, multiplier);
-out:
- preempt_enable();
+}
+
+static inline void kvm_cpu_svm_disable(void)
+{
+ uint64_t efer;
+
+ wrmsrl(MSR_VM_HSAVE_PA, 0);
+ rdmsrl(MSR_EFER, efer);
+ if (efer & EFER_SVME) {
+ /*
+ * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
+ * NMI aren't blocked.
+ */
+ stgi();
+ wrmsrl(MSR_EFER, efer & ~EFER_SVME);
+ }
+}
+
+static void svm_emergency_disable(void)
+{
+ kvm_rebooting = true;
+
+ kvm_cpu_svm_disable();
}
static void svm_hardware_disable(void)
if (tsc_scaling)
__svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
- cpu_svm_disable();
+ kvm_cpu_svm_disable();
amd_pmu_disable_virt();
}
}
+static void set_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
+ vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
+
+ recalc_intercepts(svm);
+}
+
+static void clr_dr_intercepts(struct vcpu_svm *svm)
+{
+ struct vmcb *vmcb = svm->vmcb01.ptr;
+
+ vmcb->control.intercepts[INTERCEPT_DR] = 0;
+
+ recalc_intercepts(svm);
+}
+
static int direct_access_msr_slot(u32 msr)
{
u32 i;
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
-static int svm_get_lbr_msr(struct vcpu_svm *svm, u32 index)
+static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
/*
- * If the LBR virtualization is disabled, the LBR msrs are always
- * kept in the vmcb01 to avoid copying them on nested guest entries.
- *
- * If nested, and the LBR virtualization is enabled/disabled, the msrs
- * are moved between the vmcb01 and vmcb02 as needed.
+ * If LBR virtualization is disabled, the LBR MSRs are always kept in
+ * vmcb01. If LBR virtualization is enabled and L1 is running VMs of
+ * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
*/
- struct vmcb *vmcb =
- (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK) ?
- svm->vmcb : svm->vmcb01.ptr;
-
- switch (index) {
- case MSR_IA32_DEBUGCTLMSR:
- return vmcb->save.dbgctl;
- case MSR_IA32_LASTBRANCHFROMIP:
- return vmcb->save.br_from;
- case MSR_IA32_LASTBRANCHTOIP:
- return vmcb->save.br_to;
- case MSR_IA32_LASTINTFROMIP:
- return vmcb->save.last_excp_from;
- case MSR_IA32_LASTINTTOIP:
- return vmcb->save.last_excp_to;
- default:
- KVM_BUG(false, svm->vcpu.kvm,
- "%s: Unknown MSR 0x%x", __func__, index);
- return 0;
- }
+ return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
+ svm->vmcb01.ptr;
}
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
-
- bool enable_lbrv = svm_get_lbr_msr(svm, MSR_IA32_DEBUGCTLMSR) &
- DEBUGCTLMSR_LBR;
-
- bool current_enable_lbrv = !!(svm->vmcb->control.virt_ext &
- LBR_CTL_ENABLE_MASK);
-
- if (unlikely(is_guest_mode(vcpu) && svm->lbrv_enabled))
- if (unlikely(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))
- enable_lbrv = true;
+ bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
+ bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+ (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv == current_enable_lbrv)
return;
return svm->tsc_ratio_msr;
}
-static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void svm_write_tsc_offset(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset;
- svm->vmcb->control.tsc_offset = offset;
+ svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset;
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
- __svm_write_tsc_multiplier(multiplier);
+ preempt_disable();
+ if (to_svm(vcpu)->guest_state_loaded)
+ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
+ preempt_enable();
}
-
/* Evaluate instruction intercepts that depend on guest CPUID features. */
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm)
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 0, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 0, 0);
-
- svm->v_vmload_vmsave_enabled = false;
} else {
/*
* If hardware supports Virtual VMLOAD VMSAVE then enable it
* Guest access to VMware backdoor ports could legitimately
* trigger #GP because of TSS I/O permission bitmap.
* We intercept those #GP and allow access to them anyway
- * as VMware does. Don't intercept #GP for SEV guests as KVM can't
- * decrypt guest memory to decode the faulting instruction.
+ * as VMware does.
*/
- if (enable_vmware_backdoor && !sev_guest(vcpu->kvm))
+ if (enable_vmware_backdoor)
set_exception_intercept(svm, GP_VECTOR);
svm_set_intercept(svm, INTERCEPT_INTR);
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (vcpu->arch.guest_state_protected)
+ if (WARN_ON_ONCE(sev_es_guest(vcpu->kvm)))
return;
get_debugreg(vcpu->arch.db[0], 0);
{
struct vcpu_svm *svm = to_svm(vcpu);
+ WARN_ON_ONCE(sev_es_guest(vcpu->kvm));
+
++vcpu->stat.nmi_window_exits;
svm->awaiting_iret_completion = true;
svm_clr_iret_intercept(svm);
- if (!sev_es_guest(vcpu->kvm))
- svm->nmi_iret_rip = kvm_rip_read(vcpu);
+ svm->nmi_iret_rip = kvm_rip_read(vcpu);
kvm_make_request(KVM_REQ_EVENT, vcpu);
return 1;
unsigned long val;
int err = 0;
+ /*
+ * SEV-ES intercepts DR7 only to disable guest debugging and the guest issues a VMGEXIT
+ * for DR7 write only. KVM cannot change DR7 (always swapped as type 'A') so return early.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return 1;
+
if (vcpu->guest_debug == 0) {
/*
* No more DR vmexits; force a reload of the debug registers
switch (msr_info->index) {
case MSR_AMD64_TSC_RATIO:
- if (!msr_info->host_initiated && !svm->tsc_scaling_enabled)
+ if (!msr_info->host_initiated &&
+ !guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR))
return 1;
msr_info->data = svm->tsc_ratio_msr;
break;
msr_info->data = svm->tsc_aux;
break;
case MSR_IA32_DEBUGCTLMSR:
+ msr_info->data = svm_get_lbr_vmcb(svm)->save.dbgctl;
+ break;
case MSR_IA32_LASTBRANCHFROMIP:
+ msr_info->data = svm_get_lbr_vmcb(svm)->save.br_from;
+ break;
case MSR_IA32_LASTBRANCHTOIP:
+ msr_info->data = svm_get_lbr_vmcb(svm)->save.br_to;
+ break;
case MSR_IA32_LASTINTFROMIP:
+ msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_from;
+ break;
case MSR_IA32_LASTINTTOIP:
- msr_info->data = svm_get_lbr_msr(svm, msr_info->index);
+ msr_info->data = svm_get_lbr_vmcb(svm)->save.last_excp_to;
break;
case MSR_VM_HSAVE_PA:
msr_info->data = svm->nested.hsave_msr;
switch (ecx) {
case MSR_AMD64_TSC_RATIO:
- if (!svm->tsc_scaling_enabled) {
+ if (!guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR)) {
if (!msr->host_initiated)
return 1;
svm->tsc_ratio_msr = data;
- if (svm->tsc_scaling_enabled && is_guest_mode(vcpu))
+ if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
+ is_guest_mode(vcpu))
nested_svm_update_tsc_ratio_msr(vcpu);
break;
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
- if (svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK)
- svm->vmcb->save.dbgctl = data;
- else
- svm->vmcb01.ptr->save.dbgctl = data;
-
+ svm_get_lbr_vmcb(svm)->save.dbgctl = data;
svm_update_lbrv(vcpu);
-
break;
case MSR_VM_HSAVE_PA:
/*
if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */
+ /*
+ * SEV-ES guests are responsible for signaling when a vCPU is ready to
+ * receive a new NMI, as SEV-ES guests can't be single-stepped, i.e.
+ * KVM can't intercept and single-step IRET to detect when NMIs are
+ * unblocked (architecturally speaking). See SVM_VMGEXIT_NMI_COMPLETE.
+ *
+ * Note, GIF is guaranteed to be '1' for SEV-ES guests as hardware
+ * ignores SEV-ES guest writes to EFER.SVME *and* CLGI/STGI are not
+ * supported NAEs in the GHCB protocol.
+ */
+ if (sev_es_guest(vcpu->kvm))
+ return;
+
if (!gif_set(svm)) {
if (vgif)
svm_set_intercept(svm, INTERCEPT_STGI);
svm->soft_int_injected = false;
/*
- * If we've made progress since setting HF_IRET_MASK, we've
+ * If we've made progress since setting awaiting_iret_completion, we've
* executed an IRET and can allow NMI injection.
*/
if (svm->awaiting_iret_completion &&
- (sev_es_guest(vcpu->kvm) ||
- kvm_rip_read(vcpu) != svm->nmi_iret_rip)) {
+ kvm_rip_read(vcpu) != svm->nmi_iret_rip) {
svm->awaiting_iret_completion = false;
svm->nmi_masked = false;
kvm_make_request(KVM_REQ_EVENT, vcpu);
struct vcpu_svm *svm = to_svm(vcpu);
struct kvm_cpuid_entry2 *best;
- vcpu->arch.xsaves_enabled = guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- boot_cpu_has(X86_FEATURE_XSAVE) &&
- boot_cpu_has(X86_FEATURE_XSAVES);
-
- /* Update nrips enabled cache */
- svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
- guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
-
- svm->tsc_scaling_enabled = tsc_scaling && guest_cpuid_has(vcpu, X86_FEATURE_TSCRATEMSR);
- svm->lbrv_enabled = lbrv && guest_cpuid_has(vcpu, X86_FEATURE_LBRV);
-
- svm->v_vmload_vmsave_enabled = vls && guest_cpuid_has(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
-
- svm->pause_filter_enabled = kvm_cpu_cap_has(X86_FEATURE_PAUSEFILTER) &&
- guest_cpuid_has(vcpu, X86_FEATURE_PAUSEFILTER);
+ /*
+ * SVM doesn't provide a way to disable just XSAVES in the guest, KVM
+ * can only disable all variants of by disallowing CR4.OSXSAVE from
+ * being set. As a result, if the host has XSAVE and XSAVES, and the
+ * guest has XSAVE enabled, the guest can execute XSAVES without
+ * faulting. Treat XSAVES as enabled in this case regardless of
+ * whether it's advertised to the guest so that KVM context switches
+ * XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
+ * the guest read/write access to the host's XSS.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+ boot_cpu_has(X86_FEATURE_XSAVES) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+ kvm_governed_feature_set(vcpu, X86_FEATURE_XSAVES);
- svm->pause_threshold_enabled = kvm_cpu_cap_has(X86_FEATURE_PFTHRESHOLD) &&
- guest_cpuid_has(vcpu, X86_FEATURE_PFTHRESHOLD);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_NRIPS);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_TSCRATEMSR);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LBRV);
- svm->vgif_enabled = vgif && guest_cpuid_has(vcpu, X86_FEATURE_VGIF);
+ /*
+ * Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
+ * VMLOAD drops bits 63:32 of SYSENTER (ignoring the fact that exposing
+ * SVM on Intel is bonkers and extremely unlikely to work).
+ */
+ if (!guest_cpuid_is_intel(vcpu))
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
- svm->vnmi_enabled = vnmi && guest_cpuid_has(vcpu, X86_FEATURE_VNMI);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PAUSEFILTER);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_PFTHRESHOLD);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VGIF);
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VNMI);
svm_recalc_instruction_intercepts(vcpu, svm);
* and cannot be decrypted by KVM, i.e. KVM would read cyphertext and
* decode garbage.
*
- * Inject #UD if KVM reached this point without an instruction buffer.
- * In practice, this path should never be hit by a well-behaved guest,
- * e.g. KVM doesn't intercept #UD or #GP for SEV guests, but this path
- * is still theoretically reachable, e.g. via unaccelerated fault-like
- * AVIC access, and needs to be handled by KVM to avoid putting the
- * guest into an infinite loop. Injecting #UD is somewhat arbitrary,
- * but its the least awful option given lack of insight into the guest.
+ * If KVM is NOT trying to simply skip an instruction, inject #UD if
+ * KVM reached this point without an instruction buffer. In practice,
+ * this path should never be hit by a well-behaved guest, e.g. KVM
+ * doesn't intercept #UD or #GP for SEV guests, but this path is still
+ * theoretically reachable, e.g. via unaccelerated fault-like AVIC
+ * access, and needs to be handled by KVM to avoid putting the guest
+ * into an infinite loop. Injecting #UD is somewhat arbitrary, but
+ * its the least awful option given lack of insight into the guest.
+ *
+ * If KVM is trying to skip an instruction, simply resume the guest.
+ * If a #NPF occurs while the guest is vectoring an INT3/INTO, then KVM
+ * will attempt to re-inject the INT3/INTO and skip the instruction.
+ * In that scenario, retrying the INT3/INTO and hoping the guest will
+ * make forward progress is the only option that has a chance of
+ * success (and in practice it will work the vast majority of the time).
*/
if (unlikely(!insn)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
+ if (!(emul_type & EMULTYPE_SKIP))
+ kvm_queue_exception(vcpu, UD_VECTOR);
return false;
}
svm_adjust_mmio_mask();
+ nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS);
+
/*
* Note, SEV setup consumes npt_enabled and enable_mmio_caching (which
- * may be modified by svm_adjust_mmio_mask()).
+ * may be modified by svm_adjust_mmio_mask()), as well as nrips.
*/
sev_hardware_setup();
goto err;
}
- if (nrips) {
- if (!boot_cpu_has(X86_FEATURE_NRIPS))
- nrips = false;
- }
-
enable_apicv = avic = avic && avic_hardware_setup();
if (!enable_apicv) {
.pmu_ops = &amd_pmu_ops,
};
+static void __svm_exit(void)
+{
+ kvm_x86_vendor_exit();
+
+ cpu_emergency_unregister_virt_callback(svm_emergency_disable);
+}
+
static int __init svm_init(void)
{
int r;
if (r)
return r;
+ cpu_emergency_register_virt_callback(svm_emergency_disable);
+
/*
* Common KVM initialization _must_ come last, after this, /dev/kvm is
* exposed to userspace!
return 0;
err_kvm_init:
- kvm_x86_vendor_exit();
+ __svm_exit();
return r;
}
static void __exit svm_exit(void)
{
kvm_exit();
- kvm_x86_vendor_exit();
+ __svm_exit();
}
module_init(svm_init)
#include <asm/svm.h>
#include <asm/sev-common.h>
+#include "cpuid.h"
#include "kvm_cache_regs.h"
#define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
#define MSRPM_OFFSETS 32
extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
+extern int nrips;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
unsigned long soft_int_next_rip;
bool soft_int_injected;
- /* optional nested SVM features that are enabled for this guest */
- bool nrips_enabled : 1;
- bool tsc_scaling_enabled : 1;
- bool v_vmload_vmsave_enabled : 1;
- bool lbrv_enabled : 1;
- bool pause_filter_enabled : 1;
- bool pause_threshold_enabled : 1;
- bool vgif_enabled : 1;
- bool vnmi_enabled : 1;
-
u32 ldr_reg;
u32 dfr_reg;
struct page *avic_backing_page;
return test_bit(bit, (unsigned long *)&control->intercepts);
}
-static inline void set_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = svm->vmcb01.ptr;
-
- if (!sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR1_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR2_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR3_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR4_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR5_WRITE);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR6_WRITE);
- }
-
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
-
- recalc_intercepts(svm);
-}
-
-static inline void clr_dr_intercepts(struct vcpu_svm *svm)
-{
- struct vmcb *vmcb = svm->vmcb01.ptr;
-
- vmcb->control.intercepts[INTERCEPT_DR] = 0;
-
- /* DR7 access must remain intercepted for an SEV-ES guest */
- if (sev_es_guest(svm->vcpu.kvm)) {
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_READ);
- vmcb_set_intercept(&vmcb->control, INTERCEPT_DR7_WRITE);
- }
-
- recalc_intercepts(svm);
-}
-
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
{
struct vmcb *vmcb = svm->vmcb01.ptr;
static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
{
- return svm->vgif_enabled && (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
+ return guest_can_use(&svm->vcpu, X86_FEATURE_VGIF) &&
+ (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
}
static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
{
- return svm->vnmi_enabled &&
+ return guest_can_use(&svm->vcpu, X86_FEATURE_VNMI) &&
(svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
}
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void __svm_write_tsc_multiplier(u64 multiplier);
+void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
struct vmcb_control_area *control);
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
static inline bool cpu_has_vmx_xsaves(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
- SECONDARY_EXEC_XSAVES;
+ SECONDARY_EXEC_ENABLE_XSAVES;
}
static inline bool cpu_has_vmx_waitpkg(void)
SECONDARY_EXEC_DESC | \
SECONDARY_EXEC_ENABLE_RDTSCP | \
SECONDARY_EXEC_ENABLE_INVPCID | \
- SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_ENABLE_XSAVES | \
SECONDARY_EXEC_RDSEED_EXITING | \
SECONDARY_EXEC_RDRAND_EXITING | \
SECONDARY_EXEC_TSC_SCALING | \
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_ENABLE_RDTSCP |
- SECONDARY_EXEC_XSAVES |
+ SECONDARY_EXEC_ENABLE_XSAVES |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
* If if it were, XSS would have to be checked against
* the XSS exit bitmap in vmcs12.
*/
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
case EXIT_REASON_UMWAIT:
case EXIT_REASON_TPAUSE:
return nested_cpu_has2(vmcs12,
vmx = to_vmx(vcpu);
vmcs12 = get_vmcs12(vcpu);
- if (nested_vmx_allowed(vcpu) &&
+ if (guest_can_use(vcpu, X86_FEATURE_VMX) &&
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
return -EINVAL;
} else {
- if (!nested_vmx_allowed(vcpu))
+ if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return -EINVAL;
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
return -EINVAL;
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
- (!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
+ (!guest_can_use(vcpu, X86_FEATURE_VMX) ||
+ !vmx->nested.enlightened_vmcs_enabled))
return -EINVAL;
vmx_leave_nested(vcpu);
SECONDARY_EXEC_ENABLE_INVPCID |
SECONDARY_EXEC_ENABLE_VMFUNC |
SECONDARY_EXEC_RDSEED_EXITING |
- SECONDARY_EXEC_XSAVES |
+ SECONDARY_EXEC_ENABLE_XSAVES |
SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
{
- return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_XSAVES);
}
static inline bool nested_cpu_has_pml(struct vmcs12 *vmcs12)
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+enum intel_pmu_architectural_events {
+ /*
+ * The order of the architectural events matters as support for each
+ * event is enumerated via CPUID using the index of the event.
+ */
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+
+ NR_REAL_INTEL_ARCH_EVENTS,
+
+ /*
+ * Pseudo-architectural event used to implement IA32_FIXED_CTR2, a.k.a.
+ * TSC reference cycles. The architectural reference cycles event may
+ * or may not actually use the TSC as the reference, e.g. might use the
+ * core crystal clock or the bus clock (yeah, "architectural").
+ */
+ PSEUDO_ARCH_REFERENCE_CYCLES = NR_REAL_INTEL_ARCH_EVENTS,
+ NR_INTEL_ARCH_EVENTS,
+};
+
static struct {
u8 eventsel;
u8 unit_mask;
} const intel_arch_events[] = {
- [0] = { 0x3c, 0x00 },
- [1] = { 0xc0, 0x00 },
- [2] = { 0x3c, 0x01 },
- [3] = { 0x2e, 0x4f },
- [4] = { 0x2e, 0x41 },
- [5] = { 0xc4, 0x00 },
- [6] = { 0xc5, 0x00 },
- /* The above index must match CPUID 0x0A.EBX bit vector */
- [7] = { 0x00, 0x03 },
+ [INTEL_ARCH_CPU_CYCLES] = { 0x3c, 0x00 },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED] = { 0xc0, 0x00 },
+ [INTEL_ARCH_REFERENCE_CYCLES] = { 0x3c, 0x01 },
+ [INTEL_ARCH_LLC_REFERENCES] = { 0x2e, 0x4f },
+ [INTEL_ARCH_LLC_MISSES] = { 0x2e, 0x41 },
+ [INTEL_ARCH_BRANCHES_RETIRED] = { 0xc4, 0x00 },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED] = { 0xc5, 0x00 },
+ [PSEUDO_ARCH_REFERENCE_CYCLES] = { 0x00, 0x03 },
};
/* mapping between fixed pmc index and intel_arch_events array */
-static int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {
+ [0] = INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ [1] = INTEL_ARCH_CPU_CYCLES,
+ [2] = PSEUDO_ARCH_REFERENCE_CYCLES,
+};
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
int i;
- for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) {
+ BUILD_BUG_ON(ARRAY_SIZE(intel_arch_events) != NR_INTEL_ARCH_EVENTS);
+
+ /*
+ * Disallow events reported as unavailable in guest CPUID. Note, this
+ * doesn't apply to pseudo-architectural events.
+ */
+ for (i = 0; i < NR_REAL_INTEL_ARCH_EVENTS; i++) {
if (intel_arch_events[i].eventsel != event_select ||
intel_arch_events[i].unit_mask != unit_mask)
continue;
- /* disable event that reported as not present by cpuid */
- if ((i < 7) && !(pmu->available_event_types & (1 << i)))
- return false;
-
- break;
+ return pmu->available_event_types & BIT(i);
}
return true;
static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu)
{
- size_t size = ARRAY_SIZE(fixed_pmc_events);
- struct kvm_pmc *pmc;
- u32 event;
int i;
+ BUILD_BUG_ON(ARRAY_SIZE(fixed_pmc_events) != KVM_PMC_MAX_FIXED);
+
for (i = 0; i < pmu->nr_arch_fixed_counters; i++) {
- pmc = &pmu->fixed_counters[i];
- event = fixed_pmc_events[array_index_nospec(i, size)];
+ int index = array_index_nospec(i, KVM_PMC_MAX_FIXED);
+ struct kvm_pmc *pmc = &pmu->fixed_counters[index];
+ u32 event = fixed_pmc_events[index];
+
pmc->eventsel = (intel_arch_events[event].unit_mask << 8) |
- intel_arch_events[event].eventsel;
+ intel_arch_events[event].eventsel;
}
}
if (pmu->version == 1) {
pmu->nr_arch_fixed_counters = 0;
} else {
- pmu->nr_arch_fixed_counters =
- min3(ARRAY_SIZE(fixed_pmc_events),
- (size_t) edx.split.num_counters_fixed,
- (size_t)kvm_pmu_cap.num_counters_fixed);
+ pmu->nr_arch_fixed_counters = min_t(int, edx.split.num_counters_fixed,
+ kvm_pmu_cap.num_counters_fixed);
edx.split.bit_width_fixed = min_t(int, edx.split.bit_width_fixed,
kvm_pmu_cap.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
-#include <asm/kexec.h>
+#include <asm/reboot.h>
#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/mshyperv.h>
#include <asm/mwait.h>
#include <asm/spec-ctrl.h>
-#include <asm/virtext.h>
#include <asm/vmx.h>
#include "capabilities.h"
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
-/* Control for disabling CPU Fill buffer clear */
-static bool __read_mostly vmx_fb_clear_ctrl_available;
-
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
return 0;
}
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
- u64 msr;
-
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
- if (msr & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
- l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
- return 0;
- }
+ if (host_arch_capabilities & ARCH_CAP_SKIP_VMENTRY_L1DFLUSH) {
+ l1tf_vmx_mitigation = VMENTER_L1D_FLUSH_NOT_REQUIRED;
+ return 0;
}
/* If set to auto use the default l1tf mitigation method */
static int vmentry_l1d_flush_get(char *s, const struct kernel_param *kp)
{
if (WARN_ON_ONCE(l1tf_vmx_mitigation >= ARRAY_SIZE(vmentry_l1d_param)))
- return sprintf(s, "???\n");
+ return sysfs_emit(s, "???\n");
- return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
-}
-
-static void vmx_setup_fb_clear_ctrl(void)
-{
- u64 msr;
-
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
- !boot_cpu_has_bug(X86_BUG_MDS) &&
- !boot_cpu_has_bug(X86_BUG_TAA)) {
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
- if (msr & ARCH_CAP_FB_CLEAR_CTRL)
- vmx_fb_clear_ctrl_available = true;
- }
+ return sysfs_emit(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+ vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA);
/*
* If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
return ret;
}
-#ifdef CONFIG_KEXEC_CORE
-static void crash_vmclear_local_loaded_vmcss(void)
+/*
+ * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
+ *
+ * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
+ * atomically track post-VMXON state, e.g. this may be called in NMI context.
+ * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
+ * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
+ * magically in RM, VM86, compat mode, or at CPL>0.
+ */
+static int kvm_cpu_vmxoff(void)
+{
+ asm_volatile_goto("1: vmxoff\n\t"
+ _ASM_EXTABLE(1b, %l[fault])
+ ::: "cc", "memory" : fault);
+
+ cr4_clear_bits(X86_CR4_VMXE);
+ return 0;
+
+fault:
+ cr4_clear_bits(X86_CR4_VMXE);
+ return -EIO;
+}
+
+static void vmx_emergency_disable(void)
{
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v;
+ kvm_rebooting = true;
+
+ /*
+ * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
+ * set in task context. If this races with VMX is disabled by an NMI,
+ * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
+ * kvm_rebooting set.
+ */
+ if (!(__read_cr4() & X86_CR4_VMXE))
+ return;
+
list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
loaded_vmcss_on_cpu_link)
vmcs_clear(v->vmcs);
+
+ kvm_cpu_vmxoff();
}
-#endif /* CONFIG_KEXEC_CORE */
static void __loaded_vmcs_clear(void *arg)
{
return kvm_caps.default_tsc_scaling_ratio;
}
-static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
+static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu)
{
- vmcs_write64(TSC_OFFSET, offset);
+ vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
}
-static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void vmx_write_tsc_multiplier(struct kvm_vcpu *vcpu)
{
- vmcs_write64(TSC_MULTIPLIER, multiplier);
-}
-
-/*
- * nested_vmx_allowed() checks whether a guest should be allowed to use VMX
- * instructions and MSRs (i.e., nested VMX). Nested VMX is disabled for
- * all guests if the "nested" module option is off, and can also be disabled
- * for a single guest by disabling its VMX cpuid bit.
- */
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
-{
- return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
+ vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
}
/*
[msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0];
break;
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
- if (!nested_vmx_allowed(vcpu))
+ if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return 1;
if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data))
case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
if (!msr_info->host_initiated)
return 1; /* they are read-only */
- if (!nested_vmx_allowed(vcpu))
+ if (!guest_can_use(vcpu, X86_FEATURE_VMX))
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_RTIT_CTL:
return 0;
}
-static bool kvm_is_vmx_supported(void)
+static bool __kvm_is_vmx_supported(void)
{
- int cpu = raw_smp_processor_id();
+ int cpu = smp_processor_id();
- if (!cpu_has_vmx()) {
+ if (!(cpuid_ecx(1) & feature_bit(VMX))) {
pr_err("VMX not supported by CPU %d\n", cpu);
return false;
}
return true;
}
+static bool kvm_is_vmx_supported(void)
+{
+ bool supported;
+
+ migrate_disable();
+ supported = __kvm_is_vmx_supported();
+ migrate_enable();
+
+ return supported;
+}
+
static int vmx_check_processor_compat(void)
{
int cpu = raw_smp_processor_id();
struct vmcs_config vmcs_conf;
struct vmx_capability vmx_cap;
- if (!kvm_is_vmx_supported())
+ if (!__kvm_is_vmx_supported())
return -EIO;
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0) {
{
vmclear_local_loaded_vmcss();
- if (cpu_vmxoff())
+ if (kvm_cpu_vmxoff())
kvm_spurious_fault();
hv_reset_evmcs();
vmx->rmode.vm86_active = 1;
- /*
- * Very old userspace does not call KVM_SET_TSS_ADDR before entering
- * vcpu. Warn the user that an update is overdue.
- */
- if (!kvm_vmx->tss_addr)
- pr_warn_once("KVM_SET_TSS_ADDR needs to be called before running vCPU\n");
-
vmx_segment_cache_clear(vmx);
vmcs_writel(GUEST_TR_BASE, kvm_vmx->tss_addr);
vmx->emulation_required = vmx_emulation_required(vcpu);
}
-static int vmx_get_max_tdp_level(void)
+static int vmx_get_max_ept_level(void)
{
if (cpu_has_vmx_ept_5levels())
return 5;
* based on a single guest CPUID bit, with a dedicated feature bit. This also
* verifies that the control is actually supported by KVM and hardware.
*/
-#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
-({ \
- bool __enabled; \
- \
- if (cpu_has_vmx_##name()) { \
- __enabled = guest_cpuid_has(&(vmx)->vcpu, \
- X86_FEATURE_##feat_name); \
- vmx_adjust_secondary_exec_control(vmx, exec_control, \
- SECONDARY_EXEC_##ctrl_name, __enabled, exiting); \
- } \
+#define vmx_adjust_sec_exec_control(vmx, exec_control, name, feat_name, ctrl_name, exiting) \
+({ \
+ struct kvm_vcpu *__vcpu = &(vmx)->vcpu; \
+ bool __enabled; \
+ \
+ if (cpu_has_vmx_##name()) { \
+ if (kvm_is_governed_feature(X86_FEATURE_##feat_name)) \
+ __enabled = guest_can_use(__vcpu, X86_FEATURE_##feat_name); \
+ else \
+ __enabled = guest_cpuid_has(__vcpu, X86_FEATURE_##feat_name); \
+ vmx_adjust_secondary_exec_control(vmx, exec_control, SECONDARY_EXEC_##ctrl_name,\
+ __enabled, exiting); \
+ } \
})
/* More macro magic for ENABLE_/opt-in versus _EXITING/opt-out controls. */
if (!enable_pml || !atomic_read(&vcpu->kvm->nr_memslots_dirty_logging))
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
- if (cpu_has_vmx_xsaves()) {
- /* Exposing XSAVES only when XSAVE is exposed */
- bool xsaves_enabled =
- boot_cpu_has(X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
- guest_cpuid_has(vcpu, X86_FEATURE_XSAVES);
-
- vcpu->arch.xsaves_enabled = xsaves_enabled;
-
- vmx_adjust_secondary_exec_control(vmx, &exec_control,
- SECONDARY_EXEC_XSAVES,
- xsaves_enabled, false);
- }
+ vmx_adjust_sec_exec_feature(vmx, &exec_control, xsaves, XSAVES);
/*
* RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
SECONDARY_EXEC_ENABLE_RDTSCP,
rdpid_or_rdtscp_enabled, false);
}
+
vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
vmcs_write64(APIC_ACCESS_ADDR, pfn_to_hpa(pfn));
read_unlock(&vcpu->kvm->mmu_lock);
- vmx_flush_tlb_current(vcpu);
-
+ /*
+ * No need for a manual TLB flush at this point, KVM has already done a
+ * flush if there were SPTEs pointing at the previous page.
+ */
out:
/*
* Do not pin apic access page in memory, the MMU notifier
flags);
vcpu->arch.cr2 = native_read_cr2();
+ vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
+
+ vmx->idt_vectoring_info = 0;
vmx_enable_fb_clear(vmx);
- if (unlikely(vmx->fail))
+ if (unlikely(vmx->fail)) {
vmx->exit_reason.full = 0xdead;
- else
- vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+ goto out;
+ }
+
+ vmx->exit_reason.full = vmcs_read32(VM_EXIT_REASON);
+ if (likely(!vmx->exit_reason.failed_vmentry))
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu))) {
kvm_after_interrupt(vcpu);
}
+out:
guest_state_exit_irqoff();
}
loadsegment(es, __USER_DS);
#endif
- vcpu->arch.regs_avail &= ~VMX_REGS_LAZY_LOAD_SET;
-
pt_guest_exit(vmx);
kvm_load_host_xsave_state(vcpu);
vmx->nested.nested_run_pending = 0;
}
- vmx->idt_vectoring_info = 0;
-
if (unlikely(vmx->fail))
return EXIT_FASTPATH_NONE;
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
- if (likely(!vmx->exit_reason.failed_vmentry))
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
-
trace_kvm_exit(vcpu, KVM_ISA_VMX);
if (unlikely(vmx->exit_reason.failed_vmentry))
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- /* xsaves_enabled is recomputed in vmx_compute_secondary_exec_control(). */
- vcpu->arch.xsaves_enabled = false;
+ /*
+ * XSAVES is effectively enabled if and only if XSAVE is also exposed
+ * to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
+ * set if and only if XSAVE is supported.
+ */
+ if (boot_cpu_has(X86_FEATURE_XSAVE) &&
+ guest_cpuid_has(vcpu, X86_FEATURE_XSAVE))
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES);
+
+ kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX);
vmx_setup_uret_msrs(vmx);
vmcs_set_secondary_exec_control(vmx,
vmx_secondary_exec_control(vmx));
- if (nested_vmx_allowed(vcpu))
+ if (guest_can_use(vcpu, X86_FEATURE_VMX))
vmx->msr_ia32_feature_control_valid_bits |=
FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
~(FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX);
- if (nested_vmx_allowed(vcpu))
+ if (guest_can_use(vcpu, X86_FEATURE_VMX))
nested_vmx_cr_fixed1_bits_update(vcpu);
if (boot_cpu_has(X86_FEATURE_INTEL_PT) &&
*/
vmx_setup_me_spte_mask();
- kvm_configure_mmu(enable_ept, 0, vmx_get_max_tdp_level(),
+ kvm_configure_mmu(enable_ept, 0, vmx_get_max_ept_level(),
ept_caps_to_lpage_level(vmx_capability.ept));
/*
{
allow_smaller_maxphyaddr = false;
-#ifdef CONFIG_KEXEC_CORE
- RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
- synchronize_rcu();
-#endif
+ cpu_emergency_unregister_virt_callback(vmx_emergency_disable);
+
vmx_cleanup_l1d_flush();
}
if (r)
goto err_l1d_flush;
- vmx_setup_fb_clear_ctrl();
-
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
pi_init_cpu(cpu);
}
-#ifdef CONFIG_KEXEC_CORE
- rcu_assign_pointer(crash_vmclear_loaded_vmcss,
- crash_vmclear_local_loaded_vmcss);
-#endif
+ cpu_emergency_register_virt_callback(vmx_emergency_disable);
+
vmx_check_vmcs12_offsets();
/*
u64 *pid_table;
};
-bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
struct loaded_vmcs *buddy);
int allocate_vpid(void);
SECONDARY_EXEC_APIC_REGISTER_VIRT | \
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | \
SECONDARY_EXEC_SHADOW_VMCS | \
- SECONDARY_EXEC_XSAVES | \
+ SECONDARY_EXEC_ENABLE_XSAVES | \
SECONDARY_EXEC_RDSEED_EXITING | \
SECONDARY_EXEC_RDRAND_EXITING | \
SECONDARY_EXEC_ENABLE_PML | \
#include "tss.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
+#include "mmu/page_track.h"
#include "x86.h"
#include "cpuid.h"
#include "pmu.h"
u64 __read_mostly host_xss;
EXPORT_SYMBOL_GPL(host_xss);
+u64 __read_mostly host_arch_capabilities;
+EXPORT_SYMBOL_GPL(host_arch_capabilities);
+
const struct _kvm_stats_desc kvm_vm_stats_desc[] = {
KVM_GENERIC_VM_STATS(),
STATS_DESC_COUNTER(VM, mmu_shadow_zapped),
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, vcpu->arch.xcr0);
- if (vcpu->arch.xsaves_enabled &&
+ if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
if (vcpu->arch.xcr0 != host_xcr0)
xsetbv(XCR_XFEATURE_ENABLED_MASK, host_xcr0);
- if (vcpu->arch.xsaves_enabled &&
+ if (guest_can_use(vcpu, X86_FEATURE_XSAVES) &&
vcpu->arch.ia32_xss != host_xss)
wrmsrl(MSR_IA32_XSS, host_xss);
}
static u64 kvm_get_arch_capabilities(void)
{
- u64 data = 0;
-
- if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
- data &= KVM_SUPPORTED_ARCH_CAP;
- }
+ u64 data = host_arch_capabilities & KVM_SUPPORTED_ARCH_CAP;
/*
* If nx_huge_pages is enabled, KVM's shadow paging will ensure that
else
vcpu->arch.tsc_offset = l1_offset;
- static_call(kvm_x86_write_tsc_offset)(vcpu, vcpu->arch.tsc_offset);
+ static_call(kvm_x86_write_tsc_offset)(vcpu);
}
static void kvm_vcpu_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 l1_multiplier)
vcpu->arch.tsc_scaling_ratio = l1_multiplier;
if (kvm_caps.has_tsc_control)
- static_call(kvm_x86_write_tsc_multiplier)(
- vcpu, vcpu->arch.tsc_scaling_ratio);
+ static_call(kvm_x86_write_tsc_multiplier)(vcpu);
}
static inline bool kvm_check_tsc_unstable(void)
return 0;
default:
return -ENXIO;
- break;
}
}
static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
struct kvm_msr_filter_range *user_range)
{
- unsigned long *bitmap = NULL;
+ unsigned long *bitmap;
size_t bitmap_size;
if (!user_range->nmsrs)
return kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx, exact_only);
}
-static bool emulator_guest_has_long_mode(struct x86_emulate_ctxt *ctxt)
-{
- return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_LM);
-}
-
static bool emulator_guest_has_movbe(struct x86_emulate_ctxt *ctxt)
{
return guest_cpuid_has(emul_to_vcpu(ctxt), X86_FEATURE_MOVBE);
.fix_hypercall = emulator_fix_hypercall,
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
- .guest_has_long_mode = emulator_guest_has_long_mode,
.guest_has_movbe = emulator_guest_has_movbe,
.guest_has_fxsr = emulator_guest_has_fxsr,
.guest_has_rdpid = emulator_guest_has_rdpid,
static void tsc_khz_changed(void *data)
{
struct cpufreq_freqs *freq = data;
- unsigned long khz = 0;
+ unsigned long khz;
WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
kvm_init_pmu_capability(ops->pmu_ops);
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, host_arch_capabilities);
+
r = ops->hardware_setup();
if (r != 0)
goto out_mmu_exit;
r = -EINTR;
goto out;
}
+
/*
- * It should be impossible for the hypervisor timer to be in
- * use before KVM has ever run the vCPU.
+ * Don't bother switching APIC timer emulation from the
+ * hypervisor timer to the software timer, the only way for the
+ * APIC timer to be active is if userspace stuffed vCPU state,
+ * i.e. put the vCPU into a nonsensical state. Only an INIT
+ * will transition the vCPU out of UNINITIALIZED (without more
+ * state stuffing from userspace), which will reset the local
+ * APIC and thus cancel the timer or drop the IRQ (if the timer
+ * already expired).
*/
- WARN_ON_ONCE(kvm_lapic_hv_timer_in_use(vcpu));
-
kvm_vcpu_srcu_read_unlock(vcpu);
kvm_vcpu_block(vcpu);
kvm_vcpu_srcu_read_lock(vcpu);
__set_regs(vcpu, &vcpu->run->s.regs.regs);
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_REGS;
}
+
if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_SREGS) {
- if (__set_sregs(vcpu, &vcpu->run->s.regs.sregs))
+ struct kvm_sregs sregs = vcpu->run->s.regs.sregs;
+
+ if (__set_sregs(vcpu, &sregs))
return -EINVAL;
+
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_SREGS;
}
+
if (vcpu->run->kvm_dirty_regs & KVM_SYNC_X86_EVENTS) {
- if (kvm_vcpu_ioctl_x86_set_vcpu_events(
- vcpu, &vcpu->run->s.regs.events))
+ struct kvm_vcpu_events events = vcpu->run->s.regs.events;
+
+ if (kvm_vcpu_ioctl_x86_set_vcpu_events(vcpu, &events))
return -EINVAL;
+
vcpu->run->kvm_dirty_regs &= ~KVM_SYNC_X86_EVENTS;
}
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ /*
+ * KVM doesn't support moving memslots when there are external page
+ * trackers attached to the VM, i.e. if KVMGT is in use.
+ */
+ if (change == KVM_MR_MOVE && kvm_page_track_has_external_user(kvm))
+ return -EINVAL;
+
if (change == KVM_MR_CREATE || change == KVM_MR_MOVE) {
if ((new->base_gfn + new->npages - 1) > kvm_mmu_max_gfn())
return -EINVAL;
* See is_writable_pte() for more details (the case involving
* access-tracked SPTEs is particularly relevant).
*/
- kvm_arch_flush_remote_tlbs_memslot(kvm, new);
+ kvm_flush_remote_tlbs_memslot(kvm, new);
}
}
const struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ if (change == KVM_MR_DELETE)
+ kvm_page_track_delete_slot(kvm, old);
+
if (!kvm->arch.n_requested_mmu_pages &&
(change == KVM_MR_CREATE || change == KVM_MR_DELETE)) {
unsigned long nr_mmu_pages;
kvm_arch_free_memslot(kvm, old);
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
- kvm_mmu_zap_all(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
- kvm_page_track_flush_slot(kvm, slot);
-}
-
static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
return (is_guest_mode(vcpu) &&
extern u64 host_xcr0;
extern u64 host_xss;
+extern u64 host_arch_capabilities;
extern struct kvm_caps kvm_caps;
config ARCH_HAS_ILOG2_U64
def_bool n
+config ARCH_MTD_XIP
+ def_bool y
+
config NO_IOPORT_MAP
def_bool n
#define XTENSA_STACK_ALIGNMENT 16
#endif
+#ifndef XCHAL_HW_MIN_VERSION
+#if defined(XCHAL_HW_MIN_VERSION_MAJOR) && defined(XCHAL_HW_MIN_VERSION_MINOR)
+#define XCHAL_HW_MIN_VERSION (XCHAL_HW_MIN_VERSION_MAJOR * 100 + \
+ XCHAL_HW_MIN_VERSION_MINOR)
+#else
+#define XCHAL_HW_MIN_VERSION 0
+#endif
+#endif
+
#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_MTD_XIP_H
+#define _ASM_MTD_XIP_H
+
+#include <asm/processor.h>
+
+#define xip_irqpending() (xtensa_get_sr(interrupt) & xtensa_get_sr(intenable))
+#define xip_currtime() (xtensa_get_sr(ccount))
+#define xip_elapsed_since(x) ((xtensa_get_sr(ccount) - (x)) / 1000) /* should work up to 1GHz */
+#define xip_cpu_idle() do { asm volatile ("waiti 0"); } while (0)
+
+#endif /* _ASM_MTD_XIP_H */
+
extern char _SecondaryResetVector_text_end[];
#endif
#ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+extern char _xip_text_start[];
+extern char _xip_text_end[];
+#endif
extern char _xip_start[];
extern char _xip_end[];
#endif
#include <linux/perf_event.h>
#include <linux/platform_device.h>
+#include <asm/core.h>
#include <asm/processor.h>
#include <asm/stacktrace.h>
+#define XTENSA_HWVERSION_RG_2015_0 260000
+
+#if XCHAL_HW_MIN_VERSION >= XTENSA_HWVERSION_RG_2015_0
+#define XTENSA_PMU_ERI_BASE 0x00101000
+#else
+#define XTENSA_PMU_ERI_BASE 0x00001000
+#endif
+
/* Global control/status for all perf counters */
-#define XTENSA_PMU_PMG 0x1000
+#define XTENSA_PMU_PMG XTENSA_PMU_ERI_BASE
/* Perf counter values */
-#define XTENSA_PMU_PM(i) (0x1080 + (i) * 4)
+#define XTENSA_PMU_PM(i) (XTENSA_PMU_ERI_BASE + 0x80 + (i) * 4)
/* Perf counter control registers */
-#define XTENSA_PMU_PMCTRL(i) (0x1100 + (i) * 4)
+#define XTENSA_PMU_PMCTRL(i) (XTENSA_PMU_ERI_BASE + 0x100 + (i) * 4)
/* Perf counter status registers */
-#define XTENSA_PMU_PMSTAT(i) (0x1180 + (i) * 4)
+#define XTENSA_PMU_PMSTAT(i) (XTENSA_PMU_ERI_BASE + 0x180 + (i) * 4)
#define XTENSA_PMU_PMG_PMEN 0x1
mem_reserve(__pa(_stext), __pa(_end));
#ifdef CONFIG_XIP_KERNEL
+#ifdef CONFIG_VECTORS_ADDR
+ mem_reserve(__pa(_xip_text_start), __pa(_xip_text_end));
+#endif
mem_reserve(__pa(_xip_start), __pa(_xip_end));
#endif
SECTION_VECTOR2 (.DoubleExceptionVector.text, DOUBLEEXC_VECTOR_VADDR)
*(.exception.text)
+ *(.xiptext)
#endif
IRQENTRY_TEXT
.DebugInterruptVector.text);
RELOCATE_ENTRY(_exception_text,
.exception.text);
+#ifdef CONFIG_XIP_KERNEL
+ RELOCATE_ENTRY(_xip_text, .xiptext);
+#endif
#endif
#ifdef CONFIG_XIP_KERNEL
RELOCATE_ENTRY(_xip_data, .data);
LAST)
#undef LAST
#define LAST .exception.text
-
+ SECTION_VECTOR4 (_xip_text,
+ .xiptext,
+ ,
+ LAST)
+#undef LAST
+#define LAST .xiptext
#endif
. = (LOADADDR(LAST) + SIZEOF(LAST) + 3) & ~ 3;
struct vpu_jsm_msg resp;
int ret;
- if (!strncpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN - 1))
- return -ENOMEM;
+ strscpy(req.payload.dyndbg_control.dyndbg_cmd, command, VPU_DYNDBG_CMD_MAX_LEN);
ret = ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_DYNDBG_CONTROL_RSP, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
{ PCI_VDEVICE(INTEL, 0x34d3), board_ahci_low_power }, /* Ice Lake LP AHCI */
{ PCI_VDEVICE(INTEL, 0x02d3), board_ahci_low_power }, /* Comet Lake PCH-U AHCI */
{ PCI_VDEVICE(INTEL, 0x02d7), board_ahci_low_power }, /* Comet Lake PCH RAID */
+ /* Elkhart Lake IDs 0x4b60 & 0x4b62 https://sata-io.org/product/8803 not tested yet */
+ { PCI_VDEVICE(INTEL, 0x4b63), board_ahci_low_power }, /* Elkhart Lake AHCI */
/* JMicron 360/1/3/5/6, match class to avoid IDE function */
{ PCI_VENDOR_ID_JMICRON, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID,
static int ahci_avn_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
struct ata_port *ap = link->ap;
struct ahci_port_priv *pp = ap->private_data;
struct ahci_host_priv *hpriv = ap->host->private_data;
#include <linux/kernel.h>
#include <linux/libata.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include "ahci.h"
#include <linux/log2.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/regmap.h>
#include <linux/libata.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/regmap.h>
#include <linux/kernel.h>
#include <linux/mbus.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include "ahci.h"
{
struct device *dev = &pdev->dev;
struct device_node *node = dev->of_node;
- struct resource *res;
void __iomem *base;
u64 cfg;
int ret;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- base = devm_ioremap_resource(&pdev->dev, res);
+ base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
return PTR_ERR(base);
#include <linux/pm.h>
#include <linux/ahci_platform.h>
#include <linux/device.h>
-#include <linux/of_address.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/libata.h>
#include "ahci.h"
static int ahci_qoriq_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
void __iomem *port_mmio = ahci_port_base(link->ap);
u32 px_cmd, px_is, px_val;
struct ata_port *ap = link->ap;
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/device.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/libata.h>
#include <linux/ahci_platform.h>
if (!plat_data)
return &ahci_port_info;
- plat_data->sgpio_ctrl = devm_ioremap_resource(dev,
- platform_get_resource(pdev, IORESOURCE_MEM, 1));
+ plat_data->sgpio_ctrl = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(plat_data->sgpio_ctrl))
return &ahci_port_info;
#include <linux/clk.h>
#include <linux/errno.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
#include "ahci.h"
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
tegra->pdev = pdev;
tegra->soc = of_device_get_match_data(&pdev->dev);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- tegra->sata_regs = devm_ioremap_resource(&pdev->dev, res);
+ tegra->sata_regs = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(tegra->sata_regs))
return PTR_ERR(tegra->sata_regs);
* @timeout : timeout for achieving the value.
*/
static int xgene_ahci_poll_reg_val(struct ata_port *ap,
- void __iomem *reg, unsigned
- int val, unsigned long interval,
- unsigned long timeout)
+ void __iomem *reg, unsigned int val,
+ unsigned int interval, unsigned int timeout)
{
unsigned long deadline;
unsigned int tmp;
static int xgene_ahci_do_hardreset(struct ata_link *link,
unsigned long deadline, bool *online)
{
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
struct ata_port *ap = link->ap;
struct ahci_host_priv *hpriv = ap->host->private_data;
struct xgene_ahci_context *ctx = hpriv->plat_data;
ctx->dev = dev;
/* Retrieve the IP core resource */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- ctx->csr_core = devm_ioremap_resource(dev, res);
+ ctx->csr_core = devm_platform_ioremap_resource(pdev, 1);
if (IS_ERR(ctx->csr_core))
return PTR_ERR(ctx->csr_core);
/* Retrieve the IP diagnostic resource */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
- ctx->csr_diag = devm_ioremap_resource(dev, res);
+ ctx->csr_diag = devm_platform_ioremap_resource(pdev, 2);
if (IS_ERR(ctx->csr_diag))
return PTR_ERR(ctx->csr_diag);
/* Retrieve the IP AXI resource */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 3);
- ctx->csr_axi = devm_ioremap_resource(dev, res);
+ ctx->csr_axi = devm_platform_ioremap_resource(pdev, 3);
if (IS_ERR(ctx->csr_axi))
return PTR_ERR(ctx->csr_axi);
static int ahci_exec_polled_cmd(struct ata_port *ap, int pmp,
struct ata_taskfile *tf, int is_cmd, u16 flags,
- unsigned long timeout_msec)
+ unsigned int timeout_msec)
{
const u32 cmd_fis_len = 5; /* five dwords */
struct ahci_port_priv *pp = ap->private_data;
struct ahci_host_priv *hpriv = ap->host->private_data;
struct ahci_port_priv *pp = ap->private_data;
const char *reason = NULL;
- unsigned long now, msecs;
+ unsigned long now;
+ unsigned int msecs;
struct ata_taskfile tf;
bool fbs_disabled = false;
int rc;
int ahci_do_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline, bool *online)
{
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
struct ata_port *ap = link->ap;
struct ahci_port_priv *pp = ap->private_data;
struct ahci_host_priv *hpriv = ap->host->private_data;
#include <linux/ahci_platform.h>
#include <linux/phy/phy.h>
#include <linux/pm_runtime.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
#include <linux/reset.h>
#include "ahci.h"
}
}
- if (ap->ops->error_handler)
- ata_eh_release(ap);
+ ata_eh_release(ap);
rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
- if (ap->ops->error_handler)
- ata_eh_acquire(ap);
+ ata_eh_acquire(ap);
ata_sff_flush_pio_task(ap);
if (qc->flags & ATA_QCFLAG_ACTIVE) {
qc->err_mask |= AC_ERR_TIMEOUT;
- if (ap->ops->error_handler)
- ata_port_freeze(ap);
- else
- ata_qc_complete(qc);
+ ata_port_freeze(ap);
ata_dev_warn(dev, "qc timeout after %u msecs (cmd 0x%x)\n",
timeout, command);
EXPORT_SYMBOL_GPL(ata_cable_sata);
/**
- * ata_bus_probe - Reset and probe ATA bus
- * @ap: Bus to probe
- *
- * Master ATA bus probing function. Initiates a hardware-dependent
- * bus reset, then attempts to identify any devices found on
- * the bus.
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * Zero on success, negative errno otherwise.
- */
-
-int ata_bus_probe(struct ata_port *ap)
-{
- unsigned int classes[ATA_MAX_DEVICES];
- int tries[ATA_MAX_DEVICES];
- int rc;
- struct ata_device *dev;
-
- ata_for_each_dev(dev, &ap->link, ALL)
- tries[dev->devno] = ATA_PROBE_MAX_TRIES;
-
- retry:
- ata_for_each_dev(dev, &ap->link, ALL) {
- /* If we issue an SRST then an ATA drive (not ATAPI)
- * may change configuration and be in PIO0 timing. If
- * we do a hard reset (or are coming from power on)
- * this is true for ATA or ATAPI. Until we've set a
- * suitable controller mode we should not touch the
- * bus as we may be talking too fast.
- */
- dev->pio_mode = XFER_PIO_0;
- dev->dma_mode = 0xff;
-
- /* If the controller has a pio mode setup function
- * then use it to set the chipset to rights. Don't
- * touch the DMA setup as that will be dealt with when
- * configuring devices.
- */
- if (ap->ops->set_piomode)
- ap->ops->set_piomode(ap, dev);
- }
-
- /* reset and determine device classes */
- ap->ops->phy_reset(ap);
-
- ata_for_each_dev(dev, &ap->link, ALL) {
- if (dev->class != ATA_DEV_UNKNOWN)
- classes[dev->devno] = dev->class;
- else
- classes[dev->devno] = ATA_DEV_NONE;
-
- dev->class = ATA_DEV_UNKNOWN;
- }
-
- /* read IDENTIFY page and configure devices. We have to do the identify
- specific sequence bass-ackwards so that PDIAG- is released by
- the slave device */
-
- ata_for_each_dev(dev, &ap->link, ALL_REVERSE) {
- if (tries[dev->devno])
- dev->class = classes[dev->devno];
-
- if (!ata_dev_enabled(dev))
- continue;
-
- rc = ata_dev_read_id(dev, &dev->class, ATA_READID_POSTRESET,
- dev->id);
- if (rc)
- goto fail;
- }
-
- /* Now ask for the cable type as PDIAG- should have been released */
- if (ap->ops->cable_detect)
- ap->cbl = ap->ops->cable_detect(ap);
-
- /* We may have SATA bridge glue hiding here irrespective of
- * the reported cable types and sensed types. When SATA
- * drives indicate we have a bridge, we don't know which end
- * of the link the bridge is which is a problem.
- */
- ata_for_each_dev(dev, &ap->link, ENABLED)
- if (ata_id_is_sata(dev->id))
- ap->cbl = ATA_CBL_SATA;
-
- /* After the identify sequence we can now set up the devices. We do
- this in the normal order so that the user doesn't get confused */
-
- ata_for_each_dev(dev, &ap->link, ENABLED) {
- ap->link.eh_context.i.flags |= ATA_EHI_PRINTINFO;
- rc = ata_dev_configure(dev);
- ap->link.eh_context.i.flags &= ~ATA_EHI_PRINTINFO;
- if (rc)
- goto fail;
- }
-
- /* configure transfer mode */
- rc = ata_set_mode(&ap->link, &dev);
- if (rc)
- goto fail;
-
- ata_for_each_dev(dev, &ap->link, ENABLED)
- return 0;
-
- return -ENODEV;
-
- fail:
- tries[dev->devno]--;
-
- switch (rc) {
- case -EINVAL:
- /* eeek, something went very wrong, give up */
- tries[dev->devno] = 0;
- break;
-
- case -ENODEV:
- /* give it just one more chance */
- tries[dev->devno] = min(tries[dev->devno], 1);
- fallthrough;
- case -EIO:
- if (tries[dev->devno] == 1) {
- /* This is the last chance, better to slow
- * down than lose it.
- */
- sata_down_spd_limit(&ap->link, 0);
- ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
- }
- }
-
- if (!tries[dev->devno])
- ata_dev_disable(dev);
-
- goto retry;
-}
-
-/**
* sata_print_link_status - Print SATA link status
* @link: SATA link to printk link status about
*
{
struct ata_port *ap = link->ap;
struct ata_eh_context *ehc = &link->eh_context;
- const unsigned long *timing = sata_ehc_deb_timing(ehc);
+ const unsigned int *timing = sata_ehc_deb_timing(ehc);
int rc;
/* if we're about to do hardreset, nothing more to do */
int sata_std_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
bool online;
int rc;
ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Micron_M500_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM },
- { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
- ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Micron_M5[15]0_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM },
+ { "Micron_1100_*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM, },
+ { "Crucial_CT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM |
+ ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Crucial_CT*M550*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
ATA_HORKAGE_ZERO_AFTER_TRIM },
{ "Crucial_CT*MX100*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
void ata_qc_complete(struct ata_queued_cmd *qc)
{
struct ata_port *ap = qc->ap;
+ struct ata_device *dev = qc->dev;
+ struct ata_eh_info *ehi = &dev->link->eh_info;
/* Trigger the LED (if available) */
ledtrig_disk_activity(!!(qc->tf.flags & ATA_TFLAG_WRITE));
- /* XXX: New EH and old EH use different mechanisms to
- * synchronize EH with regular execution path.
- *
- * In new EH, a qc owned by EH is marked with ATA_QCFLAG_EH.
- * Normal execution path is responsible for not accessing a
- * qc owned by EH. libata core enforces the rule by returning NULL
- * from ata_qc_from_tag() for qcs owned by EH.
+ /*
+ * In order to synchronize EH with the regular execution path, a qc that
+ * is owned by EH is marked with ATA_QCFLAG_EH.
*
- * Old EH depends on ata_qc_complete() nullifying completion
- * requests if ATA_QCFLAG_EH_SCHEDULED is set. Old EH does
- * not synchronize with interrupt handler. Only PIO task is
- * taken care of.
+ * The normal execution path is responsible for not accessing a qc owned
+ * by EH. libata core enforces the rule by returning NULL from
+ * ata_qc_from_tag() for qcs owned by EH.
*/
- if (ap->ops->error_handler) {
- struct ata_device *dev = qc->dev;
- struct ata_eh_info *ehi = &dev->link->eh_info;
-
- if (unlikely(qc->err_mask))
- qc->flags |= ATA_QCFLAG_EH;
+ if (unlikely(qc->err_mask))
+ qc->flags |= ATA_QCFLAG_EH;
- /*
- * Finish internal commands without any further processing
- * and always with the result TF filled.
- */
- if (unlikely(ata_tag_internal(qc->tag))) {
- fill_result_tf(qc);
- trace_ata_qc_complete_internal(qc);
- __ata_qc_complete(qc);
- return;
- }
+ /*
+ * Finish internal commands without any further processing and always
+ * with the result TF filled.
+ */
+ if (unlikely(ata_tag_internal(qc->tag))) {
+ fill_result_tf(qc);
+ trace_ata_qc_complete_internal(qc);
+ __ata_qc_complete(qc);
+ return;
+ }
- /*
- * Non-internal qc has failed. Fill the result TF and
- * summon EH.
- */
- if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
- fill_result_tf(qc);
- trace_ata_qc_complete_failed(qc);
- ata_qc_schedule_eh(qc);
- return;
- }
+ /* Non-internal qc has failed. Fill the result TF and summon EH. */
+ if (unlikely(qc->flags & ATA_QCFLAG_EH)) {
+ fill_result_tf(qc);
+ trace_ata_qc_complete_failed(qc);
+ ata_qc_schedule_eh(qc);
+ return;
+ }
- WARN_ON_ONCE(ata_port_is_frozen(ap));
+ WARN_ON_ONCE(ata_port_is_frozen(ap));
- /* read result TF if requested */
- if (qc->flags & ATA_QCFLAG_RESULT_TF)
- fill_result_tf(qc);
+ /* read result TF if requested */
+ if (qc->flags & ATA_QCFLAG_RESULT_TF)
+ fill_result_tf(qc);
- trace_ata_qc_complete_done(qc);
+ trace_ata_qc_complete_done(qc);
+ /*
+ * For CDL commands that completed without an error, check if we have
+ * sense data (ATA_SENSE is set). If we do, then the command may have
+ * been aborted by the device due to a limit timeout using the policy
+ * 0xD. For these commands, invoke EH to get the command sense data.
+ */
+ if (qc->result_tf.status & ATA_SENSE &&
+ ((ata_is_ncq(qc->tf.protocol) &&
+ dev->flags & ATA_DFLAG_CDL_ENABLED) ||
+ (!ata_is_ncq(qc->tf.protocol) &&
+ ata_id_sense_reporting_enabled(dev->id)))) {
/*
- * For CDL commands that completed without an error, check if
- * we have sense data (ATA_SENSE is set). If we do, then the
- * command may have been aborted by the device due to a limit
- * timeout using the policy 0xD. For these commands, invoke EH
- * to get the command sense data.
+ * Tell SCSI EH to not overwrite scmd->result even if this
+ * command is finished with result SAM_STAT_GOOD.
*/
- if (qc->result_tf.status & ATA_SENSE &&
- ((ata_is_ncq(qc->tf.protocol) &&
- dev->flags & ATA_DFLAG_CDL_ENABLED) ||
- (!ata_is_ncq(qc->tf.protocol) &&
- ata_id_sense_reporting_enabled(dev->id)))) {
- /*
- * Tell SCSI EH to not overwrite scmd->result even if
- * this command is finished with result SAM_STAT_GOOD.
- */
- qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
- qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
- ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
+ qc->scsicmd->flags |= SCMD_FORCE_EH_SUCCESS;
+ qc->flags |= ATA_QCFLAG_EH_SUCCESS_CMD;
+ ehi->dev_action[dev->devno] |= ATA_EH_GET_SUCCESS_SENSE;
- /*
- * set pending so that ata_qc_schedule_eh() does not
- * trigger fast drain, and freeze the port.
- */
- ap->pflags |= ATA_PFLAG_EH_PENDING;
- ata_qc_schedule_eh(qc);
- return;
- }
-
- /* Some commands need post-processing after successful
- * completion.
+ /*
+ * set pending so that ata_qc_schedule_eh() does not trigger
+ * fast drain, and freeze the port.
*/
- switch (qc->tf.command) {
- case ATA_CMD_SET_FEATURES:
- if (qc->tf.feature != SETFEATURES_WC_ON &&
- qc->tf.feature != SETFEATURES_WC_OFF &&
- qc->tf.feature != SETFEATURES_RA_ON &&
- qc->tf.feature != SETFEATURES_RA_OFF)
- break;
- fallthrough;
- case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
- case ATA_CMD_SET_MULTI: /* multi_count changed */
- /* revalidate device */
- ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
- ata_port_schedule_eh(ap);
- break;
+ ap->pflags |= ATA_PFLAG_EH_PENDING;
+ ata_qc_schedule_eh(qc);
+ return;
+ }
- case ATA_CMD_SLEEP:
- dev->flags |= ATA_DFLAG_SLEEPING;
+ /* Some commands need post-processing after successful completion. */
+ switch (qc->tf.command) {
+ case ATA_CMD_SET_FEATURES:
+ if (qc->tf.feature != SETFEATURES_WC_ON &&
+ qc->tf.feature != SETFEATURES_WC_OFF &&
+ qc->tf.feature != SETFEATURES_RA_ON &&
+ qc->tf.feature != SETFEATURES_RA_OFF)
break;
- }
-
- if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
- ata_verify_xfer(qc);
+ fallthrough;
+ case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */
+ case ATA_CMD_SET_MULTI: /* multi_count changed */
+ /* revalidate device */
+ ehi->dev_action[dev->devno] |= ATA_EH_REVALIDATE;
+ ata_port_schedule_eh(ap);
+ break;
- __ata_qc_complete(qc);
- } else {
- if (qc->flags & ATA_QCFLAG_EH_SCHEDULED)
- return;
+ case ATA_CMD_SLEEP:
+ dev->flags |= ATA_DFLAG_SLEEPING;
+ break;
+ }
- /* read result TF if failed or requested */
- if (qc->err_mask || qc->flags & ATA_QCFLAG_RESULT_TF)
- fill_result_tf(qc);
+ if (unlikely(dev->flags & ATA_DFLAG_DUBIOUS_XFER))
+ ata_verify_xfer(qc);
- __ata_qc_complete(qc);
- }
+ __ata_qc_complete(qc);
}
EXPORT_SYMBOL_GPL(ata_qc_complete);
struct ata_link *link = qc->dev->link;
u8 prot = qc->tf.protocol;
- /* Make sure only one non-NCQ command is outstanding. The
- * check is skipped for old EH because it reuses active qc to
- * request ATAPI sense.
- */
- WARN_ON_ONCE(ap->ops->error_handler && ata_tag_valid(link->active_tag));
+ /* Make sure only one non-NCQ command is outstanding. */
+ WARN_ON_ONCE(ata_tag_valid(link->active_tag));
if (ata_is_ncq(prot)) {
WARN_ON_ONCE(link->sactive & (1 << qc->hw_tag));
}
EXPORT_SYMBOL_GPL(ata_host_init);
-void __ata_port_probe(struct ata_port *ap)
+void ata_port_probe(struct ata_port *ap)
{
struct ata_eh_info *ehi = &ap->link.eh_info;
unsigned long flags;
spin_unlock_irqrestore(ap->lock, flags);
}
-
-int ata_port_probe(struct ata_port *ap)
-{
- int rc = 0;
-
- if (ap->ops->error_handler) {
- __ata_port_probe(ap);
- ata_port_wait_eh(ap);
- } else {
- rc = ata_bus_probe(ap);
- }
- return rc;
-}
-
+EXPORT_SYMBOL_GPL(ata_port_probe);
static void async_port_probe(void *data, async_cookie_t cookie)
{
if (!(ap->host->flags & ATA_HOST_PARALLEL_SCAN) && ap->port_no != 0)
async_synchronize_cookie(cookie);
- (void)ata_port_probe(ap);
+ ata_port_probe(ap);
+ ata_port_wait_eh(ap);
/* in order to keep device order, we need to synchronize at this point */
async_synchronize_cookie(cookie);
struct ata_link *link;
struct ata_device *dev;
- if (!ap->ops->error_handler)
- goto skip_eh;
-
/* tell EH we're leaving & flush EH */
spin_lock_irqsave(ap->lock, flags);
ap->pflags |= ATA_PFLAG_UNLOADING;
cancel_delayed_work_sync(&ap->hotplug_task);
cancel_delayed_work_sync(&ap->scsi_rescan_task);
- skip_eh:
/* clean up zpodd on port removal */
ata_for_each_link(link, ap, HOST_FIRST) {
ata_for_each_dev(dev, link, ALL) {
* The final register value.
*/
u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask, u32 val,
- unsigned long interval, unsigned long timeout)
+ unsigned int interval, unsigned int timeout)
{
unsigned long deadline;
u32 tmp;
* are mostly for error handling, hotplug and those outlier devices that
* take an exceptionally long time to recover from reset.
*/
-static const unsigned long ata_eh_reset_timeouts[] = {
+static const unsigned int ata_eh_reset_timeouts[] = {
10000, /* most drives spin up by 10sec */
10000, /* > 99% working drives spin up before 20sec */
35000, /* give > 30 secs of idleness for outlier devices */
5000, /* and sweet one last chance */
- ULONG_MAX, /* > 1 min has elapsed, give up */
+ UINT_MAX, /* > 1 min has elapsed, give up */
};
static const unsigned int ata_eh_identify_timeouts[] = {
/* make sure sff pio task is not running */
ata_sff_flush_pio_task(ap);
- if (!ap->ops->error_handler)
- return;
-
/* synchronize with host lock and sort out timeouts */
/*
- * For new EH, all qcs are finished in one of three ways -
+ * For EH, all qcs are finished in one of three ways -
* normal completion, error completion, and SCSI timeout.
* Both completions can race against SCSI timeout. When normal
* completion wins, the qc never reaches EH. When error
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
unsigned long flags;
+ struct ata_link *link;
- /* invoke error handler */
- if (ap->ops->error_handler) {
- struct ata_link *link;
-
- /* acquire EH ownership */
- ata_eh_acquire(ap);
+ /* acquire EH ownership */
+ ata_eh_acquire(ap);
repeat:
- /* kill fast drain timer */
- del_timer_sync(&ap->fastdrain_timer);
+ /* kill fast drain timer */
+ del_timer_sync(&ap->fastdrain_timer);
- /* process port resume request */
- ata_eh_handle_port_resume(ap);
+ /* process port resume request */
+ ata_eh_handle_port_resume(ap);
- /* fetch & clear EH info */
- spin_lock_irqsave(ap->lock, flags);
+ /* fetch & clear EH info */
+ spin_lock_irqsave(ap->lock, flags);
- ata_for_each_link(link, ap, HOST_FIRST) {
- struct ata_eh_context *ehc = &link->eh_context;
- struct ata_device *dev;
+ ata_for_each_link(link, ap, HOST_FIRST) {
+ struct ata_eh_context *ehc = &link->eh_context;
+ struct ata_device *dev;
- memset(&link->eh_context, 0, sizeof(link->eh_context));
- link->eh_context.i = link->eh_info;
- memset(&link->eh_info, 0, sizeof(link->eh_info));
+ memset(&link->eh_context, 0, sizeof(link->eh_context));
+ link->eh_context.i = link->eh_info;
+ memset(&link->eh_info, 0, sizeof(link->eh_info));
- ata_for_each_dev(dev, link, ENABLED) {
- int devno = dev->devno;
+ ata_for_each_dev(dev, link, ENABLED) {
+ int devno = dev->devno;
- ehc->saved_xfer_mode[devno] = dev->xfer_mode;
- if (ata_ncq_enabled(dev))
- ehc->saved_ncq_enabled |= 1 << devno;
- }
+ ehc->saved_xfer_mode[devno] = dev->xfer_mode;
+ if (ata_ncq_enabled(dev))
+ ehc->saved_ncq_enabled |= 1 << devno;
}
+ }
- ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
- ap->pflags &= ~ATA_PFLAG_EH_PENDING;
- ap->excl_link = NULL; /* don't maintain exclusion over EH */
+ ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
+ ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+ ap->excl_link = NULL; /* don't maintain exclusion over EH */
- spin_unlock_irqrestore(ap->lock, flags);
+ spin_unlock_irqrestore(ap->lock, flags);
- /* invoke EH, skip if unloading or suspended */
- if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
- ap->ops->error_handler(ap);
- else {
- /* if unloading, commence suicide */
- if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
- !(ap->pflags & ATA_PFLAG_UNLOADED))
- ata_eh_unload(ap);
- ata_eh_finish(ap);
- }
+ /* invoke EH, skip if unloading or suspended */
+ if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
+ ap->ops->error_handler(ap);
+ else {
+ /* if unloading, commence suicide */
+ if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
+ !(ap->pflags & ATA_PFLAG_UNLOADED))
+ ata_eh_unload(ap);
+ ata_eh_finish(ap);
+ }
- /* process port suspend request */
- ata_eh_handle_port_suspend(ap);
+ /* process port suspend request */
+ ata_eh_handle_port_suspend(ap);
- /* Exception might have happened after ->error_handler
- * recovered the port but before this point. Repeat
- * EH in such case.
- */
- spin_lock_irqsave(ap->lock, flags);
+ /*
+ * Exception might have happened after ->error_handler recovered the
+ * port but before this point. Repeat EH in such case.
+ */
+ spin_lock_irqsave(ap->lock, flags);
- if (ap->pflags & ATA_PFLAG_EH_PENDING) {
- if (--ap->eh_tries) {
- spin_unlock_irqrestore(ap->lock, flags);
- goto repeat;
- }
- ata_port_err(ap,
- "EH pending after %d tries, giving up\n",
- ATA_EH_MAX_TRIES);
- ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+ if (ap->pflags & ATA_PFLAG_EH_PENDING) {
+ if (--ap->eh_tries) {
+ spin_unlock_irqrestore(ap->lock, flags);
+ goto repeat;
}
+ ata_port_err(ap,
+ "EH pending after %d tries, giving up\n",
+ ATA_EH_MAX_TRIES);
+ ap->pflags &= ~ATA_PFLAG_EH_PENDING;
+ }
- /* this run is complete, make sure EH info is clear */
- ata_for_each_link(link, ap, HOST_FIRST)
- memset(&link->eh_info, 0, sizeof(link->eh_info));
+ /* this run is complete, make sure EH info is clear */
+ ata_for_each_link(link, ap, HOST_FIRST)
+ memset(&link->eh_info, 0, sizeof(link->eh_info));
- /* end eh (clear host_eh_scheduled) while holding
- * ap->lock such that if exception occurs after this
- * point but before EH completion, SCSI midlayer will
- * re-initiate EH.
- */
- ap->ops->end_eh(ap);
+ /*
+ * end eh (clear host_eh_scheduled) while holding ap->lock such that if
+ * exception occurs after this point but before EH completion, SCSI
+ * midlayer will re-initiate EH.
+ */
+ ap->ops->end_eh(ap);
- spin_unlock_irqrestore(ap->lock, flags);
- ata_eh_release(ap);
- } else {
- WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
- ap->ops->eng_timeout(ap);
- }
+ spin_unlock_irqrestore(ap->lock, flags);
+ ata_eh_release(ap);
scsi_eh_flush_done_q(&ap->eh_done_q);
{
struct ata_port *ap = qc->ap;
- WARN_ON(!ap->ops->error_handler);
-
qc->flags |= ATA_QCFLAG_EH;
ata_eh_set_pending(ap, 1);
*/
void ata_std_sched_eh(struct ata_port *ap)
{
- WARN_ON(!ap->ops->error_handler);
-
if (ap->pflags & ATA_PFLAG_INITIALIZING)
return;
struct ata_queued_cmd *qc;
int tag, nr_aborted = 0;
- WARN_ON(!ap->ops->error_handler);
-
/* we're gonna abort all commands, no need for fast drain */
ata_eh_set_pending(ap, 0);
*/
static void __ata_port_freeze(struct ata_port *ap)
{
- WARN_ON(!ap->ops->error_handler);
-
if (ap->ops->freeze)
ap->ops->freeze(ap);
*/
int ata_port_freeze(struct ata_port *ap)
{
- WARN_ON(!ap->ops->error_handler);
-
__ata_port_freeze(ap);
return ata_port_abort(ap);
{
unsigned long flags;
- if (!ap->ops->error_handler)
- return;
-
spin_lock_irqsave(ap->lock, flags);
__ata_port_freeze(ap);
spin_unlock_irqrestore(ap->lock, flags);
{
unsigned long flags;
- if (!ap->ops->error_handler)
- return;
-
spin_lock_irqsave(ap->lock, flags);
ap->pflags &= ~ATA_PFLAG_FROZEN;
/*
* Prepare to reset
*/
- while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX)
+ while (ata_eh_reset_timeouts[max_tries] != UINT_MAX)
max_tries++;
if (link->flags & ATA_LFLAG_RST_ONCE)
max_tries = 1;
#include "libata-transport.h"
/* debounce timing parameters in msecs { interval, duration, timeout } */
-const unsigned long sata_deb_timing_normal[] = { 5, 100, 2000 };
+const unsigned int sata_deb_timing_normal[] = { 5, 100, 2000 };
EXPORT_SYMBOL_GPL(sata_deb_timing_normal);
-const unsigned long sata_deb_timing_hotplug[] = { 25, 500, 2000 };
+const unsigned int sata_deb_timing_hotplug[] = { 25, 500, 2000 };
EXPORT_SYMBOL_GPL(sata_deb_timing_hotplug);
-const unsigned long sata_deb_timing_long[] = { 100, 2000, 5000 };
+const unsigned int sata_deb_timing_long[] = { 100, 2000, 5000 };
EXPORT_SYMBOL_GPL(sata_deb_timing_long);
/**
* RETURNS:
* 0 on success, -errno on failure.
*/
-int sata_link_debounce(struct ata_link *link, const unsigned long *params,
+int sata_link_debounce(struct ata_link *link, const unsigned int *params,
unsigned long deadline)
{
- unsigned long interval = params[0];
- unsigned long duration = params[1];
+ unsigned int interval = params[0];
+ unsigned int duration = params[1];
unsigned long last_jiffies, t;
u32 last, cur;
int rc;
* RETURNS:
* 0 on success, -errno on failure.
*/
-int sata_link_resume(struct ata_link *link, const unsigned long *params,
+int sata_link_resume(struct ata_link *link, const unsigned int *params,
unsigned long deadline)
{
int tries = ATA_LINK_RESUME_TRIES;
* RETURNS:
* 0 on success, -errno otherwise.
*/
-int sata_link_hardreset(struct ata_link *link, const unsigned long *timing,
+int sata_link_hardreset(struct ata_link *link, const unsigned int *timing,
unsigned long deadline,
bool *online, int (*check_ready)(struct ata_link *))
{
ap->flags |= port_info->flags;
ap->ops = port_info->port_ops;
ap->cbl = ATA_CBL_SATA;
+ ap->print_id = atomic_inc_return(&ata_print_id);
return ap;
}
EXPORT_SYMBOL_GPL(ata_sas_port_alloc);
-/**
- * ata_sas_port_start - Set port up for dma.
- * @ap: Port to initialize
- *
- * Called just after data structures for each port are
- * initialized.
- *
- * May be used as the port_start() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-int ata_sas_port_start(struct ata_port *ap)
-{
- /*
- * the port is marked as frozen at allocation time, but if we don't
- * have new eh, we won't thaw it
- */
- if (!ap->ops->error_handler)
- ap->pflags &= ~ATA_PFLAG_FROZEN;
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_start);
-
-/**
- * ata_sas_port_stop - Undo ata_sas_port_start()
- * @ap: Port to shut down
- *
- * May be used as the port_stop() entry in ata_port_operations.
- *
- * LOCKING:
- * Inherited from caller.
- */
-
-void ata_sas_port_stop(struct ata_port *ap)
-{
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_stop);
-
-/**
- * ata_sas_async_probe - simply schedule probing and return
- * @ap: Port to probe
- *
- * For batch scheduling of probe for sas attached ata devices, assumes
- * the port has already been through ata_sas_port_init()
- */
-void ata_sas_async_probe(struct ata_port *ap)
-{
- __ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_async_probe);
-
-int ata_sas_sync_probe(struct ata_port *ap)
-{
- return ata_port_probe(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_sync_probe);
-
-
-/**
- * ata_sas_port_init - Initialize a SATA device
- * @ap: SATA port to initialize
- *
- * LOCKING:
- * PCI/etc. bus probe sem.
- *
- * RETURNS:
- * Zero on success, non-zero on error.
- */
-
-int ata_sas_port_init(struct ata_port *ap)
-{
- int rc = ap->ops->port_start(ap);
-
- if (rc)
- return rc;
- ap->print_id = atomic_inc_return(&ata_print_id);
- return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_init);
-
int ata_sas_tport_add(struct device *parent, struct ata_port *ap)
{
return ata_tport_add(parent, ap);
EXPORT_SYMBOL_GPL(ata_sas_tport_delete);
/**
- * ata_sas_port_destroy - Destroy a SATA port allocated by ata_sas_port_alloc
- * @ap: SATA port to destroy
- *
- */
-
-void ata_sas_port_destroy(struct ata_port *ap)
-{
- if (ap->ops->port_stop)
- ap->ops->port_stop(ap);
- kfree(ap);
-}
-EXPORT_SYMBOL_GPL(ata_sas_port_destroy);
-
-/**
* ata_sas_slave_configure - Default slave_config routine for libata devices
* @sdev: SCSI device to configure
* @ap: ATA port to which SCSI device is attached
struct scsi_device *sdev = to_scsi_device(device);
struct ata_port *ap;
struct ata_device *dev;
- long int input;
+ int input;
unsigned long flags;
int rc;
- rc = kstrtol(buf, 10, &input);
+ rc = kstrtoint(buf, 10, &input);
if (rc)
return rc;
if (input < -2)
}
/**
- * ata_dump_status - user friendly display of error info
- * @ap: the port in question
- * @tf: ptr to filled out taskfile
- *
- * Decode and dump the ATA error/status registers for the user so
- * that they have some idea what really happened at the non
- * make-believe layer.
- *
- * LOCKING:
- * inherited from caller
- */
-static void ata_dump_status(struct ata_port *ap, struct ata_taskfile *tf)
-{
- u8 stat = tf->status, err = tf->error;
-
- if (stat & ATA_BUSY) {
- ata_port_warn(ap, "status=0x%02x {Busy} ", stat);
- } else {
- ata_port_warn(ap, "status=0x%02x { %s%s%s%s%s%s%s} ", stat,
- stat & ATA_DRDY ? "DriveReady " : "",
- stat & ATA_DF ? "DeviceFault " : "",
- stat & ATA_DSC ? "SeekComplete " : "",
- stat & ATA_DRQ ? "DataRequest " : "",
- stat & ATA_CORR ? "CorrectedError " : "",
- stat & ATA_SENSE ? "Sense " : "",
- stat & ATA_ERR ? "Error " : "");
- if (err)
- ata_port_warn(ap, "error=0x%02x {%s%s%s%s%s%s", err,
- err & ATA_ABORTED ?
- "DriveStatusError " : "",
- err & ATA_ICRC ?
- (err & ATA_ABORTED ?
- "BadCRC " : "Sector ") : "",
- err & ATA_UNC ? "UncorrectableError " : "",
- err & ATA_IDNF ? "SectorIdNotFound " : "",
- err & ATA_TRK0NF ? "TrackZeroNotFound " : "",
- err & ATA_AMNF ? "AddrMarkNotFound " : "");
- }
-}
-
-/**
* ata_to_sense_error - convert ATA error to SCSI error
* @id: ATA device number
* @drv_stat: value contained in ATA status register
* @sk: the sense key we'll fill out
* @asc: the additional sense code we'll fill out
* @ascq: the additional sense code qualifier we'll fill out
- * @verbose: be verbose
*
* Converts an ATA error into a SCSI error. Fill out pointers to
* SK, ASC, and ASCQ bytes for later use in fixed or descriptor
* spin_lock_irqsave(host lock)
*/
static void ata_to_sense_error(unsigned id, u8 drv_stat, u8 drv_err, u8 *sk,
- u8 *asc, u8 *ascq, int verbose)
+ u8 *asc, u8 *ascq)
{
int i;
*sk = sense_table[i][1];
*asc = sense_table[i][2];
*ascq = sense_table[i][3];
- goto translate_done;
+ return;
}
}
}
*sk = stat_table[i][1];
*asc = stat_table[i][2];
*ascq = stat_table[i][3];
- goto translate_done;
+ return;
}
}
*sk = ABORTED_COMMAND;
*asc = 0x00;
*ascq = 0x00;
-
- translate_done:
- if (verbose)
- pr_err("ata%u: translated ATA stat/err 0x%02x/%02x to SCSI SK/ASC/ASCQ 0x%x/%02x/%02x\n",
- id, drv_stat, drv_err, *sk, *asc, *ascq);
- return;
}
/*
struct ata_taskfile *tf = &qc->result_tf;
unsigned char *sb = cmd->sense_buffer;
unsigned char *desc = sb + 8;
- int verbose = qc->ap->ops->error_handler == NULL;
u8 sense_key, asc, ascq;
memset(sb, 0, SCSI_SENSE_BUFFERSIZE);
if (qc->err_mask ||
tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
- &sense_key, &asc, &ascq, verbose);
+ &sense_key, &asc, &ascq);
ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq);
} else {
/*
struct scsi_cmnd *cmd = qc->scsicmd;
struct ata_taskfile *tf = &qc->result_tf;
unsigned char *sb = cmd->sense_buffer;
- int verbose = qc->ap->ops->error_handler == NULL;
u64 block;
u8 sense_key, asc, ascq;
if (qc->err_mask ||
tf->status & (ATA_BUSY | ATA_DF | ATA_ERR | ATA_DRQ)) {
ata_to_sense_error(qc->ap->print_id, tf->status, tf->error,
- &sense_key, &asc, &ascq, verbose);
+ &sense_key, &asc, &ascq);
ata_scsi_set_sense(dev, cmd, sense_key, asc, ascq);
} else {
/* Could not decode error */
unsigned long flags;
struct ata_device *dev;
- if (!ap->ops->error_handler)
- return;
-
spin_lock_irqsave(ap->lock, flags);
dev = __ata_scsi_find_dev(ap, sdev);
if (dev && dev->sdev) {
static void ata_scsi_qc_complete(struct ata_queued_cmd *qc)
{
- struct ata_port *ap = qc->ap;
struct scsi_cmnd *cmd = qc->scsicmd;
u8 *cdb = cmd->cmnd;
int need_sense = (qc->err_mask != 0) &&
/* Keep the SCSI ML and status byte, clear host byte. */
cmd->result &= 0x0000ffff;
- if (need_sense && !ap->ops->error_handler)
- ata_dump_status(ap, &qc->result_tf);
-
ata_qc_done(qc);
}
return 0;
}
-static void atapi_sense_complete(struct ata_queued_cmd *qc)
-{
- if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0)) {
- /* FIXME: not quite right; we don't want the
- * translation of taskfile registers into
- * a sense descriptors, since that's only
- * correct for ATA, not ATAPI
- */
- ata_gen_passthru_sense(qc);
- }
-
- ata_qc_done(qc);
-}
-
-/* is it pointless to prefer PIO for "safety reasons"? */
-static inline int ata_pio_use_silly(struct ata_port *ap)
-{
- return (ap->flags & ATA_FLAG_PIO_DMA);
-}
-
-static void atapi_request_sense(struct ata_queued_cmd *qc)
-{
- struct ata_port *ap = qc->ap;
- struct scsi_cmnd *cmd = qc->scsicmd;
-
- memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
-
-#ifdef CONFIG_ATA_SFF
- if (ap->ops->sff_tf_read)
- ap->ops->sff_tf_read(ap, &qc->tf);
-#endif
-
- /* fill these in, for the case where they are -not- overwritten */
- cmd->sense_buffer[0] = 0x70;
- cmd->sense_buffer[2] = qc->tf.error >> 4;
-
- ata_qc_reinit(qc);
-
- /* setup sg table and init transfer direction */
- sg_init_one(&qc->sgent, cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
- ata_sg_init(qc, &qc->sgent, 1);
- qc->dma_dir = DMA_FROM_DEVICE;
-
- memset(&qc->cdb, 0, qc->dev->cdb_len);
- qc->cdb[0] = REQUEST_SENSE;
- qc->cdb[4] = SCSI_SENSE_BUFFERSIZE;
-
- qc->tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
- qc->tf.command = ATA_CMD_PACKET;
-
- if (ata_pio_use_silly(ap)) {
- qc->tf.protocol = ATAPI_PROT_DMA;
- qc->tf.feature |= ATAPI_PKT_DMA;
- } else {
- qc->tf.protocol = ATAPI_PROT_PIO;
- qc->tf.lbam = SCSI_SENSE_BUFFERSIZE;
- qc->tf.lbah = 0;
- }
- qc->nbytes = SCSI_SENSE_BUFFERSIZE;
-
- qc->complete_fn = atapi_sense_complete;
-
- ata_qc_issue(qc);
-}
-
/*
* ATAPI devices typically report zero for their SCSI version, and sometimes
* deviate from the spec WRT response data format. If SCSI version is
struct scsi_cmnd *cmd = qc->scsicmd;
unsigned int err_mask = qc->err_mask;
- /* handle completion from new EH */
- if (unlikely(qc->ap->ops->error_handler &&
- (err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID))) {
+ /* handle completion from EH */
+ if (unlikely(err_mask || qc->flags & ATA_QCFLAG_SENSE_VALID)) {
if (!(qc->flags & ATA_QCFLAG_SENSE_VALID)) {
/* FIXME: not quite right; we don't want the
return;
}
- /* successful completion or old EH failure path */
- if (unlikely(err_mask & AC_ERR_DEV)) {
- cmd->result = SAM_STAT_CHECK_CONDITION;
- atapi_request_sense(qc);
- return;
- } else if (unlikely(err_mask)) {
- /* FIXME: not quite right; we don't want the
- * translation of taskfile registers into
- * a sense descriptors, since that's only
- * correct for ATA, not ATAPI
- */
- ata_gen_passthru_sense(qc);
- } else {
- if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
- atapi_fixup_inquiry(cmd);
- cmd->result = SAM_STAT_GOOD;
- }
+ /* successful completion path */
+ if (cmd->cmnd[0] == INQUIRY && (cmd->cmnd[1] & 0x03) == 0)
+ atapi_fixup_inquiry(cmd);
+ cmd->result = SAM_STAT_GOOD;
ata_qc_done(qc);
}
unsigned long flags;
int devno, rc = 0;
- if (!ap->ops->error_handler)
- return -EOPNOTSUPP;
-
if (lun != SCAN_WILD_CARD && lun)
return -EINVAL;
{
struct ata_port *ap = qc->ap;
- if (ap->ops->error_handler) {
- if (in_wq) {
- /* EH might have kicked in while host lock is
- * released.
- */
- qc = ata_qc_from_tag(ap, qc->tag);
- if (qc) {
- if (likely(!(qc->err_mask & AC_ERR_HSM))) {
- ata_sff_irq_on(ap);
- ata_qc_complete(qc);
- } else
- ata_port_freeze(ap);
- }
- } else {
- if (likely(!(qc->err_mask & AC_ERR_HSM)))
+ if (in_wq) {
+ /* EH might have kicked in while host lock is released. */
+ qc = ata_qc_from_tag(ap, qc->tag);
+ if (qc) {
+ if (likely(!(qc->err_mask & AC_ERR_HSM))) {
+ ata_sff_irq_on(ap);
ata_qc_complete(qc);
- else
+ } else
ata_port_freeze(ap);
}
} else {
- if (in_wq) {
- ata_sff_irq_on(ap);
- ata_qc_complete(qc);
- } else
+ if (likely(!(qc->err_mask & AC_ERR_HSM)))
ata_qc_complete(qc);
+ else
+ ata_port_freeze(ap);
}
}
unsigned long deadline)
{
struct ata_eh_context *ehc = &link->eh_context;
- const unsigned long *timing = sata_ehc_deb_timing(ehc);
+ const unsigned int *timing = sata_ehc_deb_timing(ehc);
bool online;
int rc;
extern int ata_cmd_ioctl(struct scsi_device *scsidev, void __user *arg);
extern struct ata_port *ata_port_alloc(struct ata_host *host);
extern const char *sata_spd_string(unsigned int spd);
-extern int ata_port_probe(struct ata_port *ap);
-extern void __ata_port_probe(struct ata_port *ap);
extern unsigned int ata_read_log_page(struct ata_device *dev, u8 log,
u8 page, void *buf, unsigned int sectors);
extern void ata_scsi_hotplug(struct work_struct *work);
extern void ata_schedule_scsi_eh(struct Scsi_Host *shost);
extern void ata_scsi_dev_rescan(struct work_struct *work);
-extern int ata_bus_probe(struct ata_port *ap);
extern int ata_scsi_user_scan(struct Scsi_Host *shost, unsigned int channel,
unsigned int id, u64 lun);
void ata_scsi_sdev_config(struct scsi_device *sdev);
return ret;
}
-static int arasan_cf_remove(struct platform_device *pdev)
+static void arasan_cf_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct arasan_cf_dev *acdev = host->ports[0]->private_data;
ata_host_detach(host);
cf_exit(acdev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
static struct platform_driver arasan_cf_driver = {
.probe = arasan_cf_probe,
- .remove = arasan_cf_remove,
+ .remove_new = arasan_cf_remove,
.driver = {
.name = DRIVER_NAME,
.pm = &arasan_cf_pm_ops,
#include <asm/amigahw.h>
#include <asm/amigaints.h>
-#include <asm/ide.h>
#include <asm/setup.h>
#define DRV_NAME "pata_buddha"
#include <linux/ata.h>
#include <linux/libata.h>
#include <linux/platform_device.h>
+#include <linux/sys_soc.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/ktime.h>
.port_start = ep93xx_pata_port_start,
};
+static const struct soc_device_attribute ep93xx_soc_table[] = {
+ { .revision = "E1", .data = (void *)ATA_UDMA3 },
+ { .revision = "E2", .data = (void *)ATA_UDMA4 },
+ { /* sentinel */ }
+};
+
static int ep93xx_pata_probe(struct platform_device *pdev)
{
struct ep93xx_pata_data *drv_data;
drv_data = devm_kzalloc(&pdev->dev, sizeof(*drv_data), GFP_KERNEL);
if (!drv_data) {
- err = -ENXIO;
+ err = -ENOMEM;
goto err_rel_gpio;
}
/* allocate host */
host = ata_host_alloc(&pdev->dev, 1);
if (!host) {
- err = -ENXIO;
+ err = -ENOMEM;
goto err_rel_dma;
}
* so this driver supports only UDMA modes.
*/
if (drv_data->dma_rx_channel && drv_data->dma_tx_channel) {
- int chip_rev = ep93xx_chip_revision();
+ const struct soc_device_attribute *match;
- if (chip_rev == EP93XX_CHIP_REV_E1)
- ap->udma_mask = ATA_UDMA3;
- else if (chip_rev == EP93XX_CHIP_REV_E2)
- ap->udma_mask = ATA_UDMA4;
+ match = soc_device_match(ep93xx_soc_table);
+ if (match)
+ ap->udma_mask = (unsigned int) match->data;
else
ap->udma_mask = ATA_UDMA2;
}
return err;
}
-static int ep93xx_pata_remove(struct platform_device *pdev)
+static void ep93xx_pata_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct ep93xx_pata_data *drv_data = host->private_data;
ep93xx_pata_release_dma(drv_data);
ep93xx_pata_clear_regs(drv_data->ide_base);
ep93xx_ide_release_gpio(pdev);
- return 0;
}
static struct platform_driver ep93xx_pata_platform_driver = {
.name = DRV_NAME,
},
.probe = ep93xx_pata_probe,
- .remove = ep93xx_pata_remove,
+ .remove_new = ep93xx_pata_remove,
};
module_platform_driver(ep93xx_pata_platform_driver);
#include <asm/atarihw.h>
#include <asm/atariints.h>
#include <asm/atari_stdma.h>
-#include <asm/ide.h>
#define DRV_NAME "pata_falcon"
#define DRV_VERSION "0.1.0"
+static int pata_falcon_swap_mask;
+
+module_param_named(data_swab, pata_falcon_swap_mask, int, 0444);
+MODULE_PARM_DESC(data_swab, "Data byte swap enable/disable bitmap (0x1==drive1, 0x2==drive2, 0x4==drive3, 0x8==drive4, default==0)");
+
static const struct scsi_host_template pata_falcon_sht = {
ATA_PIO_SHT(DRV_NAME),
};
if (dev->class == ATA_DEV_ATA && cmd &&
!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd)))
- swap = 0;
+ swap = (uintptr_t)ap->private_data & BIT(dev->devno);
/* Transfer multiple of 2 bytes */
if (rw == READ) {
struct resource *base_res, *ctl_res, *irq_res;
struct ata_host *host;
struct ata_port *ap;
- void __iomem *base;
- int irq = 0;
+ void __iomem *base, *ctl_base;
+ int mask_shift = 0; /* Q40 & Falcon default */
+ int irq = 0, io_offset = 1, reg_shift = 2; /* Falcon defaults */
dev_info(&pdev->dev, "Atari Falcon and Q40/Q60 PATA controller\n");
ap->pio_mask = ATA_PIO4;
ap->flags |= ATA_FLAG_SLAVE_POSS | ATA_FLAG_NO_IORDY;
- base = (void __iomem *)base_mem_res->start;
/* N.B. this assumes data_addr will be used for word-sized I/O only */
- ap->ioaddr.data_addr = base + 0 + 0 * 4;
- ap->ioaddr.error_addr = base + 1 + 1 * 4;
- ap->ioaddr.feature_addr = base + 1 + 1 * 4;
- ap->ioaddr.nsect_addr = base + 1 + 2 * 4;
- ap->ioaddr.lbal_addr = base + 1 + 3 * 4;
- ap->ioaddr.lbam_addr = base + 1 + 4 * 4;
- ap->ioaddr.lbah_addr = base + 1 + 5 * 4;
- ap->ioaddr.device_addr = base + 1 + 6 * 4;
- ap->ioaddr.status_addr = base + 1 + 7 * 4;
- ap->ioaddr.command_addr = base + 1 + 7 * 4;
-
- base = (void __iomem *)ctl_mem_res->start;
- ap->ioaddr.altstatus_addr = base + 1;
- ap->ioaddr.ctl_addr = base + 1;
-
- ata_port_desc(ap, "cmd 0x%lx ctl 0x%lx",
- (unsigned long)base_mem_res->start,
- (unsigned long)ctl_mem_res->start);
+ ap->ioaddr.data_addr = (void __iomem *)base_mem_res->start;
+
+ if (base_res) { /* only Q40 has IO resources */
+ io_offset = 0x10000;
+ reg_shift = 0;
+ base = (void __iomem *)base_res->start;
+ ctl_base = (void __iomem *)ctl_res->start;
+ } else {
+ base = (void __iomem *)base_mem_res->start;
+ ctl_base = (void __iomem *)ctl_mem_res->start;
+ }
+
+ ap->ioaddr.error_addr = base + io_offset + (1 << reg_shift);
+ ap->ioaddr.feature_addr = base + io_offset + (1 << reg_shift);
+ ap->ioaddr.nsect_addr = base + io_offset + (2 << reg_shift);
+ ap->ioaddr.lbal_addr = base + io_offset + (3 << reg_shift);
+ ap->ioaddr.lbam_addr = base + io_offset + (4 << reg_shift);
+ ap->ioaddr.lbah_addr = base + io_offset + (5 << reg_shift);
+ ap->ioaddr.device_addr = base + io_offset + (6 << reg_shift);
+ ap->ioaddr.status_addr = base + io_offset + (7 << reg_shift);
+ ap->ioaddr.command_addr = base + io_offset + (7 << reg_shift);
+
+ ap->ioaddr.altstatus_addr = ctl_base + io_offset;
+ ap->ioaddr.ctl_addr = ctl_base + io_offset;
+
+ ata_port_desc(ap, "cmd %px ctl %px data %px",
+ base, ctl_base, ap->ioaddr.data_addr);
+
+ if (pdev->id > 0)
+ mask_shift = 2;
+ ap->private_data = (void *)(uintptr_t)(pata_falcon_swap_mask >> mask_shift);
irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
if (irq_res && irq_res->start > 0) {
#include <linux/module.h>
#include <linux/libata.h>
#include <linux/bitops.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/clk.h>
#include "sata_gemini.h"
if (irq < 0)
return irq;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENODEV;
-
- ftide->base = devm_ioremap_resource(dev, res);
+ ftide->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(ftide->base))
return PTR_ERR(ftide->base);
return ret;
}
-static int pata_ftide010_remove(struct platform_device *pdev)
+static void pata_ftide010_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct ftide010 *ftide = host->private_data;
ata_host_detach(ftide->host);
clk_disable_unprepare(ftide->pclk);
-
- return 0;
}
static const struct of_device_id pata_ftide010_of_match[] = {
.of_match_table = pata_ftide010_of_match,
},
.probe = pata_ftide010_probe,
- .remove = pata_ftide010_remove,
+ .remove_new = pata_ftide010_remove,
};
module_platform_driver(pata_ftide010_driver);
+MODULE_DESCRIPTION("low level driver for Faraday Technology FTIDE010");
MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" DRV_NAME);
#include <asm/amigahw.h>
#include <asm/amigaints.h>
#include <asm/amigayle.h>
-#include <asm/ide.h>
#include <asm/setup.h>
#define DRV_NAME "pata_gayle"
if (!priv)
return -ENOMEM;
- priv->clk = devm_clk_get(&pdev->dev, NULL);
+ priv->clk = devm_clk_get_enabled(&pdev->dev, NULL);
if (IS_ERR(priv->clk)) {
- dev_err(&pdev->dev, "Failed to get clock\n");
+ dev_err(&pdev->dev, "Failed to get and enable clock\n");
return PTR_ERR(priv->clk);
}
- ret = clk_prepare_enable(priv->clk);
- if (ret)
- return ret;
-
host = ata_host_alloc(&pdev->dev, 1);
- if (!host) {
- ret = -ENOMEM;
- goto err;
- }
+ if (!host)
+ return -ENOMEM;
host->private_data = priv;
ap = host->ports[0];
ap->pio_mask = ATA_PIO4;
ap->flags |= ATA_FLAG_SLAVE_POSS;
- io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->host_regs = devm_ioremap_resource(&pdev->dev, io_res);
- if (IS_ERR(priv->host_regs)) {
- ret = PTR_ERR(priv->host_regs);
- goto err;
- }
+ priv->host_regs = devm_platform_get_and_ioremap_resource(pdev, 0, &io_res);
+ if (IS_ERR(priv->host_regs))
+ return PTR_ERR(priv->host_regs);
ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA;
ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL;
&pata_imx_sht);
if (ret)
- goto err;
+ return ret;
return 0;
-err:
- clk_disable_unprepare(priv->clk);
-
- return ret;
}
-static int pata_imx_remove(struct platform_device *pdev)
+static void pata_imx_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct pata_imx_priv *priv = host->private_data;
ata_host_detach(host);
__raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN);
-
- clk_disable_unprepare(priv->clk);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
static struct platform_driver pata_imx_driver = {
.probe = pata_imx_probe,
- .remove = pata_imx_remove,
+ .remove_new = pata_imx_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = imx_pata_dt_ids,
int ret;
int irq;
- cmd = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- ctl = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-
- if (!cmd || !ctl)
- return -EINVAL;
-
ixpp = devm_kzalloc(dev, sizeof(*ixpp), GFP_KERNEL);
if (!ixpp)
return -ENOMEM;
if (ret)
return ret;
- ixpp->cmd = devm_ioremap_resource(dev, cmd);
- ixpp->ctl = devm_ioremap_resource(dev, ctl);
- if (IS_ERR(ixpp->cmd) || IS_ERR(ixpp->ctl))
- return -ENOMEM;
+ ixpp->cmd = devm_platform_get_and_ioremap_resource(pdev, 0, &cmd);
+ if (IS_ERR(ixpp->cmd))
+ return PTR_ERR(ixpp->cmd);
+
+ ixpp->ctl = devm_platform_get_and_ioremap_resource(pdev, 1, &ctl);
+ if (IS_ERR(ixpp->ctl))
+ return PTR_ERR(ixpp->ctl);
irq = platform_get_irq(pdev, 0);
- if (irq > 0)
- irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
- else if (irq < 0)
+ if (irq < 0)
return irq;
- else
- return -EINVAL;
+ irq_set_irq_type(irq, IRQ_TYPE_EDGE_RISING);
/* Just one port to set up */
ixp4xx_setup_port(ixpp->host->ports[0], ixpp, cmd->start, ctl->start);
#include <linux/gfp.h>
#include <linux/delay.h>
#include <linux/libata.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <asm/cacheflush.h>
return rv;
}
-static int
-mpc52xx_ata_remove(struct platform_device *op)
+static void mpc52xx_ata_remove(struct platform_device *op)
{
struct ata_host *host = platform_get_drvdata(op);
struct mpc52xx_ata_priv *priv = host->private_data;
irq_dispose_mapping(task_irq);
bcom_ata_release(priv->dmatsk);
irq_dispose_mapping(priv->ata_irq);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
static struct platform_driver mpc52xx_ata_of_platform_driver = {
.probe = mpc52xx_ata_probe,
- .remove = mpc52xx_ata_remove,
+ .remove_new = mpc52xx_ata_remove,
#ifdef CONFIG_PM_SLEEP
.suspend = mpc52xx_ata_suspend,
.resume = mpc52xx_ata_resume,
return ret;
}
-static int pxa_ata_remove(struct platform_device *pdev)
+static void pxa_ata_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct pata_pxa_data *data = host->ports[0]->private_data;
dma_release_channel(data->dma_chan);
ata_host_detach(host);
-
- return 0;
}
static struct platform_driver pxa_ata_driver = {
.probe = pxa_ata_probe,
- .remove = pxa_ata_remove,
+ .remove_new = pxa_ata_remove,
.driver = {
.name = DRV_NAME,
},
return 0;
}
-static int rb532_pata_driver_remove(struct platform_device *pdev)
+static void rb532_pata_driver_remove(struct platform_device *pdev)
{
struct ata_host *ah = platform_get_drvdata(pdev);
ata_host_detach(ah);
-
- return 0;
}
static struct platform_driver rb532_pata_platform_driver = {
.probe = rb532_pata_driver_probe,
- .remove = rb532_pata_driver_remove,
+ .remove_new = rb532_pata_driver_remove,
.driver = {
.name = DRV_NAME,
},
* document.
*
* This function is also called to turn off DMA when a timeout occurs
- * during DMA operation. In both cases we need to reset the engine,
- * so no actual eng_timeout handler is required.
+ * during DMA operation. In both cases we need to reset the engine.
*
* We assume bmdma_stop is always called if bmdma_start as called. If
* not then we may need to wrap qc_issue.
#include <linux/module.h>
#include <linux/device.h>
#include <linux/dmaengine.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/phy/phy.h>
#include <linux/libata.h>
return err;
}
-static int sata_dwc_remove(struct platform_device *ofdev)
+static void sata_dwc_remove(struct platform_device *ofdev)
{
struct device *dev = &ofdev->dev;
struct ata_host *host = dev_get_drvdata(dev);
#endif
dev_dbg(dev, "done\n");
- return 0;
}
static const struct of_device_id sata_dwc_match[] = {
.of_match_table = sata_dwc_match,
},
.probe = sata_dwc_probe,
- .remove = sata_dwc_remove,
+ .remove_new = sata_dwc_remove,
};
module_platform_driver(sata_dwc_driver);
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <scsi/scsi_cmnd.h>
#include <linux/libata.h>
#include <asm/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_platform.h>
static unsigned int intr_coalescing_count;
module_param(intr_coalescing_count, int, S_IRUGO);
return retval;
}
-static int sata_fsl_remove(struct platform_device *ofdev)
+static void sata_fsl_remove(struct platform_device *ofdev)
{
struct ata_host *host = platform_get_drvdata(ofdev);
struct sata_fsl_host_priv *host_priv = host->private_data;
device_remove_file(&ofdev->dev, &host_priv->rx_watermark);
ata_host_detach(host);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
.of_match_table = fsl_sata_match,
},
.probe = sata_fsl_probe,
- .remove = sata_fsl_remove,
+ .remove_new = sata_fsl_remove,
#ifdef CONFIG_PM_SLEEP
.suspend = sata_fsl_suspend,
.resume = sata_fsl_resume,
#include <linux/regmap.h>
#include <linux/delay.h>
#include <linux/reset.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/pinctrl/consumer.h>
return ret;
}
-static int gemini_sata_remove(struct platform_device *pdev)
+static void gemini_sata_remove(struct platform_device *pdev)
{
struct sata_gemini *sg = platform_get_drvdata(pdev);
clk_unprepare(sg->sata0_pclk);
}
sg_singleton = NULL;
-
- return 0;
}
static const struct of_device_id gemini_sata_of_match[] = {
.of_match_table = gemini_sata_of_match,
},
.probe = gemini_sata_probe,
- .remove = gemini_sata_remove,
+ .remove_new = gemini_sata_remove,
};
module_platform_driver(gemini_sata_driver);
+MODULE_DESCRIPTION("low level driver for Cortina Systems Gemini SATA bridge");
MODULE_AUTHOR("Linus Walleij <linus.walleij@linaro.org>");
MODULE_LICENSE("GPL");
MODULE_ALIAS("platform:" DRV_NAME);
#include <linux/io.h>
#include <linux/spinlock.h>
#include <linux/device.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/libata.h>
static int ahci_highbank_hardreset(struct ata_link *link, unsigned int *class,
unsigned long deadline)
{
- static const unsigned long timing[] = { 5, 100, 500};
+ static const unsigned int timing[] = { 5, 100, 500};
struct ata_port *ap = link->ap;
struct ahci_port_priv *pp = ap->private_data;
struct ahci_host_priv *hpriv = ap->host->private_data;
struct ata_port *ap = link->ap;
void __iomem *port_base = inic_port_base(ap);
void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
- const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+ const unsigned int *timing = sata_ehc_deb_timing(&link->eh_context);
int rc;
/* hammer it into sane state */
/* Workaround for errata FEr SATA#10 (part 2) */
do {
- const unsigned long *timing =
+ const unsigned int *timing =
sata_ehc_deb_timing(&link->eh_context);
rc = sata_link_hardreset(link, timing, deadline + extra,
* A platform bus SATA device has been unplugged. Perform the needed
* cleanup. Also called on module unload for any active devices.
*/
-static int mv_platform_remove(struct platform_device *pdev)
+static void mv_platform_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct mv_host_priv *hpriv = host->private_data;
}
phy_power_off(hpriv->port_phys[port]);
}
- return 0;
}
#ifdef CONFIG_PM_SLEEP
static struct platform_driver mv_platform_driver = {
.probe = mv_platform_probe,
- .remove = mv_platform_remove,
+ .remove_new = mv_platform_remove,
.suspend = mv_platform_suspend,
.resume = mv_platform_resume,
.driver = {
sata_link_hardreset(link, sata_deb_timing_hotplug, deadline,
NULL, NULL);
else {
- const unsigned long *timing = sata_ehc_deb_timing(ehc);
+ const unsigned int *timing = sata_ehc_deb_timing(ehc);
int rc;
if (!(ehc->i.flags & ATA_EHI_QUIET))
#include <linux/module.h>
#include <linux/ata.h>
#include <linux/libata.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/err.h>
struct device *dev = &pdev->dev;
struct ata_host *host;
struct sata_rcar_priv *priv;
- struct resource *mem;
- int irq;
- int ret = 0;
+ int irq, ret;
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
- if (!irq)
- return -EINVAL;
priv = devm_kzalloc(dev, sizeof(struct sata_rcar_priv), GFP_KERNEL);
if (!priv)
host->private_data = priv;
- mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- priv->base = devm_ioremap_resource(dev, mem);
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(priv->base)) {
ret = PTR_ERR(priv->base);
goto err_pm_put;
return ret;
}
-static int sata_rcar_remove(struct platform_device *pdev)
+static void sata_rcar_remove(struct platform_device *pdev)
{
struct ata_host *host = platform_get_drvdata(pdev);
struct sata_rcar_priv *priv = host->private_data;
pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
-
- return 0;
}
#ifdef CONFIG_PM_SLEEP
static struct platform_driver sata_rcar_driver = {
.probe = sata_rcar_probe,
- .remove = sata_rcar_remove,
+ .remove_new = sata_rcar_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = sata_rcar_match,
static int sil24_exec_polled_cmd(struct ata_port *ap, int pmp,
const struct ata_taskfile *tf,
int is_cmd, u32 ctrl,
- unsigned long timeout_msec)
+ unsigned int timeout_msec)
{
void __iomem *port = sil24_port_base(ap);
struct sil24_port_priv *pp = ap->private_data;
{
struct ata_port *ap = link->ap;
int pmp = sata_srst_pmp(link);
- unsigned long timeout_msec = 0;
+ unsigned int timeout_msec = 0;
struct ata_taskfile tf;
const char *reason;
int rc;
.dma_boundary = ATA_DMA_BOUNDARY,
};
-/* TODO: inherit from base port_ops after converting to new EH */
static struct ata_port_operations pdc_20621_ops = {
.inherits = &ata_sff_port_ops,
static ssize_t do_rbd_remove(const char *buf, size_t count)
{
struct rbd_device *rbd_dev = NULL;
- struct list_head *tmp;
int dev_id;
char opt_buf[6];
bool force = false;
ret = -ENOENT;
spin_lock(&rbd_dev_list_lock);
- list_for_each(tmp, &rbd_dev_list) {
- rbd_dev = list_entry(tmp, struct rbd_device, node);
+ list_for_each_entry(rbd_dev, &rbd_dev_list, node) {
if (rbd_dev->dev_id == dev_id) {
ret = 0;
break;
return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
}
-static int crb_check_flags(struct tpm_chip *chip)
-{
- u32 val;
- int ret;
-
- ret = crb_request_locality(chip, 0);
- if (ret)
- return ret;
-
- ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
- if (ret)
- goto release;
-
- if (val == 0x414D4400U /* AMD */)
- chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
-
-release:
- crb_relinquish_locality(chip, 0);
-
- return ret;
-}
-
static const struct tpm_class_ops tpm_crb = {
.flags = TPM_OPS_AUTO_STARTUP,
.status = crb_status,
FW_BUG "TPM2 ACPI table has wrong size %u for start method type %d\n",
buf->header.length,
ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
crb_pluton = ACPI_ADD_PTR(struct tpm2_crb_pluton, buf, sizeof(*buf));
rc = crb_map_pluton(dev, priv, buf, crb_pluton);
if (rc)
- return rc;
+ goto out;
}
priv->sm = sm;
if (rc)
goto out;
- rc = crb_check_flags(chip);
- if (rc)
- goto out;
+#ifdef CONFIG_X86
+ /* A quirk for https://www.amd.com/en/support/kb/faq/pa-410 */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+ dev_info(dev, "Disabling hwrng\n");
+ chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+ }
+#endif /* CONFIG_X86 */
rc = tpm_chip_register(chip);
return 0;
}
+static int zynq_gpio_irq_reqres(struct irq_data *d)
+{
+ struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+ int ret;
+
+ ret = pm_runtime_resume_and_get(chip->parent);
+ if (ret < 0)
+ return ret;
+
+ return gpiochip_reqres_irq(chip, d->hwirq);
+}
+
+static void zynq_gpio_irq_relres(struct irq_data *d)
+{
+ struct gpio_chip *chip = irq_data_get_irq_chip_data(d);
+
+ gpiochip_relres_irq(chip, d->hwirq);
+ pm_runtime_put(chip->parent);
+}
+
/* irq chip descriptor */
static const struct irq_chip zynq_gpio_level_irqchip = {
.name = DRIVER_NAME,
.irq_unmask = zynq_gpio_irq_unmask,
.irq_set_type = zynq_gpio_set_irq_type,
.irq_set_wake = zynq_gpio_set_wake,
+ .irq_request_resources = zynq_gpio_irq_reqres,
+ .irq_release_resources = zynq_gpio_irq_relres,
.flags = IRQCHIP_EOI_THREADED | IRQCHIP_EOI_IF_HANDLED |
IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
- GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static const struct irq_chip zynq_gpio_edge_irqchip = {
.irq_unmask = zynq_gpio_irq_unmask,
.irq_set_type = zynq_gpio_set_irq_type,
.irq_set_wake = zynq_gpio_set_wake,
+ .irq_request_resources = zynq_gpio_irq_reqres,
+ .irq_release_resources = zynq_gpio_irq_relres,
.flags = IRQCHIP_MASK_ON_SUSPEND | IRQCHIP_IMMUTABLE,
- GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static void zynq_gpio_handle_bank_irq(struct zynq_gpio *gpio,
mem_info->local_mem_size_public,
mem_info->local_mem_size_private);
- if (amdgpu_sriov_vf(adev))
- mem_info->mem_clk_max = adev->clock.default_mclk / 100;
- else if (adev->pm.dpm_enabled) {
+ if (adev->pm.dpm_enabled) {
if (amdgpu_emu_mode == 1)
mem_info->mem_clk_max = 0;
else
uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
{
/* the sclk is in quantas of 10kHz */
- if (amdgpu_sriov_vf(adev))
- return adev->clock.default_sclk / 100;
- else if (adev->pm.dpm_enabled)
+ if (adev->pm.dpm_enabled)
return amdgpu_dpm_get_sclk(adev, false) / 100;
else
return 100;
struct atom_umc_info_v3_1 v31;
struct atom_umc_info_v3_2 v32;
struct atom_umc_info_v3_3 v33;
+ struct atom_umc_info_v4_0 v40;
};
union vram_info {
if (amdgpu_atom_parse_data_header(mode_info->atom_context,
index, &size, &frev, &crev, &data_offset)) {
+ umc_info = (union umc_info *)(mode_info->atom_context->bios + data_offset);
if (frev == 3) {
- umc_info = (union umc_info *)
- (mode_info->atom_context->bios + data_offset);
switch (crev) {
case 1:
umc_config = le32_to_cpu(umc_info->v31.umc_config);
/* unsupported crev */
return false;
}
+ } else if (frev == 4) {
+ switch (crev) {
+ case 0:
+ umc_config1 = le32_to_cpu(umc_info->v40.umc_config1);
+ ecc_default_enabled =
+ (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false;
+ break;
+ default:
+ /* unsupported crev */
+ return false;
+ }
+ } else {
+ /* unsupported frev */
+ return false;
}
}
{
struct drm_gem_object *gobj;
unsigned long size;
- int r;
gobj = drm_gem_object_lookup(p->filp, data->handle);
if (gobj == NULL)
drm_gem_object_put(gobj);
size = amdgpu_bo_size(p->uf_bo);
- if (size != PAGE_SIZE || (data->offset + 8) > size) {
- r = -EINVAL;
- goto error_unref;
- }
+ if (size != PAGE_SIZE || data->offset > (size - 8))
+ return -EINVAL;
- if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm)) {
- r = -EINVAL;
- goto error_unref;
- }
+ if (amdgpu_ttm_tt_get_usermm(p->uf_bo->tbo.ttm))
+ return -EINVAL;
*offset = data->offset;
-
return 0;
-
-error_unref:
- amdgpu_bo_unref(&p->uf_bo);
- return r;
}
static int amdgpu_cs_p1_bo_handles(struct amdgpu_cs_parser *p,
*/
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
+ int ret;
+
amdgpu_asic_pre_asic_init(adev);
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 3) ||
- adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
- return amdgpu_atomfirmware_asic_init(adev, true);
- else
+ adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) {
+ amdgpu_psp_wait_for_bootloader(adev);
+ ret = amdgpu_atomfirmware_asic_init(adev, true);
+ return ret;
+ } else {
return amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ }
+
+ return 0;
}
/**
}
if (ret)
- dev_err(adev->dev, "GPU mode1 reset failed\n");
+ goto mode1_reset_failed;
amdgpu_device_load_pci_state(adev->pdev);
+ ret = amdgpu_psp_wait_for_bootloader(adev);
+ if (ret)
+ goto mode1_reset_failed;
/* wait for asic to come out of reset */
for (i = 0; i < adev->usec_timeout; i++) {
udelay(1);
}
+ if (i >= adev->usec_timeout) {
+ ret = -ETIMEDOUT;
+ goto mode1_reset_failed;
+ }
+
amdgpu_atombios_scratch_regs_engine_hung(adev, false);
+
+ return 0;
+
+mode1_reset_failed:
+ dev_err(adev->dev, "GPU mode1 reset failed\n");
return ret;
}
struct drm_device *dev = adev_to_drm(adev);
ktime_get_ts64(&adev->reset_time);
- dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
+ dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_NOWAIT,
amdgpu_devcoredump_read, amdgpu_devcoredump_free);
}
#endif
struct gc_info_v1_1 v1_1;
struct gc_info_v1_2 v1_2;
struct gc_info_v2_0 v2;
+ struct gc_info_v2_1 v2_1;
};
static int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) /
le32_to_cpu(gc_info->v2.gc_num_sh_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc);
+ if (gc_info->v2.header.version_minor == 1) {
+ adev->gfx.config.gc_num_tcp_per_sa = le32_to_cpu(gc_info->v2_1.gc_num_tcp_per_sh);
+ adev->gfx.config.gc_tcp_size_per_cu = le32_to_cpu(gc_info->v2_1.gc_tcp_size_per_cu);
+ adev->gfx.config.gc_num_sdp_interface = le32_to_cpu(gc_info->v2_1.gc_num_sdp_interface); /* per XCD */
+ adev->gfx.config.gc_num_cu_per_sqc = le32_to_cpu(gc_info->v2_1.gc_num_cu_per_sqc);
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_instruction_cache_size_per_sqc);
+ adev->gfx.config.gc_l1_data_cache_size_per_sqc = le32_to_cpu(gc_info->v2_1.gc_scalar_data_cache_size_per_sqc);
+ adev->gfx.config.gc_tcc_size = le32_to_cpu(gc_info->v2_1.gc_tcc_size); /* per XCD */
+ }
break;
default:
dev_err(adev->dev,
union mall_info {
struct mall_info_v1_0 v1;
+ struct mall_info_v2_0 v2;
};
static int amdgpu_discovery_get_mall_info(struct amdgpu_device *adev)
adev->gmc.mall_size = mall_size;
adev->gmc.m_half_use = half_use;
break;
+ case 2:
+ mall_size_per_umc = le32_to_cpu(mall_info->v2.mall_size_per_umc);
+ adev->gmc.mall_size = mall_size_per_umc * adev->gmc.num_umc;
+ break;
default:
dev_err(adev->dev,
"Unhandled MALL info table %d.%d\n",
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
+#include <drm/drm_damage_helper.h>
+#include <drm/drm_drv.h>
#include <drm/drm_edid.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_gem_framebuffer_helper.h>
return true;
}
+static int amdgpu_dirtyfb(struct drm_framebuffer *fb, struct drm_file *file,
+ unsigned int flags, unsigned int color,
+ struct drm_clip_rect *clips, unsigned int num_clips)
+{
+
+ if (file)
+ return -ENOSYS;
+
+ return drm_atomic_helper_dirtyfb(fb, file, flags, color, clips,
+ num_clips);
+}
+
static const struct drm_framebuffer_funcs amdgpu_fb_funcs = {
.destroy = drm_gem_fb_destroy,
.create_handle = drm_gem_fb_create_handle,
};
+static const struct drm_framebuffer_funcs amdgpu_fb_funcs_atomic = {
+ .destroy = drm_gem_fb_destroy,
+ .create_handle = drm_gem_fb_create_handle,
+ .dirty = amdgpu_dirtyfb
+};
+
uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
uint64_t bo_flags)
{
if (ret)
goto err;
- ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
+ if (drm_drv_uses_atomic_modeset(dev))
+ ret = drm_framebuffer_init(dev, &rfb->base,
+ &amdgpu_fb_funcs_atomic);
+ else
+ ret = drm_framebuffer_init(dev, &rfb->base, &amdgpu_fb_funcs);
if (ret)
goto err;
uint32_t gc_gl1c_per_sa;
uint32_t gc_gl1c_size_per_instance;
uint32_t gc_gl2c_per_gpu;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_tcc_size;
};
struct amdgpu_cu_info {
}
/* SECUREDISPLAY end */
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
+{
+ struct psp_context *psp = &adev->psp;
+ int ret = 0;
+
+ if (!amdgpu_sriov_vf(adev) && psp->funcs && psp->funcs->wait_for_bootloader != NULL)
+ ret = psp->funcs->wait_for_bootloader(psp);
+
+ return ret;
+}
+
static int psp_hw_start(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
struct psp_funcs {
int (*init_microcode)(struct psp_context *psp);
+ int (*wait_for_bootloader)(struct psp_context *psp);
int (*bootloader_load_kdb)(struct psp_context *psp);
int (*bootloader_load_spl)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int is_psp_fw_valid(struct psp_bin_desc bin);
+int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
+
#endif
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
union ta_ras_cmd_input *info;
- int ret = 0;
+ int ret;
if (!con)
return -EINVAL;
if (enable &&
head->block != AMDGPU_RAS_BLOCK__GFX &&
!amdgpu_ras_is_feature_allowed(adev, head))
- goto out;
+ return 0;
/* Only enable gfx ras feature from host side */
if (head->block == AMDGPU_RAS_BLOCK__GFX &&
enable ? "enable":"disable",
get_ras_block_str(head),
amdgpu_ras_is_poison_mode_supported(adev), ret);
- goto out;
+ return ret;
}
+
+ kfree(info);
}
/* setup the obj */
__amdgpu_ras_feature_enable(adev, head, enable);
-out:
- if (head->block == AMDGPU_RAS_BLOCK__GFX)
- kfree(info);
- return ret;
+
+ return 0;
}
/* Only used in device probe stage and called only once. */
if (amdgpu_sriov_vf(adev)) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+ case IP_VERSION(13, 0, 6):
return true;
default:
return false;
case IP_VERSION(11, 0, 7): /* Sienna cichlid */
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 2): /* Aldebaran */
- case IP_VERSION(13, 0, 6):
case IP_VERSION(13, 0, 10):
return true;
+ case IP_VERSION(13, 0, 6):
+ return (adev->gmc.is_app_apu) ? false : true;
default:
return false;
}
if (adev->rev_id == 0) {
WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1,
REDUCE_FIFO_DEPTH_BY_2, 2);
+ } else {
+ WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2,
+ SPARE, 0x1);
}
}
}
if (r)
return r;
- r = amdgpu_gfx_sysfs_init(adev);
+ r = amdgpu_gfx_ras_sw_init(adev);
if (r)
return r;
- return amdgpu_gfx_ras_sw_init(adev);
+
+ if (!amdgpu_sriov_vf(adev))
+ r = amdgpu_gfx_sysfs_init(adev);
+
+ return r;
}
static int gfx_v9_4_3_sw_fini(void *handle)
gfx_v9_4_3_mec_fini(adev);
amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
gfx_v9_4_3_free_microcode(adev);
- amdgpu_gfx_sysfs_fini(adev);
+ if (!amdgpu_sriov_vf(adev))
+ amdgpu_gfx_sysfs_fini(adev);
return 0;
}
WREG32_SOC15(GC, GET_INST(GC, xcc_id),
regRLC_CGTT_MGCG_OVERRIDE, data);
- def = data = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL);
-
- if (enable)
- data &= ~RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
- else
- data |= RLC_CLK_CNTL__RLC_SRAM_CLK_GATER_OVERRIDE_MASK;
-
- if (def != data)
- WREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CLK_CNTL, data);
}
static void gfx_v9_4_3_xcc_update_repeater_fgcg(struct amdgpu_device *adev,
uint32_t i;
uint32_t data;
- data = REG_SET_FIELD(0, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
+ data = RREG32_SOC15(GC, GET_INST(GC, 0), regSQ_TIMEOUT_CONFIG);
+ data = REG_SET_FIELD(data, SQ_TIMEOUT_CONFIG, TIMEOUT_FATAL_DISABLE,
amdgpu_watchdog_timer.timeout_fatal_disable ? 1 : 0);
if (amdgpu_watchdog_timer.timeout_fatal_disable &&
cancel_delayed_work_sync(&adev->jpeg.idle_work);
- if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
- ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ if (!amdgpu_sriov_vf(adev)) {
+ if (adev->jpeg.cur_state != AMD_PG_STATE_GATE)
+ ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE);
+ }
return ret;
}
XCC_DOORBELL_FENCE__SHUB_SLV_MODE_MASK);
}
+
+ if (!amdgpu_sriov_vf(adev)) {
+ u32 baco_cntl;
+ for_each_inst(i, adev->aid_mask) {
+ baco_cntl = RREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL);
+ if (baco_cntl & (BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK)) {
+ baco_cntl &= ~(
+ BIF_BX0_BACO_CNTL__BACO_DUMMY_EN_MASK |
+ BIF_BX0_BACO_CNTL__BACO_EN_MASK);
+ dev_dbg(adev->dev,
+ "Unsetting baco dummy mode %x",
+ baco_cntl);
+ WREG32_SOC15(NBIO, i, regBIF_BX0_BACO_CNTL,
+ baco_cntl);
+ }
+ }
+ }
}
static u64 nbio_v7_9_get_pcie_replay_count(struct amdgpu_device *adev)
return sol_reg != 0x0;
}
-static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+static int psp_v13_0_wait_for_vmbx_ready(struct psp_context *psp)
{
struct amdgpu_device *adev = psp->adev;
+ int retry_loop, ret;
- int ret;
- int retry_loop;
+ for (retry_loop = 0; retry_loop < 70; retry_loop++) {
+ /* Wait for bootloader to signify that is
+ ready having bit 31 of C2PMSG_33 set to 1 */
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_33),
+ 0x80000000, 0xffffffff, false);
+
+ if (ret == 0)
+ break;
+ }
+
+ if (ret)
+ dev_warn(adev->dev, "Bootloader wait timed out");
+
+ return ret;
+}
+
+static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+ int retry_loop, ret;
/* Wait for bootloader to signify that it is ready having bit 31 of
* C2PMSG_35 set to 1. All other bits are expected to be cleared.
return ret;
}
+static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp)
+{
+ struct amdgpu_device *adev = psp->adev;
+
+ if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(13, 0, 6)) {
+ psp_v13_0_wait_for_vmbx_ready(psp);
+
+ return psp_v13_0_wait_for_bootloader(psp);
+ }
+
+ return 0;
+}
+
static int psp_v13_0_bootloader_load_component(struct psp_context *psp,
struct psp_bin_desc *bin_desc,
enum psp_bootloader_cmd bl_cmd)
static const struct psp_funcs psp_v13_0_funcs = {
.init_microcode = psp_v13_0_init_microcode,
+ .wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
.bootloader_load_kdb = psp_v13_0_bootloader_load_kdb,
.bootloader_load_spl = psp_v13_0_bootloader_load_spl,
.bootloader_load_sysdrv = psp_v13_0_bootloader_load_sysdrv,
*/
if (amdgpu_gpu_recovery == 4 || amdgpu_gpu_recovery == 5)
return AMD_RESET_METHOD_MODE2;
+ else if (!(adev->flags & AMD_IS_APU))
+ return AMD_RESET_METHOD_MODE1;
else
- return AMD_RESET_METHOD_NONE;
+ return AMD_RESET_METHOD_MODE2;
default:
break;
}
default:
break;
}
- kfd_signal_event_interrupt(pasid, context_id0 & 0xffffff, 24);
+ kfd_signal_event_interrupt(pasid, sq_int_data, 24);
} else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) {
kfd_set_dbg_ev_from_interrupt(dev, pasid,
KFD_DEBUG_DOORBELL_ID(context_id0),
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
mqd->get_wave_state = get_wave_state;
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
mqd->destroy_mqd = destroy_hiq_mqd;
mqd->is_occupied = kfd_is_occupied_cp;
mqd->mqd_size = sizeof(struct v11_compute_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd;
#endif
mqd->destroy_mqd = kfd_destroy_mqd_sdma;
mqd->is_occupied = kfd_is_occupied_sdma;
mqd->mqd_size = sizeof(struct v11_sdma_mqd);
+ mqd->mqd_stride = kfd_mqd_stride;
#if defined(CONFIG_DEBUG_FS)
mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
#endif
WRITE_ONCE(p->svms.faulting_task, NULL);
if (r) {
pr_debug("failed %d to get svm range pages\n", r);
+ if (r == -EBUSY)
+ r = -EAGAIN;
goto unreserve_out;
}
#include "amdgpu_dm_debugfs.h"
#endif
#include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
#include "ivsrcid/ivsrcid_vislands30.h"
enum dc_connection_type new_connection_type = dc_connection_none;
const struct dc_plane_cap *plane;
bool psr_feature_enabled = false;
+ bool replay_feature_enabled = false;
int max_overlay = dm->dc->caps.max_slave_planes;
dm->display_indexes_num = dm->dc->caps.max_streams;
}
}
+ if (!(amdgpu_dc_debug_mask & DC_DISABLE_REPLAY)) {
+ switch (adev->ip_versions[DCE_HWIP][0]) {
+ case IP_VERSION(3, 1, 4):
+ case IP_VERSION(3, 1, 5):
+ case IP_VERSION(3, 1, 6):
+ case IP_VERSION(3, 2, 0):
+ case IP_VERSION(3, 2, 1):
+ replay_feature_enabled = true;
+ break;
+ default:
+ replay_feature_enabled = amdgpu_dc_feature_mask & DC_REPLAY_MASK;
+ break;
+ }
+ }
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
amdgpu_dm_update_connector_after_detect(aconnector);
setup_backlight_device(dm, aconnector);
+ /*
+ * Disable psr if replay can be enabled
+ */
+ if (replay_feature_enabled && amdgpu_dm_setup_replay(link, aconnector))
+ psr_feature_enabled = false;
+
if (psr_feature_enabled)
amdgpu_dm_set_psr_caps(link);
if (recalculate_timing)
drm_mode_set_crtcinfo(&saved_mode, 0);
- else
+ else if (!old_stream)
drm_mode_set_crtcinfo(&mode, 0);
/*
#include "dc.h"
#include "amdgpu.h"
#include "amdgpu_dm_psr.h"
+#include "amdgpu_dm_replay.h"
#include "amdgpu_dm_crtc.h"
#include "amdgpu_dm_plane.h"
#include "amdgpu_dm_trace.h"
* fill_dc_dirty_rects().
*/
if (vblank_work->stream && vblank_work->stream->link) {
- if (vblank_work->enable) {
+ /*
+ * Prioritize replay, instead of psr
+ */
+ if (vblank_work->stream->link->replay_settings.replay_feature_enabled)
+ amdgpu_dm_replay_enable(vblank_work->stream, false);
+ else if (vblank_work->enable) {
if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
vblank_work->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_disable(vblank_work->stream);
#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
!amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
#endif
+ vblank_work->stream->link->panel_config.psr.disallow_replay &&
vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
amdgpu_dm_psr_enable(vblank_work->stream);
}
attributes.rotation_angle = 0;
attributes.attribute_flags.value = 0;
+ /* Enable cursor degamma ROM on DCN3+ for implicit sRGB degamma in DRM
+ * legacy gamma setup.
+ */
+ if (crtc_state->cm_is_degamma_srgb &&
+ adev->dm.dc->caps.color.dpp.gamma_corr)
+ attributes.attribute_flags.bits.ENABLE_CURSOR_DEGAMMA = 1;
+
attributes.pitch = afb->base.pitches[0] / afb->base.format->cpp[0];
if (crtc_state->stream) {
drm_plane_create_blend_mode_property(plane, blend_caps);
}
+ if (plane->type == DRM_PLANE_TYPE_PRIMARY) {
+ drm_plane_create_zpos_immutable_property(plane, 0);
+ } else if (plane->type == DRM_PLANE_TYPE_OVERLAY) {
+ unsigned int zpos = 1 + drm_plane_index(plane);
+ drm_plane_create_zpos_property(plane, zpos, 1, 254);
+ } else if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+ drm_plane_create_zpos_immutable_property(plane, 255);
+ }
+
if (plane->type == DRM_PLANE_TYPE_PRIMARY &&
plane_cap &&
(plane_cap->pixel_format_support.nv12 ||
AMD_DISPLAY_DMUB = $(addprefix $(AMDDALPATH)/dc/,$(DC_DMUB))
AMD_DISPLAY_EDID = $(addprefix $(AMDDALPATH)/dc/,$(DC_EDID))
AMD_DISPLAY_FILES += $(AMD_DISPLAY_DMUB) $(AMD_DISPLAY_EDID)
+
#define MAX_INSTANCE 6
#define MAX_SEGMENT 6
+#define SMU_REGISTER_WRITE_RETRY_COUNT 5
struct IP_BASE_INSTANCE {
unsigned int segment[MAX_SEGMENT];
unsigned int msg_id, unsigned int param)
{
uint32_t result;
+ uint32_t i = 0;
+ uint32_t read_back_data;
result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
/* Set the parameter register for the SMU message, unit is Mhz */
REG_WRITE(MP1_SMN_C2PMSG_37, param);
- /* Trigger the message transaction by writing the message ID */
- generic_write_indirect_reg(CTX,
- REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
- mmMP1_C2PMSG_3, msg_id);
+ for (i = 0; i < SMU_REGISTER_WRITE_RETRY_COUNT; i++) {
+ /* Trigger the message transaction by writing the message ID */
+ generic_write_indirect_reg(CTX,
+ REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+ mmMP1_C2PMSG_3, msg_id);
+ read_back_data = generic_read_indirect_reg(CTX,
+ REG_NBIO(RSMU_INDEX), REG_NBIO(RSMU_DATA),
+ mmMP1_C2PMSG_3);
+ if (read_back_data == msg_id)
+ break;
+ udelay(2);
+ smu_print("SMU msg id write fail %x times. \n", i + 1);
+ }
result = dcn315_smu_wait_for_response(clk_mgr, 10, 200000);
}
}
- /* Check for case where we are going from odm 2:1 to max
- * pipe scenario. For these cases, we will call
- * commit_minimal_transition_state() to exit out of odm 2:1
- * first before processing new streams
+ /* ODM Combine 2:1 power optimization is only applied for single stream
+ * scenario, it uses extra pipes than needed to reduce power consumption
+ * We need to switch off this feature to make room for new streams.
*/
- if (stream_count == dc->res_pool->pipe_count) {
+ if (stream_count > dc->current_state->stream_count &&
+ dc->current_state->stream_count == 1) {
for (i = 0; i < dc->res_pool->pipe_count; i++) {
pipe = &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->next_odm_pipe)
top_pipe_to_program->stream->update_flags.raw = 0;
}
+static void wait_for_outstanding_hw_updates(struct dc *dc, const struct dc_state *dc_context)
+{
+/*
+ * This function calls HWSS to wait for any potentially double buffered
+ * operations to complete. It should be invoked as a pre-amble prior
+ * to full update programming before asserting any HW locks.
+ */
+ int pipe_idx;
+ int opp_inst;
+ int opp_count = dc->res_pool->pipe_count;
+ struct hubp *hubp;
+ int mpcc_inst;
+ const struct pipe_ctx *pipe_ctx;
+
+ for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
+ pipe_ctx = &dc_context->res_ctx.pipe_ctx[pipe_idx];
+
+ if (!pipe_ctx->stream)
+ continue;
+
+ if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
+ pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
+
+ hubp = pipe_ctx->plane_res.hubp;
+ if (!hubp)
+ continue;
+
+ mpcc_inst = hubp->inst;
+ // MPCC inst is equal to pipe index in practice
+ for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
+ if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
+ dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
+ dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
+ break;
+ }
+ }
+ }
+}
+
static void commit_planes_for_stream(struct dc *dc,
struct dc_surface_update *srf_updates,
int surface_count,
// dc->current_state anymore, so we have to cache it before we apply
// the new SubVP context
subvp_prev_use = false;
-
-
dc_z10_restore(dc);
-
- if (update_type == UPDATE_TYPE_FULL) {
- /* wait for all double-buffer activity to clear on all pipes */
- int pipe_idx;
-
- for (pipe_idx = 0; pipe_idx < dc->res_pool->pipe_count; pipe_idx++) {
- struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
-
- if (!pipe_ctx->stream)
- continue;
-
- if (pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear)
- pipe_ctx->stream_res.tg->funcs->wait_drr_doublebuffer_pending_clear(pipe_ctx->stream_res.tg);
- }
- }
+ if (update_type == UPDATE_TYPE_FULL)
+ wait_for_outstanding_hw_updates(dc, context);
if (update_type == UPDATE_TYPE_FULL) {
dc_allow_idle_optimizations(dc, false);
v_active,
offset);
- if (!blank && dc->debug.enable_single_display_2to1_odm_policy) {
- /* when exiting dynamic ODM need to reinit DPG state for unused pipes */
- struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe;
-
- odm_pipe = pipe_ctx->next_odm_pipe;
-
- while (old_odm_pipe) {
- if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx)
- dc->hwss.set_disp_pattern_generator(dc,
- old_odm_pipe,
- CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
- CONTROLLER_DP_COLOR_SPACE_UDEFINED,
- COLOR_DEPTH_888,
- NULL,
- 0,
- 0,
- 0);
- old_odm_pipe = old_odm_pipe->next_odm_pipe;
- if (odm_pipe)
- odm_pipe = odm_pipe->next_odm_pipe;
- }
- }
-
if (!blank)
if (stream_res->abm) {
dc->hwss.set_pipe(pipe_ctx);
|| plane_state->update_flags.bits.global_alpha_change
|| plane_state->update_flags.bits.per_pixel_alpha_change) {
// MPCC inst is equal to pipe index in practice
- int mpcc_inst = hubp->inst;
- int opp_inst;
- int opp_count = dc->res_pool->pipe_count;
-
- for (opp_inst = 0; opp_inst < opp_count; opp_inst++) {
- if (dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst]) {
- dc->res_pool->mpc->funcs->wait_for_idle(dc->res_pool->mpc, mpcc_inst);
- dc->res_pool->opps[opp_inst]->mpcc_disconnect_pending[mpcc_inst] = false;
- break;
- }
- }
hws->funcs.update_mpcc(dc, pipe_ctx);
}
struct dc_state *context)
{
struct dce_hwseq *hws = dc->hwseq;
- /* Only need to unblank on top pipe */
- if ((pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.abm_level)
- && !pipe_ctx->top_pipe && !pipe_ctx->prev_odm_pipe)
- hws->funcs.blank_pixel_data(dc, pipe_ctx, !pipe_ctx->plane_state->visible);
+ /* Only need to unblank on top pipe */
+ if (resource_is_pipe_type(pipe_ctx, OTG_MASTER)) {
+ if (pipe_ctx->update_flags.bits.enable ||
+ pipe_ctx->update_flags.bits.odm ||
+ pipe_ctx->stream->update_flags.bits.abm_level)
+ hws->funcs.blank_pixel_data(dc, pipe_ctx,
+ !pipe_ctx->plane_state ||
+ !pipe_ctx->plane_state->visible);
+ }
/* Only update TG on top pipe */
if (pipe_ctx->update_flags.bits.global_sync && !pipe_ctx->top_pipe
}
}
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params)
+{
+ unsigned int i;
+ unsigned int triggers = 0;
+
+ if (params->triggers.surface_update)
+ triggers |= 0x100;
+ if (params->triggers.cursor_update)
+ triggers |= 0x8;
+ if (params->triggers.force_trigger)
+ triggers |= 0x1;
+
+ for (i = 0; i < num_pipes; i++)
+ pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+ triggers, params->num_frames);
+}
void dcn30_prepare_bandwidth(struct dc *dc,
struct dc_state *context);
+void dcn30_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+ int num_pipes, const struct dc_static_screen_params *params);
#endif /* __DC_HWSS_DCN30_H__ */
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn30_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.get_hw_state = dcn10_get_hw_state,
.clear_status_bits = dcn10_clear_status_bits,
.wait_for_mpcc_disconnect = dcn10_wait_for_mpcc_disconnect,
+ .edp_backlight_control = dce110_edp_backlight_control,
.edp_power_control = dce110_edp_power_control,
.edp_wait_for_hpd_ready = dce110_edp_wait_for_hpd_ready,
.set_cursor_position = dcn10_set_cursor_position,
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn30_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn30_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
.update_bandwidth = dcn20_update_bandwidth,
.set_drr = dcn10_set_drr,
.get_position = dcn10_get_position,
- .set_static_screen_control = dcn10_set_static_screen_control,
+ .set_static_screen_control = dcn30_set_static_screen_control,
.setup_stereo = dcn10_setup_stereo,
.set_avmute = dcn30_set_avmute,
.log_hw_state = dcn10_log_hw_state,
return free_pipe_idx;
}
+static struct pipe_ctx *find_idle_secondary_pipe_check_mpo(
+ struct resource_context *res_ctx,
+ const struct resource_pool *pool,
+ const struct pipe_ctx *primary_pipe)
+{
+ int i;
+ struct pipe_ctx *secondary_pipe = NULL;
+ struct pipe_ctx *next_odm_mpo_pipe = NULL;
+ int primary_index, preferred_pipe_idx;
+ struct pipe_ctx *old_primary_pipe = NULL;
+
+ /*
+ * Modified from find_idle_secondary_pipe
+ * With windowed MPO and ODM, we want to avoid the case where we want a
+ * free pipe for the left side but the free pipe is being used on the
+ * right side.
+ * Add check on current_state if the primary_pipe is the left side,
+ * to check the right side ( primary_pipe->next_odm_pipe ) to see if
+ * it is using a pipe for MPO ( primary_pipe->next_odm_pipe->bottom_pipe )
+ * - If so, then don't use this pipe
+ * EXCEPTION - 3 plane ( 2 MPO plane ) case
+ * - in this case, the primary pipe has already gotten a free pipe for the
+ * MPO window in the left
+ * - when it tries to get a free pipe for the MPO window on the right,
+ * it will see that it is already assigned to the right side
+ * ( primary_pipe->next_odm_pipe ). But in this case, we want this
+ * free pipe, since it will be for the right side. So add an
+ * additional condition, that skipping the free pipe on the right only
+ * applies if the primary pipe has no bottom pipe currently assigned
+ */
+ if (primary_pipe) {
+ primary_index = primary_pipe->pipe_idx;
+ old_primary_pipe = &primary_pipe->stream->ctx->dc->current_state->res_ctx.pipe_ctx[primary_index];
+ if ((old_primary_pipe->next_odm_pipe) && (old_primary_pipe->next_odm_pipe->bottom_pipe)
+ && (!primary_pipe->bottom_pipe))
+ next_odm_mpo_pipe = old_primary_pipe->next_odm_pipe->bottom_pipe;
+
+ preferred_pipe_idx = (pool->pipe_count - 1) - primary_pipe->pipe_idx;
+ if ((res_ctx->pipe_ctx[preferred_pipe_idx].stream == NULL) &&
+ !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == preferred_pipe_idx)) {
+ secondary_pipe = &res_ctx->pipe_ctx[preferred_pipe_idx];
+ secondary_pipe->pipe_idx = preferred_pipe_idx;
+ }
+ }
+
+ /*
+ * search backwards for the second pipe to keep pipe
+ * assignment more consistent
+ */
+ if (!secondary_pipe)
+ for (i = pool->pipe_count - 1; i >= 0; i--) {
+ if ((res_ctx->pipe_ctx[i].stream == NULL) &&
+ !(next_odm_mpo_pipe && next_odm_mpo_pipe->pipe_idx == i)) {
+ secondary_pipe = &res_ctx->pipe_ctx[i];
+ secondary_pipe->pipe_idx = i;
+ break;
+ }
+ }
+
+ return secondary_pipe;
+}
+
+static struct pipe_ctx *dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+ struct dc_state *state,
+ const struct resource_pool *pool,
+ struct dc_stream_state *stream,
+ const struct pipe_ctx *head_pipe)
+{
+ struct resource_context *res_ctx = &state->res_ctx;
+ struct pipe_ctx *idle_pipe, *pipe;
+ struct resource_context *old_ctx = &stream->ctx->dc->current_state->res_ctx;
+ int head_index;
+
+ if (!head_pipe)
+ ASSERT(0);
+
+ /*
+ * Modified from dcn20_acquire_idle_pipe_for_layer
+ * Check if head_pipe in old_context already has bottom_pipe allocated.
+ * - If so, check if that pipe is available in the current context.
+ * -- If so, reuse pipe from old_context
+ */
+ head_index = head_pipe->pipe_idx;
+ pipe = &old_ctx->pipe_ctx[head_index];
+ if (pipe->bottom_pipe && res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx].stream == NULL) {
+ idle_pipe = &res_ctx->pipe_ctx[pipe->bottom_pipe->pipe_idx];
+ idle_pipe->pipe_idx = pipe->bottom_pipe->pipe_idx;
+ } else {
+ idle_pipe = find_idle_secondary_pipe_check_mpo(res_ctx, pool, head_pipe);
+ if (!idle_pipe)
+ return NULL;
+ }
+
+ idle_pipe->stream = head_pipe->stream;
+ idle_pipe->stream_res.tg = head_pipe->stream_res.tg;
+ idle_pipe->stream_res.opp = head_pipe->stream_res.opp;
+
+ idle_pipe->plane_res.hubp = pool->hubps[idle_pipe->pipe_idx];
+ idle_pipe->plane_res.ipp = pool->ipps[idle_pipe->pipe_idx];
+ idle_pipe->plane_res.dpp = pool->dpps[idle_pipe->pipe_idx];
+ idle_pipe->plane_res.mpcc_inst = pool->dpps[idle_pipe->pipe_idx]->inst;
+
+ return idle_pipe;
+}
+
struct pipe_ctx *dcn32_acquire_free_pipe_as_secondary_dpp_pipe(
const struct dc_state *cur_ctx,
struct dc_state *new_ctx,
const struct resource_pool *pool,
const struct pipe_ctx *opp_head_pipe)
{
- int free_pipe_idx =
- find_optimal_free_pipe_as_secondary_dpp_pipe(
- &cur_ctx->res_ctx, &new_ctx->res_ctx,
- pool, opp_head_pipe);
+
+ int free_pipe_idx;
struct pipe_ctx *free_pipe;
+ if (!opp_head_pipe->stream->ctx->dc->config.enable_windowed_mpo_odm)
+ return dcn32_acquire_idle_pipe_for_head_pipe_in_layer(
+ new_ctx, pool, opp_head_pipe->stream, opp_head_pipe);
+
+ free_pipe_idx = find_optimal_free_pipe_as_secondary_dpp_pipe(
+ &cur_ctx->res_ctx, &new_ctx->res_ctx,
+ pool, opp_head_pipe);
if (free_pipe_idx >= 0) {
free_pipe = &new_ctx->res_ctx.pipe_ctx[free_pipe_idx];
free_pipe->pipe_idx = free_pipe_idx;
context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz =
pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
+ if (dc->ctx->dce_version < DCN_VERSION_3_1 &&
+ context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
+ dcn20_adjust_freesync_v_startup(
+ &context->res_ctx.pipe_ctx[i].stream->timing,
+ &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
pipe_idx++;
}
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
- int i = 0;
display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
dcn20_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
- if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
- dcn20_adjust_freesync_v_startup(
- &context->res_ctx.pipe_ctx[i].stream->timing,
- &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
- }
-
BW_VAL_TRACE_END_WATERMARKS();
goto validate_out;
int vlevel = 0;
int pipe_split_from[MAX_PIPES];
int pipe_cnt = 0;
- int i = 0;
display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
DC_LOGGER_INIT(dc->ctx->logger);
dcn21_calculate_wm(dc, context, pipes, &pipe_cnt, pipe_split_from, vlevel, fast_validate);
dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- if (!context->res_ctx.pipe_ctx[i].stream)
- continue;
- if (context->res_ctx.pipe_ctx[i].stream->adaptive_sync_infopacket.valid)
- dcn20_adjust_freesync_v_startup(
- &context->res_ctx.pipe_ctx[i].stream->timing,
- &context->res_ctx.pipe_ctx[i].pipe_dlg_param.vstartup_start);
- }
-
BW_VAL_TRACE_END_WATERMARKS();
goto validate_out;
return num_lines;
}
+static unsigned int get_vertical_back_porch(struct dc_crtc_timing *timing)
+{
+ unsigned int v_active = 0, v_blank = 0, v_back_porch = 0;
+
+ v_active = timing->v_border_top + timing->v_addressable + timing->v_border_bottom;
+ v_blank = timing->v_total - v_active;
+ v_back_porch = v_blank - timing->v_front_porch - timing->v_sync_width;
+
+ return v_back_porch;
+}
+
int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *context,
display_e2e_pipe_params_st *pipes,
bool fast_validate)
for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
struct dc_crtc_timing *timing;
unsigned int num_lines = 0;
+ unsigned int v_back_porch = 0;
if (!res_ctx->pipe_ctx[i].stream)
continue;
else
pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
+ v_back_porch = get_vertical_back_porch(timing);
+
pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, num_lines);
- pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width);
+ // vblank_nom should not smaller than (VSync (timing->v_sync_width + v_back_porch) + 2)
+ // + 2 is because
+ // 1 -> VStartup_start should be 1 line before VSync
+ // 1 -> always reserve 1 line between start of vblank to vstartup signal
+ pipes[pipe_cnt].pipe.dest.vblank_nom =
+ max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width + v_back_porch + 2);
pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, max_allowed_vblank_nom);
if (pipe->plane_state &&
* - Delta for CEIL: delta_from_mid_point_in_us_1
* - Delta for FLOOR: delta_from_mid_point_in_us_2
*/
- if ((last_render_time_in_us / mid_point_frames_ceil) < in_out_vrr->min_duration_in_us) {
+ if (mid_point_frames_ceil &&
+ (last_render_time_in_us / mid_point_frames_ceil) <
+ in_out_vrr->min_duration_in_us) {
/* Check for out of range.
* If using CEIL produces a value that is out of range,
* then we are forced to use FLOOR.
/* Either we've calculated the number of frames to insert,
* or we need to insert min duration frames
*/
- if (last_render_time_in_us / frames_to_insert <
- in_out_vrr->min_duration_in_us){
+ if (frames_to_insert &&
+ (last_render_time_in_us / frames_to_insert) <
+ in_out_vrr->min_duration_in_us){
frames_to_insert -= (frames_to_insert > 1) ?
1 : 0;
}
DC_DISABLE_LTTPR_DP2_0 = (1 << 6), //0x40, disabled by default
DC_PSR_ALLOW_SMU_OPT = (1 << 7), //0x80, disabled by default
DC_PSR_ALLOW_MULTI_DISP_OPT = (1 << 8), //0x100, disabled by default
+ DC_REPLAY_MASK = (1 << 9), //0x200, disabled by default for dcn < 3.1.4
};
enum DC_DEBUG_MASK {
DC_DISABLE_PSR = 0x10,
DC_FORCE_SUBVP_MCLK_SWITCH = 0x20,
DC_DISABLE_MPO = 0x40,
+ DC_DISABLE_REPLAY = 0x50,
DC_ENABLE_DPIA_TRACE = 0x80,
};
UMC_CONFIG1__ENABLE_ECC_CAPABLE = 0x00010000,
};
+struct atom_umc_info_v4_0 {
+ struct atom_common_table_header table_header;
+ uint32_t ucode_reserved[5];
+ uint8_t umcip_min_ver;
+ uint8_t umcip_max_ver;
+ uint8_t vram_type;
+ uint8_t umc_config;
+ uint32_t mem_refclk_10khz;
+ uint32_t clk_reserved[4];
+ uint32_t golden_reserved;
+ uint32_t umc_config1;
+ uint32_t reserved[2];
+ uint8_t channel_num;
+ uint8_t channel_width;
+ uint8_t channel_reserve[2];
+ uint8_t umc_info_reserved[16];
+};
+
/*
***************************************************************************
Data Table vram_info structure
#define GC_TABLE_ID 0x4347
#define HARVEST_TABLE_SIGNATURE 0x56524148
#define VCN_INFO_TABLE_ID 0x004E4356
-#define MALL_INFO_TABLE_ID 0x4D414C4C
+#define MALL_INFO_TABLE_ID 0x4C4C414D
typedef enum
{
uint32_t gc_num_packer_per_sc;
};
+struct gc_info_v2_1 {
+ struct gpu_info_header header;
+
+ uint32_t gc_num_se;
+ uint32_t gc_num_cu_per_sh;
+ uint32_t gc_num_sh_per_se;
+ uint32_t gc_num_rb_per_se;
+ uint32_t gc_num_tccs;
+ uint32_t gc_num_gprs;
+ uint32_t gc_num_max_gs_thds;
+ uint32_t gc_gs_table_depth;
+ uint32_t gc_gsprim_buff_depth;
+ uint32_t gc_parameter_cache_depth;
+ uint32_t gc_double_offchip_lds_buffer;
+ uint32_t gc_wave_size;
+ uint32_t gc_max_waves_per_simd;
+ uint32_t gc_max_scratch_slots_per_cu;
+ uint32_t gc_lds_size;
+ uint32_t gc_num_sc_per_se;
+ uint32_t gc_num_packer_per_sc;
+ /* new for v2_1 */
+ uint32_t gc_num_tcp_per_sh;
+ uint32_t gc_tcp_size_per_cu;
+ uint32_t gc_num_sdp_interface;
+ uint32_t gc_num_cu_per_sqc;
+ uint32_t gc_instruction_cache_size_per_sqc;
+ uint32_t gc_scalar_data_cache_size_per_sqc;
+ uint32_t gc_tcc_size;
+};
+
typedef struct harvest_info_header {
uint32_t signature; /* Table Signature */
uint32_t version; /* Table Version */
uint32_t reserved[5];
};
+struct mall_info_v2_0 {
+ struct mall_info_header header;
+ uint32_t mall_size_per_umc;
+ uint32_t reserved[8];
+};
+
#define VCN_INFO_TABLE_MAX_NUM_INSTANCES 4
struct vcn_info_header {
(gc_ver != IP_VERSION(9, 4, 3)) &&
(attr == &sensor_dev_attr_temp2_input.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_label.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_input.dev_attr.attr ||
- attr == &sensor_dev_attr_temp3_label.dev_attr.attr))
+ attr == &sensor_dev_attr_temp3_label.dev_attr.attr ||
+ attr == &sensor_dev_attr_temp3_crit.dev_attr.attr))
return 0;
/* hotspot temperature for gc 9,4,3*/
/* only SOC15 dGPUs support hotspot and mem temperatures */
if (((adev->flags & AMD_IS_APU) || gc_ver < IP_VERSION(9, 0, 0) ||
(gc_ver == IP_VERSION(9, 4, 3))) &&
- (attr == &sensor_dev_attr_temp2_crit.dev_attr.attr ||
- attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
- attr == &sensor_dev_attr_temp3_crit.dev_attr.attr ||
+ (attr == &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr ||
attr == &sensor_dev_attr_temp1_emergency.dev_attr.attr ||
attr == &sensor_dev_attr_temp2_emergency.dev_attr.attr ||
size = sizeof(uint32_t);
if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_AVG_POWER, (void *)&query, &size))
seq_printf(m, "\t%u.%u W (average GPU)\n", query >> 8, query & 0xff);
+ size = sizeof(uint32_t);
+ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_INPUT_POWER, (void *)&query, &size))
+ seq_printf(m, "\t%u.%u W (current GPU)\n", query >> 8, query & 0xff);
size = sizeof(value);
seq_printf(m, "\n");
enum smu_feature_mask mask);
/**
- * @notify_display_change: Enable fast memory clock switching.
- *
- * Allows for fine grained memory clock switching but has more stringent
- * timing requirements.
+ * @notify_display_change: General interface call to let SMU know about DC change
*/
int (*notify_display_change)(struct smu_context *smu);
#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
#define PPSMC_MSG_SetPriorityDeltaGain 0x4B
#define PPSMC_MSG_AllowIHHostInterrupt 0x4C
-#define PPSMC_Message_Count 0x4D
+
+#define PPSMC_MSG_DALNotPresent 0x4E
+
+#define PPSMC_Message_Count 0x4F
//Debug Dump Message
#define DEBUGSMC_MSG_TestMessage 0x1
VOLTAGE_GUARDBAND_COUNT
} GFX_GUARDBAND_e;
-#define SMU_METRICS_TABLE_VERSION 0x5
+#define SMU_METRICS_TABLE_VERSION 0x7
typedef struct __attribute__((packed, aligned(4))) {
uint32_t AccumulationCounter;
uint32_t SocketThmResidencyAcc;
uint32_t VrThmResidencyAcc;
uint32_t HbmThmResidencyAcc;
- uint32_t spare;
+ uint32_t GfxLockXCDMak;
// New Items at end to maintain driver compatibility
uint32_t GfxclkFrequency[8];
#define PPSMC_MSG_GetMinGfxDpmFreq 0x32
#define PPSMC_MSG_GetMaxGfxDpmFreq 0x33
#define PPSMC_MSG_PrepareForDriverUnload 0x34
-#define PPSMC_Message_Count 0x35
+#define PPSMC_MSG_ReadThrottlerLimit 0x35
+#define PPSMC_MSG_QueryValidMcaCount 0x36
+#define PPSMC_MSG_McaBankDumpDW 0x37
+#define PPSMC_MSG_GetCTFLimit 0x38
+#define PPSMC_Message_Count 0x39
//PPSMC Reset Types for driver msg argument
#define PPSMC_RESET_TYPE_DRIVER_MODE_1_RESET 0x1
#define PPSMC_RESET_TYPE_DRIVER_MODE_2_RESET 0x2
#define PPSMC_RESET_TYPE_DRIVER_MODE_3_RESET 0x3
+//PPSMC Reset Types for driver msg argument
+#define PPSMC_THROTTLING_LIMIT_TYPE_SOCKET 0x1
+#define PPSMC_THROTTLING_LIMIT_TYPE_HBM 0x2
+
+//CTF/Throttle Limit types
+#define PPSMC_AID_THM_TYPE 0x1
+#define PPSMC_CCD_THM_TYPE 0x2
+#define PPSMC_XCD_THM_TYPE 0x3
+#define PPSMC_HBM_THM_TYPE 0x4
+
typedef uint32_t PPSMC_Result;
typedef uint32_t PPSMC_MSG;
__SMU_DUMMY_MAP(SetTjMax), \
__SMU_DUMMY_MAP(SetFanTemperatureTarget), \
__SMU_DUMMY_MAP(PrepareMp1ForUnload), \
+ __SMU_DUMMY_MAP(GetCTFLimit), \
__SMU_DUMMY_MAP(DramLogSetDramAddrHigh), \
__SMU_DUMMY_MAP(DramLogSetDramAddrLow), \
__SMU_DUMMY_MAP(DramLogSetDramSize), \
__SMU_DUMMY_MAP(AllowGpo), \
__SMU_DUMMY_MAP(Mode2Reset), \
__SMU_DUMMY_MAP(RequestI2cTransaction), \
- __SMU_DUMMY_MAP(GetMetricsTable),
+ __SMU_DUMMY_MAP(GetMetricsTable), \
+ __SMU_DUMMY_MAP(DALNotPresent),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
{
int ret = 0;
- if (!smu->pm_enabled)
- return ret;
-
- if (smu_cmn_feature_is_enabled(smu, SMU_FEATURE_DPM_UCLK_BIT) &&
- smu->adev->gmc.vram_type == AMDGPU_VRAM_TYPE_HBM)
- ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetUclkFastSwitch, 1, NULL);
+ if (!amdgpu_device_has_dc_support(smu->adev))
+ ret = smu_cmn_send_smc_msg(smu, SMU_MSG_DALNotPresent, NULL);
return ret;
}
MSG_MAP(AllowGpo, PPSMC_MSG_SetGpoAllow, 0),
MSG_MAP(AllowIHHostInterrupt, PPSMC_MSG_AllowIHHostInterrupt, 0),
MSG_MAP(ReenableAcDcInterrupt, PPSMC_MSG_ReenableAcDcInterrupt, 0),
+ MSG_MAP(DALNotPresent, PPSMC_MSG_DALNotPresent, 0),
};
static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
.send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
.gpo_control = smu_v13_0_gpo_control,
.get_ecc_info = smu_v13_0_0_get_ecc_info,
+ .notify_display_change = smu_v13_0_notify_display_change,
};
void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
MSG_MAP(SetSoftMinGfxclk, PPSMC_MSG_SetSoftMinGfxClk, 0),
MSG_MAP(SetSoftMaxGfxClk, PPSMC_MSG_SetSoftMaxGfxClk, 0),
MSG_MAP(PrepareMp1ForUnload, PPSMC_MSG_PrepareForDriverUnload, 0),
+ MSG_MAP(GetCTFLimit, PPSMC_MSG_GetCTFLimit, 0),
};
static const struct cmn2asic_mapping smu_v13_0_6_clk_map[SMU_CLK_COUNT] = {
return ret;
}
+static int smu_v13_0_6_get_thermal_temperature_range(struct smu_context *smu,
+ struct smu_temperature_range *range)
+{
+ struct amdgpu_device *adev = smu->adev;
+ u32 aid_temp, xcd_temp, mem_temp;
+ uint32_t smu_version;
+ u32 ccd_temp = 0;
+ int ret;
+
+ if (amdgpu_sriov_vf(smu->adev))
+ return 0;
+
+ if (!range)
+ return -EINVAL;
+
+ /*Check smu version, GetCtfLimit message only supported for smu version 85.69 or higher */
+ smu_cmn_get_smc_version(smu, NULL, &smu_version);
+ if (smu_version < 0x554500)
+ return 0;
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+ PPSMC_AID_THM_TYPE, &aid_temp);
+ if (ret)
+ goto failed;
+
+ if (adev->flags & AMD_IS_APU) {
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+ PPSMC_CCD_THM_TYPE, &ccd_temp);
+ if (ret)
+ goto failed;
+ }
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+ PPSMC_XCD_THM_TYPE, &xcd_temp);
+ if (ret)
+ goto failed;
+
+ range->hotspot_crit_max = max3(aid_temp, xcd_temp, ccd_temp) *
+ SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetCTFLimit,
+ PPSMC_HBM_THM_TYPE, &mem_temp);
+ if (ret)
+ goto failed;
+
+ range->mem_crit_max = mem_temp * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
+failed:
+ return ret;
+}
+
static int smu_v13_0_6_mode1_reset(struct smu_context *smu)
{
struct amdgpu_device *adev = smu->adev;
static bool smu_v13_0_6_is_mode1_reset_supported(struct smu_context *smu)
{
- /* TODO: Enable this when FW support is added */
- return false;
+ return true;
}
static bool smu_v13_0_6_is_mode2_reset_supported(struct smu_context *smu)
.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,
.set_pp_feature_mask = smu_cmn_set_pp_feature_mask,
.get_gpu_metrics = smu_v13_0_6_get_gpu_metrics,
+ .get_thermal_temperature_range = smu_v13_0_6_get_thermal_temperature_range,
.mode1_reset_is_support = smu_v13_0_6_is_mode1_reset_supported,
.mode2_reset_is_support = smu_v13_0_6_is_mode2_reset_supported,
.mode1_reset = smu_v13_0_6_mode1_reset,
typedef u32 intel_engine_mask_t;
#define ALL_ENGINES ((intel_engine_mask_t)~0ul)
+#define VIRTUAL_ENGINES BIT(BITS_PER_TYPE(intel_engine_mask_t) - 1)
struct intel_hw_status_page {
struct list_head timelines;
ve->base.flags = I915_ENGINE_IS_VIRTUAL;
+ BUILD_BUG_ON(ilog2(VIRTUAL_ENGINES) < I915_NUM_ENGINES);
+ ve->base.mask = VIRTUAL_ENGINES;
+
intel_context_init(&ve->context, &ve->base);
for (n = 0; n < count; n++) {
static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;
-static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
-{
- struct kvm *kvm = vgpu->vfio_device.kvm;
- int idx;
- bool ret;
-
- if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
- return false;
-
- idx = srcu_read_lock(&kvm->srcu);
- ret = kvm_is_visible_gfn(kvm, gfn);
- srcu_read_unlock(&kvm->srcu, idx);
-
- return ret;
-}
-
/*
* validate a gm address and related range size,
* translate it to host gm address
ops->set_pfn(se, s->shadow_page.mfn);
}
-/*
- * Check if can do 2M page
- * @vgpu: target vgpu
- * @entry: target pfn's gtt entry
- *
- * Return 1 if 2MB huge gtt shadowing is possible, 0 if miscondition,
- * negative if found err.
- */
-static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
- struct intel_gvt_gtt_entry *entry)
-{
- const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
- kvm_pfn_t pfn;
-
- if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
- return 0;
-
- if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
- return -EINVAL;
- pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
- if (is_error_noslot_pfn(pfn))
- return -EINVAL;
- return PageTransHuge(pfn_to_page(pfn));
-}
-
static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
struct intel_gvt_gtt_entry *se)
{
const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
struct intel_gvt_gtt_entry se = *ge;
- unsigned long gfn, page_size = PAGE_SIZE;
+ unsigned long gfn;
dma_addr_t dma_addr;
int ret;
switch (ge->type) {
case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
gvt_vdbg_mm("shadow 4K gtt entry\n");
+ ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
+ if (ret)
+ return -ENXIO;
break;
case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
gvt_vdbg_mm("shadow 64K gtt entry\n");
return split_64KB_gtt_entry(vgpu, spt, index, &se);
case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
gvt_vdbg_mm("shadow 2M gtt entry\n");
- ret = is_2MB_gtt_possible(vgpu, ge);
- if (ret == 0)
+ if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
+ intel_gvt_dma_map_guest_page(vgpu, gfn,
+ I915_GTT_PAGE_SIZE_2M, &dma_addr))
return split_2MB_gtt_entry(vgpu, spt, index, &se);
- else if (ret < 0)
- return ret;
- page_size = I915_GTT_PAGE_SIZE_2M;
break;
case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
gvt_vgpu_err("GVT doesn't support 1GB entry\n");
return -EINVAL;
default:
GEM_BUG_ON(1);
+ return -EINVAL;
}
- /* direct shadow */
- ret = intel_gvt_dma_map_guest_page(vgpu, gfn, page_size, &dma_addr);
- if (ret)
- return -ENXIO;
-
+ /* Successfully shadowed a 4K or 2M page (without splitting). */
pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
ppgtt_set_shadow_entry(spt, &se, index);
return 0;
static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
struct intel_vgpu *vgpu = spt->vgpu;
- struct intel_gvt *gvt = vgpu->gvt;
- const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
struct intel_vgpu_ppgtt_spt *s;
struct intel_gvt_gtt_entry se, ge;
- unsigned long gfn, i;
+ unsigned long i;
int ret;
trace_spt_change(spt->vgpu->id, "born", spt,
ppgtt_generate_shadow_entry(&se, s, &ge);
ppgtt_set_shadow_entry(spt, &se, i);
} else {
- gfn = ops->get_pfn(&ge);
- if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
- ops->set_pfn(&se, gvt->gtt.scratch_mfn);
- ppgtt_set_shadow_entry(spt, &se, i);
- continue;
- }
-
ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
if (ret)
goto fail;
if (mm->ppgtt_mm.shadowed)
return 0;
+ if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+ return -EINVAL;
+
mm->ppgtt_mm.shadowed = true;
for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
m.val64 = e.val64;
m.type = e.type;
- /* one PTE update may be issued in multiple writes and the
- * first write may not construct a valid gfn
- */
- if (!intel_gvt_is_valid_gfn(vgpu, gfn)) {
- ops->set_pfn(&m, gvt->gtt.scratch_mfn);
- goto out;
- }
-
ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
&dma_addr);
if (ret) {
ops->clear_present(&m);
}
-out:
ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
}
/**
- * intel_vgpu_reset_gtt - reset the all GTT related status
- * @vgpu: a vGPU
- *
- * This function is called from vfio core to reset reset all
- * GTT related status, including GGTT, PPGTT, scratch page.
- *
- */
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu)
-{
- /* Shadow pages are only created when there is no page
- * table tracking data, so remove page tracking data after
- * removing the shadow pages.
- */
- intel_vgpu_destroy_all_ppgtt_mm(vgpu);
- intel_vgpu_reset_ggtt(vgpu, true);
-}
-
-/**
* intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
* @gvt: intel gvt device
*
void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
int intel_gvt_init_gtt(struct intel_gvt *gvt);
-void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu);
void intel_gvt_clean_gtt(struct intel_gvt *gvt);
struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
#define _GVT_H_
#include <uapi/linux/pci_regs.h>
-#include <linux/kvm_host.h>
#include <linux/vfio.h>
#include <linux/mdev.h>
+#include <asm/kvm_page_track.h>
+
#include "i915_drv.h"
#include "intel_gvt.h"
#define vfio_dev_to_vgpu(vfio_dev) \
container_of((vfio_dev), struct intel_vgpu, vfio_device)
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *val, int len,
- struct kvm_page_track_notifier_node *node);
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+ struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+ struct kvm_page_track_notifier_node *node);
static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
{
if (npage == 0)
base_page = cur_page;
- else if (base_page + npage != cur_page) {
- gvt_vgpu_err("The pages are not continuous\n");
+ else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
ret = -EINVAL;
npage++;
goto err;
*page = base_page;
return 0;
err:
- gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
+ if (npage)
+ gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
return ret;
}
{
struct kvmgt_pgfn *p, *res = NULL;
+ lockdep_assert_held(&info->vgpu_lock);
+
hash_for_each_possible(info->ptable, p, hnode, gfn) {
if (gfn == p->gfn) {
res = p;
static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
{
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
-
- if (!vgpu->vfio_device.kvm ||
- vgpu->vfio_device.kvm->mm != current->mm) {
- gvt_vgpu_err("KVM is required to use Intel vGPU\n");
- return -ESRCH;
- }
+ int ret;
if (__kvmgt_vgpu_exist(vgpu))
return -EEXIST;
vgpu->track_node.track_write = kvmgt_page_track_write;
- vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
- kvm_get_kvm(vgpu->vfio_device.kvm);
- kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
- &vgpu->track_node);
+ vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region;
+ ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+ &vgpu->track_node);
+ if (ret) {
+ gvt_vgpu_err("KVM is required to use Intel vGPU\n");
+ return ret;
+ }
set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
&vgpu->track_node);
- kvm_put_kvm(vgpu->vfio_device.kvm);
kvmgt_protect_table_destroy(vgpu);
gvt_cache_destroy(vgpu);
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->vfio_device.kvm;
- struct kvm_memory_slot *slot;
- int idx;
+ int r;
if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
return -ESRCH;
- idx = srcu_read_lock(&kvm->srcu);
- slot = gfn_to_memslot(kvm, gfn);
- if (!slot) {
- srcu_read_unlock(&kvm->srcu, idx);
- return -EINVAL;
- }
-
- write_lock(&kvm->mmu_lock);
-
if (kvmgt_gfn_is_write_protected(info, gfn))
- goto out;
+ return 0;
- kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
- kvmgt_protect_table_add(info, gfn);
+ r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
+ if (r)
+ return r;
-out:
- write_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvmgt_protect_table_add(info, gfn);
return 0;
}
int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
{
- struct kvm *kvm = info->vfio_device.kvm;
- struct kvm_memory_slot *slot;
- int idx;
+ int r;
if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
return -ESRCH;
- idx = srcu_read_lock(&kvm->srcu);
- slot = gfn_to_memslot(kvm, gfn);
- if (!slot) {
- srcu_read_unlock(&kvm->srcu, idx);
- return -EINVAL;
- }
-
- write_lock(&kvm->mmu_lock);
-
if (!kvmgt_gfn_is_write_protected(info, gfn))
- goto out;
+ return 0;
- kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE);
- kvmgt_protect_table_del(info, gfn);
+ r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
+ if (r)
+ return r;
-out:
- write_unlock(&kvm->mmu_lock);
- srcu_read_unlock(&kvm->srcu, idx);
+ kvmgt_protect_table_del(info, gfn);
return 0;
}
-static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *val, int len,
- struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+ struct kvm_page_track_notifier_node *node)
{
struct intel_vgpu *info =
container_of(node, struct intel_vgpu, track_node);
- if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa)))
+ mutex_lock(&info->vgpu_lock);
+
+ if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT))
intel_vgpu_page_track_handler(info, gpa,
(void *)val, len);
+
+ mutex_unlock(&info->vgpu_lock);
}
-static void kvmgt_page_track_flush_slot(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- struct kvm_page_track_notifier_node *node)
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+ struct kvm_page_track_notifier_node *node)
{
- int i;
- gfn_t gfn;
+ unsigned long i;
struct intel_vgpu *info =
container_of(node, struct intel_vgpu, track_node);
- write_lock(&kvm->mmu_lock);
- for (i = 0; i < slot->npages; i++) {
- gfn = slot->base_gfn + i;
- if (kvmgt_gfn_is_write_protected(info, gfn)) {
- kvm_slot_page_track_remove_page(kvm, slot, gfn,
- KVM_PAGE_TRACK_WRITE);
- kvmgt_protect_table_del(info, gfn);
- }
+ mutex_lock(&info->vgpu_lock);
+
+ for (i = 0; i < nr_pages; i++) {
+ if (kvmgt_gfn_is_write_protected(info, gfn + i))
+ kvmgt_protect_table_del(info, gfn + i);
}
- write_unlock(&kvm->mmu_lock);
+
+ mutex_unlock(&info->vgpu_lock);
}
void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
struct intel_vgpu_page_track *page_track;
int ret = 0;
- mutex_lock(&vgpu->vgpu_lock);
-
page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
- if (!page_track) {
- ret = -ENXIO;
- goto out;
- }
+ if (!page_track)
+ return -ENXIO;
if (unlikely(vgpu->failsafe)) {
/* Remove write protection to prevent furture traps. */
gvt_err("guest page write error, gpa %llx\n", gpa);
}
-out:
- mutex_unlock(&vgpu->vgpu_lock);
return ret;
}
i915_sw_fence_fini(&rq->semaphore);
/*
- * Keep one request on each engine for reserved use under mempressure
- * do not use with virtual engines as this really is only needed for
- * kernel contexts.
+ * Keep one request on each engine for reserved use under mempressure.
*
* We do not hold a reference to the engine here and so have to be
* very careful in what rq->engine we poke. The virtual engine is
* know that if the rq->execution_mask is a single bit, rq->engine
* can be a physical engine with the exact corresponding mask.
*/
- if (!intel_engine_is_virtual(rq->engine) &&
- is_power_of_2(rq->execution_mask) &&
+ if (is_power_of_2(rq->execution_mask) &&
!cmpxchg(&rq->engine->request_pool, NULL, rq))
return;
PUSH_NVSQ(push, NV_SW, NV_SW_PAGE_FLIP, 0x00000000);
PUSH_KICK(push);
- ret = nouveau_fence_new(pfence);
+ ret = nouveau_fence_new(pfence, chan);
if (ret)
goto fail;
- ret = nouveau_fence_emit(*pfence, chan);
- if (ret)
- goto fail_fence_unref;
-
return 0;
-
-fail_fence_unref:
- nouveau_fence_unref(pfence);
fail:
spin_lock_irqsave(&dev->event_lock, flags);
list_del(&s->head);
if (ret)
goto out_unlock;
- ret = nouveau_fence_new(&fence);
+ ret = nouveau_fence_new(&fence, chan);
if (ret)
goto out_unlock;
- ret = nouveau_fence_emit(fence, chan);
- if (ret) {
- nouveau_fence_unref(&fence);
- goto out_unlock;
- }
-
/* TODO: figure out a better solution here
*
* wait on the fence here explicitly as going through
struct nouveau_fence *fence = NULL;
int ret;
- ret = nouveau_fence_new(&fence);
+ ret = nouveau_fence_new(&fence, chan);
if (!ret) {
- ret = nouveau_fence_emit(fence, chan);
- if (!ret)
- ret = nouveau_fence_wait(fence, false, false);
+ ret = nouveau_fence_wait(fence, false, false);
nouveau_fence_unref(&fence);
}
goto done;
}
- if (!nouveau_fence_new(&fence))
- nouveau_fence_emit(fence, dmem->migrate.chan);
+ nouveau_fence_new(&fence, dmem->migrate.chan);
migrate_vma_pages(&args);
nouveau_dmem_fence_done(&fence);
dma_unmap_page(drm->dev->dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
}
}
- if (!nouveau_fence_new(&fence))
- nouveau_fence_emit(fence, chunk->drm->dmem->migrate.chan);
+ nouveau_fence_new(&fence, chunk->drm->dmem->migrate.chan);
migrate_device_pages(src_pfns, dst_pfns, npages);
nouveau_dmem_fence_done(&fence);
migrate_device_finalize(src_pfns, dst_pfns, npages);
addr += PAGE_SIZE;
}
- if (!nouveau_fence_new(&fence))
- nouveau_fence_emit(fence, drm->dmem->migrate.chan);
+ nouveau_fence_new(&fence, drm->dmem->migrate.chan);
migrate_vma_pages(args);
nouveau_dmem_fence_done(&fence);
nouveau_pfns_map(svmm, args->vma->vm_mm, args->start, pfns, i);
unsigned long index;
int ret;
- ret = nouveau_fence_new(&exec_job->fence);
+ /* Create a new fence, but do not emit yet. */
+ ret = nouveau_fence_create(&exec_job->fence, exec_job->chan);
if (ret)
return ret;
nv50_dma_push(chan, p->va, p->va_len, no_prefetch);
}
- ret = nouveau_fence_emit(fence, chan);
+ ret = nouveau_fence_emit(fence);
if (ret) {
+ nouveau_fence_unref(&exec_job->fence);
NV_PRINTK(err, job->cli, "error fencing pushbuf: %d\n", ret);
WIND_RING(chan);
return ERR_PTR(ret);
}
+ /* The fence was emitted successfully, set the job's fence pointer to
+ * NULL in order to avoid freeing it up when the job is cleaned up.
+ */
exec_job->fence = NULL;
return &fence->base;
nouveau_job_free(job);
- nouveau_fence_unref(&exec_job->fence);
+ kfree(exec_job->fence);
kfree(exec_job->push.s);
kfree(exec_job);
}
}
int
-nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
+nouveau_fence_emit(struct nouveau_fence *fence)
{
+ struct nouveau_channel *chan = fence->channel;
struct nouveau_fence_chan *fctx = chan->fence;
struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
int ret;
- if (unlikely(!chan->fence))
- return -ENODEV;
-
- fence->channel = chan;
fence->timeout = jiffies + (15 * HZ);
if (priv->uevent)
}
int
-nouveau_fence_new(struct nouveau_fence **pfence)
+nouveau_fence_create(struct nouveau_fence **pfence,
+ struct nouveau_channel *chan)
{
struct nouveau_fence *fence;
+ if (unlikely(!chan->fence))
+ return -ENODEV;
+
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
if (!fence)
return -ENOMEM;
+ fence->channel = chan;
+
*pfence = fence;
return 0;
}
+int
+nouveau_fence_new(struct nouveau_fence **pfence,
+ struct nouveau_channel *chan)
+{
+ int ret = 0;
+
+ ret = nouveau_fence_create(pfence, chan);
+ if (ret)
+ return ret;
+
+ ret = nouveau_fence_emit(*pfence);
+ if (ret)
+ nouveau_fence_unref(pfence);
+
+ return ret;
+}
+
static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
{
return "nouveau";
unsigned long timeout;
};
-int nouveau_fence_new(struct nouveau_fence **);
+int nouveau_fence_create(struct nouveau_fence **, struct nouveau_channel *);
+int nouveau_fence_new(struct nouveau_fence **, struct nouveau_channel *);
void nouveau_fence_unref(struct nouveau_fence **);
-int nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
+int nouveau_fence_emit(struct nouveau_fence *);
bool nouveau_fence_done(struct nouveau_fence *);
int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
}
}
- ret = nouveau_fence_new(&fence);
- if (!ret)
- ret = nouveau_fence_emit(fence, chan);
+ ret = nouveau_fence_new(&fence, chan);
if (ret) {
- nouveau_fence_unref(&fence);
NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
WIND_RING(chan);
goto out;
if (dev->info.static_addr) {
status = i3c_bus_get_addr_slot_status(&master->bus,
dev->info.static_addr);
- if (status != I3C_ADDR_SLOT_FREE)
+ /* Since static address and assigned dynamic address can be
+ * equal, allow this case to pass.
+ */
+ if (status != I3C_ADDR_SLOT_FREE &&
+ dev->info.static_addr != dev->boardinfo->init_dyn_addr)
return -EBUSY;
i3c_bus_set_addr_slot_status(&master->bus,
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
-#include <linux/of_device.h>
#define DEV_ID 0x0
#define DEV_ID_I3C_MASTER 0x5034
break;
}
if (RESP_STATUS(xfer[0].response) == RESP_ERR_NACK &&
- RESP_STATUS(xfer[0].response) == 1) {
+ RESP_DATA_LENGTH(xfer->response) == 1) {
ret = 0; /* no more devices to be assigned */
break;
}
* @base: I3C master controller
* @dev: Corresponding device
* @regs: Memory mapping
+ * @saved_regs: Volatile values for PM operations
* @free_slots: Bit array of available slots
* @addrs: Array containing the dynamic addresses of each attached device
* @descs: Array of descriptors, one per attached device
*/
break;
} else if (SVC_I3C_MSTATUS_NACKED(reg)) {
+ /* No I3C devices attached */
+ if (dev_nb == 0)
+ break;
+
/*
* A slave device nacked the address, this is
* allowed only once, DAA will be stopped and
{
struct svc_i3c_master *master = to_svc_i3c_master(m);
bool broadcast = cmd->id < 0x80;
+ int ret;
if (broadcast)
- return svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
+ ret = svc_i3c_master_send_bdcast_ccc_cmd(master, cmd);
else
- return svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+ ret = svc_i3c_master_send_direct_ccc_cmd(master, cmd);
+
+ if (ret)
+ cmd->err = I3C_ERROR_M2;
+
+ return ret;
}
static int svc_i3c_master_priv_xfers(struct i3c_dev_desc *dev,
return PTR_ERR(master->sclk);
master->irq = platform_get_irq(pdev, 0);
- if (master->irq <= 0)
- return -ENOENT;
+ if (master->irq < 0)
+ return master->irq;
master->dev = dev;
config GAMEPORT_NS558
tristate "Classic ISA and PnP gameport support"
+ depends on ISA
help
Say Y here if you have an ISA or PnP gameport.
config GAMEPORT_L4
tristate "PDPI Lightning 4 gamecard support"
+ depends on ISA
help
Say Y here if you have a PDPI Lightning 4 gamecard.
config GAMEPORT_FM801
tristate "ForteMedia FM801 gameport support"
- depends on PCI
+ depends on PCI && HAS_IOPORT
help
Say Y here if you have ForteMedia FM801 PCI audio controller
(Abit AU10, Genius Sound Maker, HP Workstation zx2000,
static void gameport_default_trigger(struct gameport *gameport)
{
+#ifdef CONFIG_HAS_IOPORT
outb(0xff, gameport->io);
+#endif
}
static unsigned char gameport_default_read(struct gameport *gameport)
{
+#ifdef CONFIG_HAS_IOPORT
return inb(gameport->io);
+#else
+ return 0xff;
+#endif
+}
+
+static void gameport_setup_default_handlers(struct gameport *gameport)
+{
+ if ((!gameport->trigger || !gameport->read) &&
+ !IS_ENABLED(CONFIG_HAS_IOPORT))
+ dev_err(&gameport->dev,
+ "I/O port access is required for %s (%s) but is not available\n",
+ gameport->phys, gameport->name);
+
+ if (!gameport->trigger)
+ gameport->trigger = gameport_default_trigger;
+ if (!gameport->read)
+ gameport->read = gameport_default_read;
}
/*
if (gameport->parent)
gameport->dev.parent = &gameport->parent->dev;
- if (!gameport->trigger)
- gameport->trigger = gameport_default_trigger;
- if (!gameport->read)
- gameport->read = gameport_default_read;
-
+ gameport_setup_default_handlers(gameport);
INIT_LIST_HEAD(&gameport->node);
spin_lock_init(&gameport->timer_lock);
timer_setup(&gameport->poll_timer, gameport_run_poll_handler, 0);
{ 0x0f0d, 0x0067, "HORIPAD ONE", 0, XTYPE_XBOXONE },
{ 0x0f0d, 0x0078, "Hori Real Arcade Pro V Kai Xbox One", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
{ 0x0f0d, 0x00c5, "Hori Fighting Commander ONE", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOXONE },
+ { 0x0f0d, 0x00dc, "HORIPAD FPS for Nintendo Switch", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x0f30, 0x010b, "Philips Recoil", 0, XTYPE_XBOX },
{ 0x0f30, 0x0202, "Joytech Advanced Controller", 0, XTYPE_XBOX },
{ 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX },
{ 0x31e3, 0x1300, "Wooting 60HE (AVR)", 0, XTYPE_XBOX360 },
{ 0x31e3, 0x1310, "Wooting 60HE (ARM)", 0, XTYPE_XBOX360 },
{ 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 },
+ { 0x3537, 0x1004, "GameSir T4 Kaleid", 0, XTYPE_XBOX360 },
{ 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX },
{ 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX },
{ 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN }
XPAD_XBOX360_VENDOR(0x2f24), /* GameSir controllers */
XPAD_XBOX360_VENDOR(0x31e3), /* Wooting Keyboards */
XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */
+ XPAD_XBOX360_VENDOR(0x3537), /* GameSir Controllers */
+ XPAD_XBOXONE_VENDOR(0x3537), /* GameSir Controllers */
{ }
};
return error;
}
}
+ if (xpad->xtype == XTYPE_XBOX360) {
+ /*
+ * Some third-party controllers Xbox 360-style controllers
+ * require this message to finish initialization.
+ */
+ u8 dummy[20];
+
+ error = usb_control_msg_recv(xpad->udev, 0,
+ /* bRequest */ 0x01,
+ /* bmRequestType */
+ USB_TYPE_VENDOR | USB_DIR_IN |
+ USB_RECIP_INTERFACE,
+ /* wValue */ 0x100,
+ /* wIndex */ 0x00,
+ dummy, sizeof(dummy),
+ 25, GFP_KERNEL);
+ if (error)
+ dev_warn(&xpad->dev->dev,
+ "unable to receive magic message: %d\n",
+ error);
+ }
return 0;
}
return 0;
}
-static void adp5588_disable_regulator(void *reg)
-{
- regulator_disable(reg);
-}
-
static int adp5588_probe(struct i2c_client *client)
{
struct adp5588_kpad *kpad;
struct input_dev *input;
struct gpio_desc *gpio;
- struct regulator *vcc;
unsigned int revid;
int ret;
int error;
if (error)
return error;
- vcc = devm_regulator_get(&client->dev, "vcc");
- if (IS_ERR(vcc))
- return PTR_ERR(vcc);
-
- error = regulator_enable(vcc);
- if (error)
- return error;
-
- error = devm_add_action_or_reset(&client->dev,
- adp5588_disable_regulator, vcc);
+ error = devm_regulator_get_enable(&client->dev, "vcc");
if (error)
return error;
struct input_dev *dev;
int i, err;
- dev = input_allocate_device();
+ dev = devm_input_allocate_device(&pdev->dev);
if (!dev) {
dev_err(&pdev->dev, "Not enough memory for input device\n");
return -ENOMEM;
dev->id.vendor = 0x0001;
dev->id.product = 0x0001;
dev->id.version = 0x0100;
- dev->dev.parent = &pdev->dev;
dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP);
amikbd_init_console_keymaps();
ciaa.cra &= ~0x41; /* serial data in, turn off TA */
- err = request_irq(IRQ_AMIGA_CIAA_SP, amikbd_interrupt, 0, "amikbd",
- dev);
+ err = devm_request_irq(&pdev->dev, IRQ_AMIGA_CIAA_SP, amikbd_interrupt,
+ 0, "amikbd", dev);
if (err)
- goto fail2;
+ return err;
err = input_register_device(dev);
if (err)
- goto fail3;
+ return err;
platform_set_drvdata(pdev, dev);
return 0;
-
- fail3: free_irq(IRQ_AMIGA_CIAA_SP, dev);
- fail2: input_free_device(dev);
- return err;
-}
-
-static int __exit amikbd_remove(struct platform_device *pdev)
-{
- struct input_dev *dev = platform_get_drvdata(pdev);
-
- free_irq(IRQ_AMIGA_CIAA_SP, dev);
- input_unregister_device(dev);
- return 0;
}
static struct platform_driver amikbd_driver = {
- .remove = __exit_p(amikbd_remove),
.driver = {
.name = "amiga-keyboard",
},
{
struct bcm_kp *kp;
struct input_dev *input_dev;
- struct resource *res;
int error;
kp = devm_kzalloc(&pdev->dev, sizeof(*kp), GFP_KERNEL);
return error;
}
- /* Get the KEYPAD base address */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "Missing keypad base address resource\n");
- return -ENODEV;
- }
-
- kp->base = devm_ioremap_resource(&pdev->dev, res);
+ kp->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(kp->base))
return PTR_ERR(kp->base);
/* Enable clock */
- kp->clk = devm_clk_get(&pdev->dev, "peri_clk");
+ kp->clk = devm_clk_get_optional(&pdev->dev, "peri_clk");
if (IS_ERR(kp->clk)) {
- error = PTR_ERR(kp->clk);
- if (error != -ENOENT) {
- if (error != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to get clock\n");
- return error;
- }
- dev_dbg(&pdev->dev,
- "No clock specified. Assuming it's enabled\n");
- kp->clk = NULL;
+ return dev_err_probe(&pdev->dev, PTR_ERR(kp->clk), "Failed to get clock\n");
+ } else if (!kp->clk) {
+ dev_dbg(&pdev->dev, "No clock specified. Assuming it's enabled\n");
} else {
unsigned int desired_rate;
long actual_rate;
NULL, GPIOD_IN, desc);
if (IS_ERR(bdata->gpiod)) {
error = PTR_ERR(bdata->gpiod);
- if (error == -ENOENT) {
- /*
- * GPIO is optional, we may be dealing with
- * purely interrupt-driven setup.
- */
- bdata->gpiod = NULL;
- } else {
- if (error != -EPROBE_DEFER)
- dev_err(dev, "failed to get gpio: %d\n",
- error);
- return error;
- }
+ if (error != -ENOENT)
+ return dev_err_probe(dev, error,
+ "failed to get gpio\n");
+
+ /*
+ * GPIO is optional, we may be dealing with
+ * purely interrupt-driven setup.
+ */
+ bdata->gpiod = NULL;
}
} else if (gpio_is_valid(button->gpio)) {
/*
NULL, GPIOD_IN,
button->desc);
if (IS_ERR(bdata->gpiod)) {
- error = PTR_ERR(bdata->gpiod);
- if (error != -EPROBE_DEFER)
- dev_err(dev,
- "failed to get gpio: %d\n",
- error);
fwnode_handle_put(child);
- return error;
+ return dev_err_probe(dev, PTR_ERR(bdata->gpiod),
+ "failed to get gpio\n");
}
} else if (gpio_is_valid(button->gpio)) {
/*
const char *name)
{
struct lm8323_pwm *pwm;
+ int err;
BUG_ON(id > 3);
pwm->cdev.name = name;
pwm->cdev.brightness_set = lm8323_pwm_set_brightness;
pwm->cdev.groups = lm8323_pwm_groups;
- if (led_classdev_register(dev, &pwm->cdev) < 0) {
- dev_err(dev, "couldn't register PWM %d\n", id);
- return -1;
+
+ err = devm_led_classdev_register(dev, &pwm->cdev);
+ if (err) {
+ dev_err(dev, "couldn't register PWM %d: %d\n", id, err);
+ return err;
}
pwm->enabled = true;
}
return 0;
}
-static struct i2c_driver lm8323_i2c_driver;
-
static ssize_t lm8323_show_disable(struct device *dev,
struct device_attribute *attr, char *buf)
{
}
static DEVICE_ATTR(disable_kp, 0644, lm8323_show_disable, lm8323_set_disable);
+static struct attribute *lm8323_attrs[] = {
+ &dev_attr_disable_kp.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(lm8323);
+
static int lm8323_probe(struct i2c_client *client)
{
struct lm8323_platform_data *pdata = dev_get_platdata(&client->dev);
return -EINVAL;
}
- lm = kzalloc(sizeof *lm, GFP_KERNEL);
- idev = input_allocate_device();
- if (!lm || !idev) {
- err = -ENOMEM;
- goto fail1;
- }
+ lm = devm_kzalloc(&client->dev, sizeof(*lm), GFP_KERNEL);
+ if (!lm)
+ return -ENOMEM;
+
+ idev = devm_input_allocate_device(&client->dev);
+ if (!idev)
+ return -ENOMEM;
lm->client = client;
lm->idev = idev;
lm8323_reset(lm);
- /* Nothing's set up to service the IRQ yet, so just spin for max.
- * 100ms until we can configure. */
+ /*
+ * Nothing's set up to service the IRQ yet, so just spin for max.
+ * 100ms until we can configure.
+ */
tmo = jiffies + msecs_to_jiffies(100);
while (lm8323_read(lm, LM8323_CMD_READ_INT, data, 1) == 1) {
if (data[0] & INT_NOINIT)
/* If a true probe check the device */
if (lm8323_read_id(lm, data) != 0) {
dev_err(&client->dev, "device not found\n");
- err = -ENODEV;
- goto fail1;
+ return -ENODEV;
}
for (pwm = 0; pwm < LM8323_NUM_PWMS; pwm++) {
err = init_pwm(lm, pwm + 1, &client->dev,
pdata->pwm_names[pwm]);
- if (err < 0)
- goto fail2;
+ if (err)
+ return err;
}
lm->kp_enabled = true;
- err = device_create_file(&client->dev, &dev_attr_disable_kp);
- if (err < 0)
- goto fail2;
idev->name = pdata->name ? : "LM8323 keypad";
snprintf(lm->phys, sizeof(lm->phys),
err = input_register_device(idev);
if (err) {
dev_dbg(&client->dev, "error registering input device\n");
- goto fail3;
+ return err;
}
- err = request_threaded_irq(client->irq, NULL, lm8323_irq,
- IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm);
+ err = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, lm8323_irq,
+ IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+ "lm8323", lm);
if (err) {
dev_err(&client->dev, "could not get IRQ %d\n", client->irq);
- goto fail4;
+ return err;
}
i2c_set_clientdata(client, lm);
enable_irq_wake(client->irq);
return 0;
-
-fail4:
- input_unregister_device(idev);
- idev = NULL;
-fail3:
- device_remove_file(&client->dev, &dev_attr_disable_kp);
-fail2:
- while (--pwm >= 0)
- if (lm->pwm[pwm].enabled)
- led_classdev_unregister(&lm->pwm[pwm].cdev);
-fail1:
- input_free_device(idev);
- kfree(lm);
- return err;
-}
-
-static void lm8323_remove(struct i2c_client *client)
-{
- struct lm8323_chip *lm = i2c_get_clientdata(client);
- int i;
-
- disable_irq_wake(client->irq);
- free_irq(client->irq, lm);
-
- input_unregister_device(lm->idev);
-
- device_remove_file(&lm->client->dev, &dev_attr_disable_kp);
-
- for (i = 0; i < 3; i++)
- if (lm->pwm[i].enabled)
- led_classdev_unregister(&lm->pwm[i].cdev);
-
- kfree(lm);
}
/*
static struct i2c_driver lm8323_i2c_driver = {
.driver = {
- .name = "lm8323",
- .pm = pm_sleep_ptr(&lm8323_pm_ops),
+ .name = "lm8323",
+ .pm = pm_sleep_ptr(&lm8323_pm_ops),
+ .dev_groups = lm8323_groups,
},
.probe = lm8323_probe,
- .remove = lm8323_remove,
.id_table = lm8323_id,
};
MODULE_DEVICE_TABLE(i2c, lm8323_id);
return -EINVAL;
}
- lm8333 = kzalloc(sizeof(*lm8333), GFP_KERNEL);
- input = input_allocate_device();
- if (!lm8333 || !input) {
- err = -ENOMEM;
- goto free_mem;
- }
+ lm8333 = devm_kzalloc(&client->dev, sizeof(*lm8333), GFP_KERNEL);
+ if (!lm8333)
+ return -ENOMEM;
+
+ input = devm_input_allocate_device(&client->dev);
+ if (!input)
+ return -ENOMEM;
lm8333->client = client;
lm8333->input = input;
input->name = client->name;
- input->dev.parent = &client->dev;
input->id.bustype = BUS_I2C;
input_set_capability(input, EV_MSC, MSC_SCAN);
LM8333_NUM_ROWS, LM8333_NUM_COLS,
lm8333->keycodes, input);
if (err)
- goto free_mem;
+ return err;
if (pdata->debounce_time) {
err = lm8333_write8(lm8333, LM8333_DEBOUNCE,
dev_warn(&client->dev, "Unable to set active time\n");
}
- err = request_threaded_irq(client->irq, NULL, lm8333_irq_thread,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- "lm8333", lm8333);
+ err = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, lm8333_irq_thread,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ "lm8333", lm8333);
if (err)
- goto free_mem;
+ return err;
err = input_register_device(input);
if (err)
- goto free_irq;
+ return err;
i2c_set_clientdata(client, lm8333);
return 0;
-
- free_irq:
- free_irq(client->irq, lm8333);
- free_mem:
- input_free_device(input);
- kfree(lm8333);
- return err;
-}
-
-static void lm8333_remove(struct i2c_client *client)
-{
- struct lm8333 *lm8333 = i2c_get_clientdata(client);
-
- free_irq(client->irq, lm8333);
- input_unregister_device(lm8333->input);
- kfree(lm8333);
}
static const struct i2c_device_id lm8333_id[] = {
.name = "lm8333",
},
.probe = lm8333_probe,
- .remove = lm8333_remove,
.id_table = lm8333_id,
};
module_i2c_driver(lm8333_driver);
{
struct lpc32xx_kscan_drv *kscandat;
struct input_dev *input;
- struct resource *res;
size_t keymap_size;
int error;
int irq;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "failed to get platform I/O memory\n");
- return -EINVAL;
- }
-
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return -EINVAL;
input_set_drvdata(kscandat->input, kscandat);
- kscandat->kscan_base = devm_ioremap_resource(&pdev->dev, res);
+ kscandat->kscan_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(kscandat->kscan_base))
return PTR_ERR(kscandat->kscan_base);
return IRQ_HANDLED;
}
+static void mcs_touchkey_poweroff(void *data)
+{
+ struct mcs_touchkey_data *touchkey = data;
+
+ touchkey->poweron(false);
+}
+
static int mcs_touchkey_probe(struct i2c_client *client)
{
const struct i2c_device_id *id = i2c_client_get_device_id(client);
return -EINVAL;
}
- data = kzalloc(struct_size(data, keycodes, pdata->key_maxval + 1),
- GFP_KERNEL);
- input_dev = input_allocate_device();
- if (!data || !input_dev) {
- dev_err(&client->dev, "Failed to allocate memory\n");
- error = -ENOMEM;
- goto err_free_mem;
+ data = devm_kzalloc(&client->dev,
+ struct_size(data, keycodes, pdata->key_maxval + 1),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ input_dev = devm_input_allocate_device(&client->dev);
+ if (!input_dev) {
+ dev_err(&client->dev, "Failed to allocate input device\n");
+ return -ENOMEM;
}
data->client = client;
fw_ver = i2c_smbus_read_byte_data(client, fw_reg);
if (fw_ver < 0) {
- error = fw_ver;
- dev_err(&client->dev, "i2c read error[%d]\n", error);
- goto err_free_mem;
+ dev_err(&client->dev, "i2c read error[%d]\n", fw_ver);
+ return fw_ver;
}
dev_info(&client->dev, "Firmware version: %d\n", fw_ver);
input_dev->name = "MELFAS MCS Touchkey";
input_dev->id.bustype = BUS_I2C;
- input_dev->dev.parent = &client->dev;
input_dev->evbit[0] = BIT_MASK(EV_KEY);
if (!pdata->no_autorepeat)
input_dev->evbit[0] |= BIT_MASK(EV_REP);
if (pdata->poweron) {
data->poweron = pdata->poweron;
data->poweron(true);
+
+ error = devm_add_action_or_reset(&client->dev,
+ mcs_touchkey_poweroff, data);
+ if (error)
+ return error;
}
- error = request_threaded_irq(client->irq, NULL, mcs_touchkey_interrupt,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- client->dev.driver->name, data);
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, mcs_touchkey_interrupt,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ client->dev.driver->name, data);
if (error) {
dev_err(&client->dev, "Failed to register interrupt\n");
- goto err_free_mem;
+ return error;
}
error = input_register_device(input_dev);
if (error)
- goto err_free_irq;
+ return error;
i2c_set_clientdata(client, data);
return 0;
-
-err_free_irq:
- free_irq(client->irq, data);
-err_free_mem:
- input_free_device(input_dev);
- kfree(data);
- return error;
-}
-
-static void mcs_touchkey_remove(struct i2c_client *client)
-{
- struct mcs_touchkey_data *data = i2c_get_clientdata(client);
-
- free_irq(client->irq, data);
- if (data->poweron)
- data->poweron(false);
- input_unregister_device(data->input_dev);
- kfree(data);
}
static void mcs_touchkey_shutdown(struct i2c_client *client)
.pm = pm_sleep_ptr(&mcs_touchkey_pm_ops),
},
.probe = mcs_touchkey_probe,
- .remove = mcs_touchkey_remove,
.shutdown = mcs_touchkey_shutdown,
.id_table = mcs_touchkey_id,
};
return IRQ_HANDLED;
}
+static void ske_keypad_board_exit(void *data)
+{
+ struct ske_keypad *keypad = data;
+
+ keypad->board->exit();
+}
+
static int __init ske_keypad_probe(struct platform_device *pdev)
{
const struct ske_keypad_platform_data *plat =
dev_get_platdata(&pdev->dev);
+ struct device *dev = &pdev->dev;
struct ske_keypad *keypad;
struct input_dev *input;
- struct resource *res;
int irq;
int error;
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return -EINVAL;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "missing platform resources\n");
- return -EINVAL;
- }
+ return irq;
- keypad = kzalloc(sizeof(struct ske_keypad), GFP_KERNEL);
- input = input_allocate_device();
+ keypad = devm_kzalloc(dev, sizeof(struct ske_keypad),
+ GFP_KERNEL);
+ input = devm_input_allocate_device(dev);
if (!keypad || !input) {
dev_err(&pdev->dev, "failed to allocate keypad memory\n");
- error = -ENOMEM;
- goto err_free_mem;
+ return -ENOMEM;
}
keypad->irq = irq;
keypad->input = input;
spin_lock_init(&keypad->ske_keypad_lock);
- if (!request_mem_region(res->start, resource_size(res), pdev->name)) {
- dev_err(&pdev->dev, "failed to request I/O memory\n");
- error = -EBUSY;
- goto err_free_mem;
- }
-
- keypad->reg_base = ioremap(res->start, resource_size(res));
- if (!keypad->reg_base) {
- dev_err(&pdev->dev, "failed to remap I/O memory\n");
- error = -ENXIO;
- goto err_free_mem_region;
- }
+ keypad->reg_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(keypad->reg_base))
+ return PTR_ERR(keypad->reg_base);
- keypad->pclk = clk_get(&pdev->dev, "apb_pclk");
+ keypad->pclk = devm_clk_get_enabled(dev, "apb_pclk");
if (IS_ERR(keypad->pclk)) {
dev_err(&pdev->dev, "failed to get pclk\n");
- error = PTR_ERR(keypad->pclk);
- goto err_iounmap;
+ return PTR_ERR(keypad->pclk);
}
- keypad->clk = clk_get(&pdev->dev, NULL);
+ keypad->clk = devm_clk_get_enabled(dev, NULL);
if (IS_ERR(keypad->clk)) {
dev_err(&pdev->dev, "failed to get clk\n");
- error = PTR_ERR(keypad->clk);
- goto err_pclk;
+ return PTR_ERR(keypad->clk);
}
input->id.bustype = BUS_HOST;
keypad->keymap, input);
if (error) {
dev_err(&pdev->dev, "Failed to build keymap\n");
- goto err_clk;
+ return error;
}
input_set_capability(input, EV_MSC, MSC_SCAN);
if (!plat->no_autorepeat)
__set_bit(EV_REP, input->evbit);
- error = clk_prepare_enable(keypad->pclk);
- if (error) {
- dev_err(&pdev->dev, "Failed to prepare/enable pclk\n");
- goto err_clk;
- }
-
- error = clk_prepare_enable(keypad->clk);
- if (error) {
- dev_err(&pdev->dev, "Failed to prepare/enable clk\n");
- goto err_pclk_disable;
- }
-
-
/* go through board initialization helpers */
if (keypad->board->init)
keypad->board->init();
+ if (keypad->board->exit) {
+ error = devm_add_action_or_reset(dev, ske_keypad_board_exit,
+ keypad);
+ if (error)
+ return error;
+ }
+
error = ske_keypad_chip_init(keypad);
if (error) {
dev_err(&pdev->dev, "unable to init keypad hardware\n");
- goto err_clk_disable;
+ return error;
}
- error = request_threaded_irq(keypad->irq, NULL, ske_keypad_irq,
- IRQF_ONESHOT, "ske-keypad", keypad);
+ error = devm_request_threaded_irq(dev, keypad->irq,
+ NULL, ske_keypad_irq,
+ IRQF_ONESHOT, "ske-keypad", keypad);
if (error) {
dev_err(&pdev->dev, "allocate irq %d failed\n", keypad->irq);
- goto err_clk_disable;
+ return error;
}
error = input_register_device(input);
if (error) {
dev_err(&pdev->dev,
- "unable to register input device: %d\n", error);
- goto err_free_irq;
+ "unable to register input device: %d\n", error);
+ return error;
}
if (plat->wakeup_enable)
platform_set_drvdata(pdev, keypad);
return 0;
-
-err_free_irq:
- free_irq(keypad->irq, keypad);
-err_clk_disable:
- clk_disable_unprepare(keypad->clk);
-err_pclk_disable:
- clk_disable_unprepare(keypad->pclk);
-err_clk:
- clk_put(keypad->clk);
-err_pclk:
- clk_put(keypad->pclk);
-err_iounmap:
- iounmap(keypad->reg_base);
-err_free_mem_region:
- release_mem_region(res->start, resource_size(res));
-err_free_mem:
- input_free_device(input);
- kfree(keypad);
- return error;
-}
-
-static int ske_keypad_remove(struct platform_device *pdev)
-{
- struct ske_keypad *keypad = platform_get_drvdata(pdev);
- struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
- free_irq(keypad->irq, keypad);
-
- input_unregister_device(keypad->input);
-
- clk_disable_unprepare(keypad->clk);
- clk_put(keypad->clk);
-
- if (keypad->board->exit)
- keypad->board->exit();
-
- iounmap(keypad->reg_base);
- release_mem_region(res->start, resource_size(res));
- kfree(keypad);
-
- return 0;
}
static int ske_keypad_suspend(struct device *dev)
.name = "nmk-ske-keypad",
.pm = pm_sleep_ptr(&ske_keypad_dev_pm_ops),
},
- .remove = ske_keypad_remove,
};
module_platform_driver_probe(ske_keypad_driver, ske_keypad_probe);
return PTR_ERR(keypad->clk);
}
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- keypad->reg_base = devm_ioremap_resource(&pdev->dev, res);
+ keypad->reg_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(keypad->reg_base))
return PTR_ERR(keypad->reg_base);
struct device *dev = &pdev->dev;
struct omap4_keypad *keypad_data;
struct input_dev *input_dev;
- struct resource *res;
unsigned int max_keys;
int irq;
int error;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "no base address specified\n");
- return -EINVAL;
- }
-
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
if (error)
return error;
- keypad_data->base = devm_ioremap_resource(dev, res);
+ keypad_data->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(keypad_data->base))
return PTR_ERR(keypad_data->base);
{
struct input_dev *input;
struct opencores_kbd *opencores_kbd;
- struct resource *res;
int irq, i, error;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "missing board memory resource\n");
- return -EINVAL;
- }
-
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return -EINVAL;
opencores_kbd->input = input;
- opencores_kbd->addr = devm_ioremap_resource(&pdev->dev, res);
+ opencores_kbd->addr = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(opencores_kbd->addr))
return PTR_ERR(opencores_kbd->addr);
ppkb_set_scan(client, false);
}
-static void ppkb_regulator_disable(void *regulator)
-{
- regulator_disable(regulator);
-}
-
static int ppkb_probe(struct i2c_client *client)
{
struct device *dev = &client->dev;
unsigned int phys_rows, phys_cols;
struct pinephone_keyboard *ppkb;
- struct regulator *vbat_supply;
u8 info[PPKB_MATRIX_SIZE + 1];
struct device_node *i2c_bus;
int ret;
int error;
- vbat_supply = devm_regulator_get(dev, "vbat");
- error = PTR_ERR_OR_ZERO(vbat_supply);
+ error = devm_regulator_get_enable(dev, "vbat");
if (error) {
dev_err(dev, "Failed to get VBAT supply: %d\n", error);
return error;
}
- error = regulator_enable(vbat_supply);
- if (error) {
- dev_err(dev, "Failed to enable VBAT: %d\n", error);
- return error;
- }
-
- error = devm_add_action_or_reset(dev, ppkb_regulator_disable,
- vbat_supply);
- if (error)
- return error;
-
ret = i2c_smbus_read_i2c_block_data(client, 0, sizeof(info), info);
if (ret != sizeof(info)) {
error = ret < 0 ? ret : -EIO;
struct device_node *np = pdev->dev.of_node;
struct pxa27x_keypad *keypad;
struct input_dev *input_dev;
- struct resource *res;
int irq, error;
/* Driver need build keycode from device tree or pdata */
if (irq < 0)
return -ENXIO;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res == NULL) {
- dev_err(&pdev->dev, "failed to get I/O memory\n");
- return -ENXIO;
- }
-
keypad = devm_kzalloc(&pdev->dev, sizeof(*keypad),
GFP_KERNEL);
if (!keypad)
keypad->input_dev = input_dev;
keypad->irq = irq;
- keypad->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+ keypad->mmio_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(keypad->mmio_base))
return PTR_ERR(keypad->mmio_base);
if (!qt1070_identify(client))
return -ENODEV;
- data = kzalloc(sizeof(struct qt1070_data), GFP_KERNEL);
- input = input_allocate_device();
- if (!data || !input) {
- dev_err(&client->dev, "insufficient memory\n");
- err = -ENOMEM;
- goto err_free_mem;
- }
+ data = devm_kzalloc(&client->dev, sizeof(struct qt1070_data),
+ GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ input = devm_input_allocate_device(&client->dev);
+ if (!input)
+ return -ENOMEM;
data->client = client;
data->input = input;
data->irq = client->irq;
input->name = "AT42QT1070 QTouch Sensor";
- input->dev.parent = &client->dev;
input->id.bustype = BUS_I2C;
/* Add the keycode */
qt1070_write(client, RESET, 1);
msleep(QT1070_RESET_TIME);
- err = request_threaded_irq(client->irq, NULL, qt1070_interrupt,
- IRQF_TRIGGER_NONE | IRQF_ONESHOT,
- client->dev.driver->name, data);
+ err = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, qt1070_interrupt,
+ IRQF_TRIGGER_NONE | IRQF_ONESHOT,
+ client->dev.driver->name, data);
if (err) {
dev_err(&client->dev, "fail to request irq\n");
- goto err_free_mem;
+ return err;
}
/* Register the input device */
err = input_register_device(data->input);
if (err) {
dev_err(&client->dev, "Failed to register input device\n");
- goto err_free_irq;
+ return err;
}
i2c_set_clientdata(client, data);
qt1070_read(client, DET_STATUS);
return 0;
-
-err_free_irq:
- free_irq(client->irq, data);
-err_free_mem:
- input_free_device(input);
- kfree(data);
- return err;
-}
-
-static void qt1070_remove(struct i2c_client *client)
-{
- struct qt1070_data *data = i2c_get_clientdata(client);
-
- /* Release IRQ */
- free_irq(client->irq, data);
-
- input_unregister_device(data->input);
- kfree(data);
}
static int qt1070_suspend(struct device *dev)
},
.id_table = qt1070_id,
.probe = qt1070_probe,
- .remove = qt1070_remove,
};
module_i2c_driver(qt1070_driver);
#define QT2160_NUM_LEDS_X 8
-#define QT2160_CYCLE_INTERVAL (2*HZ)
+#define QT2160_CYCLE_INTERVAL 2000 /* msec - 2 sec */
static unsigned char qt2160_key2code[] = {
KEY_0, KEY_1, KEY_2, KEY_3,
struct qt2160_data {
struct i2c_client *client;
struct input_dev *input;
- struct delayed_work dwork;
unsigned short keycodes[ARRAY_SIZE(qt2160_key2code)];
u16 key_matrix;
#ifdef CONFIG_LEDS_CLASS
return 0;
}
-static int qt2160_get_key_matrix(struct qt2160_data *qt2160)
+static void qt2160_get_key_matrix(struct input_dev *input)
{
+ struct qt2160_data *qt2160 = input_get_drvdata(input);
struct i2c_client *client = qt2160->client;
- struct input_dev *input = qt2160->input;
u8 regs[6];
u16 old_matrix, new_matrix;
int ret, i, mask;
if (ret) {
dev_err(&client->dev,
"could not perform chip read.\n");
- return ret;
+ return;
}
old_matrix = qt2160->key_matrix;
}
input_sync(input);
-
- return 0;
}
-static irqreturn_t qt2160_irq(int irq, void *_qt2160)
+static irqreturn_t qt2160_irq(int irq, void *data)
{
- struct qt2160_data *qt2160 = _qt2160;
+ struct input_dev *input = data;
- mod_delayed_work(system_wq, &qt2160->dwork, 0);
+ qt2160_get_key_matrix(input);
return IRQ_HANDLED;
}
-static void qt2160_schedule_read(struct qt2160_data *qt2160)
-{
- schedule_delayed_work(&qt2160->dwork, QT2160_CYCLE_INTERVAL);
-}
-
-static void qt2160_worker(struct work_struct *work)
-{
- struct qt2160_data *qt2160 =
- container_of(work, struct qt2160_data, dwork.work);
-
- dev_dbg(&qt2160->client->dev, "worker\n");
-
- qt2160_get_key_matrix(qt2160);
-
- /* Avoid device lock up by checking every so often */
- qt2160_schedule_read(qt2160);
-}
-
static int qt2160_read(struct i2c_client *client, u8 reg)
{
int ret;
static int qt2160_register_leds(struct qt2160_data *qt2160)
{
struct i2c_client *client = qt2160->client;
- int ret;
+ int error;
int i;
for (i = 0; i < QT2160_NUM_LEDS_X; i++) {
led->id = i;
led->qt2160 = qt2160;
- ret = led_classdev_register(&client->dev, &led->cdev);
- if (ret < 0)
- return ret;
+ error = devm_led_classdev_register(&client->dev, &led->cdev);
+ if (error)
+ return error;
}
/* Tur off LEDs */
return 0;
}
-static void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
- int i;
-
- for (i = 0; i < QT2160_NUM_LEDS_X; i++)
- led_classdev_unregister(&qt2160->leds[i].cdev);
-}
-
#else
static inline int qt2160_register_leds(struct qt2160_data *qt2160)
return 0;
}
-static inline void qt2160_unregister_leds(struct qt2160_data *qt2160)
-{
-}
-
#endif
static bool qt2160_identify(struct i2c_client *client)
int i;
int error;
- /* Check functionality */
- error = i2c_check_functionality(client->adapter,
- I2C_FUNC_SMBUS_BYTE);
- if (!error) {
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_BYTE)) {
dev_err(&client->dev, "%s adapter not supported\n",
- dev_driver_string(&client->adapter->dev));
+ dev_driver_string(&client->adapter->dev));
return -ENODEV;
}
return -ENODEV;
/* Chip is valid and active. Allocate structure */
- qt2160 = kzalloc(sizeof(struct qt2160_data), GFP_KERNEL);
- input = input_allocate_device();
- if (!qt2160 || !input) {
- dev_err(&client->dev, "insufficient memory\n");
- error = -ENOMEM;
- goto err_free_mem;
- }
+ qt2160 = devm_kzalloc(&client->dev, sizeof(*qt2160), GFP_KERNEL);
+ if (!qt2160)
+ return -ENOMEM;
+
+ input = devm_input_allocate_device(&client->dev);
+ if (!input)
+ return -ENOMEM;
qt2160->client = client;
qt2160->input = input;
- INIT_DELAYED_WORK(&qt2160->dwork, qt2160_worker);
input->name = "AT42QT2160 Touch Sense Keyboard";
input->id.bustype = BUS_I2C;
}
__clear_bit(KEY_RESERVED, input->keybit);
+ input_set_drvdata(input, qt2160);
+
/* Calibrate device */
error = qt2160_write(client, QT2160_CMD_CALIBRATE, 1);
if (error) {
dev_err(&client->dev, "failed to calibrate device\n");
- goto err_free_mem;
+ return error;
}
if (client->irq) {
- error = request_irq(client->irq, qt2160_irq,
- IRQF_TRIGGER_FALLING, "qt2160", qt2160);
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, qt2160_irq,
+ IRQF_ONESHOT,
+ "qt2160", input);
if (error) {
dev_err(&client->dev,
"failed to allocate irq %d\n", client->irq);
- goto err_free_mem;
+ return error;
+ }
+ } else {
+ error = input_setup_polling(input, qt2160_get_key_matrix);
+ if (error) {
+ dev_err(&client->dev, "Failed to setup polling\n");
+ return error;
}
+ input_set_poll_interval(input, QT2160_CYCLE_INTERVAL);
}
error = qt2160_register_leds(qt2160);
if (error) {
dev_err(&client->dev, "Failed to register leds\n");
- goto err_free_irq;
+ return error;
}
error = input_register_device(qt2160->input);
if (error) {
dev_err(&client->dev,
"Failed to register input device\n");
- goto err_unregister_leds;
+ return error;
}
- i2c_set_clientdata(client, qt2160);
- qt2160_schedule_read(qt2160);
-
return 0;
-
-err_unregister_leds:
- qt2160_unregister_leds(qt2160);
-err_free_irq:
- if (client->irq)
- free_irq(client->irq, qt2160);
-err_free_mem:
- input_free_device(input);
- kfree(qt2160);
- return error;
-}
-
-static void qt2160_remove(struct i2c_client *client)
-{
- struct qt2160_data *qt2160 = i2c_get_clientdata(client);
-
- qt2160_unregister_leds(qt2160);
-
- /* Release IRQ so no queue will be scheduled */
- if (client->irq)
- free_irq(client->irq, qt2160);
-
- cancel_delayed_work_sync(&qt2160->dwork);
-
- input_unregister_device(qt2160->input);
- kfree(qt2160);
}
static const struct i2c_device_id qt2160_idtable[] = {
.id_table = qt2160_idtable,
.probe = qt2160_probe,
- .remove = qt2160_remove,
};
module_i2c_driver(qt2160_driver);
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_wakeirq.h>
#include <linux/pm_wakeup.h>
+#include <linux/property.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
#include <linux/slab.h>
input_set_drvdata(lradc->input, lradc);
- lradc->base = devm_ioremap_resource(dev,
- platform_get_resource(pdev, IORESOURCE_MEM, 0));
+ lradc->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(lradc->base))
return PTR_ERR(lradc->base);
#define TCA6416_INVERT 2
#define TCA6416_DIRECTION 3
+#define TCA6416_POLL_INTERVAL 100 /* msec */
+
static const struct i2c_device_id tca6416_id[] = {
{ "tca6416-keys", 16, },
{ "tca6408-keys", 8, },
struct i2c_client *client;
struct input_dev *input;
- struct delayed_work dwork;
int io_size;
int irqnum;
u16 pinmask;
return 0;
}
-static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
+static void tca6416_keys_scan(struct input_dev *input)
{
- struct input_dev *input = chip->input;
+ struct tca6416_keypad_chip *chip = input_get_drvdata(input);
u16 reg_val, val;
int error, i, pin_index;
*/
static irqreturn_t tca6416_keys_isr(int irq, void *dev_id)
{
- struct tca6416_keypad_chip *chip = dev_id;
-
- tca6416_keys_scan(chip);
+ tca6416_keys_scan(dev_id);
return IRQ_HANDLED;
}
-static void tca6416_keys_work_func(struct work_struct *work)
-{
- struct tca6416_keypad_chip *chip =
- container_of(work, struct tca6416_keypad_chip, dwork.work);
-
- tca6416_keys_scan(chip);
- schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
-}
-
static int tca6416_keys_open(struct input_dev *dev)
{
struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
- /* Get initial device state in case it has switches */
- tca6416_keys_scan(chip);
-
- if (chip->use_polling)
- schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
- else
- enable_irq(chip->irqnum);
+ if (!chip->use_polling) {
+ /* Get initial device state in case it has switches */
+ tca6416_keys_scan(dev);
+ enable_irq(chip->client->irq);
+ }
return 0;
}
{
struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
- if (chip->use_polling)
- cancel_delayed_work_sync(&chip->dwork);
- else
- disable_irq(chip->irqnum);
+ if (!chip->use_polling)
+ disable_irq(chip->client->irq);
}
static int tca6416_setup_registers(struct tca6416_keypad_chip *chip)
return -EINVAL;
}
- chip = kzalloc(struct_size(chip, buttons, pdata->nbuttons), GFP_KERNEL);
- input = input_allocate_device();
- if (!chip || !input) {
- error = -ENOMEM;
- goto fail1;
- }
+ chip = devm_kzalloc(&client->dev,
+ struct_size(chip, buttons, pdata->nbuttons),
+ GFP_KERNEL);
+ if (!chip)
+ return -ENOMEM;
+
+ input = devm_input_allocate_device(&client->dev);
+ if (!input)
+ return -ENOMEM;
chip->client = client;
chip->input = input;
chip->pinmask = pdata->pinmask;
chip->use_polling = pdata->use_polling;
- INIT_DELAYED_WORK(&chip->dwork, tca6416_keys_work_func);
-
input->phys = "tca6416-keys/input0";
input->name = client->name;
- input->dev.parent = &client->dev;
input->open = tca6416_keys_open;
input->close = tca6416_keys_close;
*/
error = tca6416_setup_registers(chip);
if (error)
- goto fail1;
+ return error;
- if (!chip->use_polling) {
- if (pdata->irq_is_gpio)
- chip->irqnum = gpio_to_irq(client->irq);
- else
- chip->irqnum = client->irq;
-
- error = request_threaded_irq(chip->irqnum, NULL,
- tca6416_keys_isr,
- IRQF_TRIGGER_FALLING |
- IRQF_ONESHOT | IRQF_NO_AUTOEN,
- "tca6416-keypad", chip);
+ if (chip->use_polling) {
+ error = input_setup_polling(input, tca6416_keys_scan);
+ if (error) {
+ dev_err(&client->dev, "Failed to setup polling\n");
+ return error;
+ }
+
+ input_set_poll_interval(input, TCA6416_POLL_INTERVAL);
+ } else {
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, tca6416_keys_isr,
+ IRQF_TRIGGER_FALLING |
+ IRQF_ONESHOT |
+ IRQF_NO_AUTOEN,
+ "tca6416-keypad", input);
if (error) {
dev_dbg(&client->dev,
"Unable to claim irq %d; error %d\n",
- chip->irqnum, error);
- goto fail1;
+ client->irq, error);
+ return error;
}
}
if (error) {
dev_dbg(&client->dev,
"Unable to register input device, error: %d\n", error);
- goto fail2;
+ return error;
}
i2c_set_clientdata(client, chip);
- device_init_wakeup(&client->dev, 1);
return 0;
-
-fail2:
- if (!chip->use_polling) {
- free_irq(chip->irqnum, chip);
- enable_irq(chip->irqnum);
- }
-fail1:
- input_free_device(input);
- kfree(chip);
- return error;
}
-static void tca6416_keypad_remove(struct i2c_client *client)
-{
- struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
- if (!chip->use_polling) {
- free_irq(chip->irqnum, chip);
- enable_irq(chip->irqnum);
- }
-
- input_unregister_device(chip->input);
- kfree(chip);
-}
-
-static int tca6416_keypad_suspend(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
- if (device_may_wakeup(dev))
- enable_irq_wake(chip->irqnum);
-
- return 0;
-}
-
-static int tca6416_keypad_resume(struct device *dev)
-{
- struct i2c_client *client = to_i2c_client(dev);
- struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
-
- if (device_may_wakeup(dev))
- disable_irq_wake(chip->irqnum);
-
- return 0;
-}
-
-static DEFINE_SIMPLE_DEV_PM_OPS(tca6416_keypad_dev_pm_ops,
- tca6416_keypad_suspend, tca6416_keypad_resume);
-
static struct i2c_driver tca6416_keypad_driver = {
.driver = {
.name = "tca6416-keypad",
- .pm = pm_sleep_ptr(&tca6416_keypad_dev_pm_ops),
},
.probe = tca6416_keypad_probe,
- .remove = tca6416_keypad_remove,
.id_table = tca6416_id,
};
timer_setup(&kbc->timer, tegra_kbc_keypress_timer, 0);
- kbc->mmio = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ kbc->mmio = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(kbc->mmio))
return PTR_ERR(kbc->mmio);
#include <linux/leds.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/pm.h>
#include <linux/regulator/consumer.h>
module will be called iqs626a.
config INPUT_IQS7222
- tristate "Azoteq IQS7222A/B/C capacitive touch controller"
+ tristate "Azoteq IQS7222A/B/C/D capacitive touch controller"
depends on I2C
help
- Say Y to enable support for the Azoteq IQS7222A/B/C family
+ Say Y to enable support for the Azoteq IQS7222A/B/C/D family
of capacitive touch controllers.
To compile this driver as a module, choose M here: the
-/**
+// SPDX-License-Identifier: GPL-2.0-only
+/*
* CPCAP Power Button Input Driver
*
* Copyright (C) 2017 Sebastian Reichel <sre@kernel.org>
- *
- * This file is subject to the terms and conditions of the GNU General
- * Public License. See the file "COPYING" in the main directory of this
- * archive for more details.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
#include <linux/workqueue.h>
#include <linux/regmap.h>
#include <linux/of.h>
return error;
}
+ error = dev_pm_set_wake_irq(&pdev->dev, irq);
+ if (error)
+ dev_warn(&pdev->dev,
+ "Failed to set IRQ %d as a wake IRQ: %d\n",
+ irq, error);
+ else
+ device_init_wakeup(&pdev->dev, true);
+
error = input_register_device(onkey->input);
if (error) {
dev_err(&pdev->dev,
#include <linux/input.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/regulator/consumer.h>
return -ENOMEM;
vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
- err = PTR_ERR_OR_ZERO(vibrator->vcc);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to request regulator: %d\n",
- err);
- return err;
- }
+ if (IS_ERR(vibrator->vcc))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+ "Failed to request regulator\n");
vibrator->gpio = devm_gpiod_get(&pdev->dev, "enable", GPIOD_OUT_LOW);
- err = PTR_ERR_OR_ZERO(vibrator->gpio);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to request main gpio: %d\n",
- err);
- return err;
- }
+ if (IS_ERR(vibrator->gpio))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->gpio),
+ "Failed to request main gpio\n");
INIT_WORK(&vibrator->play_work, gpio_vibrator_play_work);
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of_device.h>
#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/slab.h>
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Azoteq IQS7222A/B/C Capacitive Touch Controller
+ * Azoteq IQS7222A/B/C/D Capacitive Touch Controller
*
* Copyright (C) 2022 Jeff LaBundy <jeff@labundy.com>
*/
#include <linux/gpio/consumer.h>
#include <linux/i2c.h>
#include <linux/input.h>
+#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/property.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
#define IQS7222_PROD_NUM_A 840
#define IQS7222_PROD_NUM_B 698
#define IQS7222_PROD_NUM_C 863
+#define IQS7222_PROD_NUM_D 1046
#define IQS7222_SYS_STATUS 0x10
#define IQS7222_SYS_STATUS_RESET BIT(3)
#define IQS7222_EVENT_MASK_ATI BIT(12)
#define IQS7222_EVENT_MASK_SLDR BIT(10)
+#define IQS7222_EVENT_MASK_TPAD IQS7222_EVENT_MASK_SLDR
#define IQS7222_EVENT_MASK_TOUCH BIT(1)
#define IQS7222_EVENT_MASK_PROX BIT(0)
#define IQS7222_MAX_COLS_CHAN 6
#define IQS7222_MAX_COLS_FILT 2
#define IQS7222_MAX_COLS_SLDR 11
+#define IQS7222_MAX_COLS_TPAD 24
#define IQS7222_MAX_COLS_GPIO 3
#define IQS7222_MAX_COLS_SYS 13
IQS7222_REG_GRP_BTN,
IQS7222_REG_GRP_CHAN,
IQS7222_REG_GRP_SLDR,
+ IQS7222_REG_GRP_TPAD,
IQS7222_REG_GRP_GPIO,
IQS7222_REG_GRP_SYS,
IQS7222_NUM_REG_GRPS
};
static const char * const iqs7222_reg_grp_names[IQS7222_NUM_REG_GRPS] = {
- [IQS7222_REG_GRP_CYCLE] = "cycle",
- [IQS7222_REG_GRP_CHAN] = "channel",
- [IQS7222_REG_GRP_SLDR] = "slider",
- [IQS7222_REG_GRP_GPIO] = "gpio",
+ [IQS7222_REG_GRP_CYCLE] = "cycle-%d",
+ [IQS7222_REG_GRP_CHAN] = "channel-%d",
+ [IQS7222_REG_GRP_SLDR] = "slider-%d",
+ [IQS7222_REG_GRP_TPAD] = "trackpad",
+ [IQS7222_REG_GRP_GPIO] = "gpio-%d",
};
static const unsigned int iqs7222_max_cols[IQS7222_NUM_REG_GRPS] = {
[IQS7222_REG_GRP_CHAN] = IQS7222_MAX_COLS_CHAN,
[IQS7222_REG_GRP_FILT] = IQS7222_MAX_COLS_FILT,
[IQS7222_REG_GRP_SLDR] = IQS7222_MAX_COLS_SLDR,
+ [IQS7222_REG_GRP_TPAD] = IQS7222_MAX_COLS_TPAD,
[IQS7222_REG_GRP_GPIO] = IQS7222_MAX_COLS_GPIO,
[IQS7222_REG_GRP_SYS] = IQS7222_MAX_COLS_SYS,
};
struct iqs7222_event_desc {
const char *name;
+ u16 link;
u16 mask;
u16 val;
+ u16 strict;
u16 enable;
enum iqs7222_reg_key_id reg_key;
};
},
};
+static const struct iqs7222_event_desc iqs7222_tp_events[] = {
+ {
+ .name = "event-press",
+ .link = BIT(7),
+ },
+ {
+ .name = "event-tap",
+ .link = BIT(0),
+ .mask = BIT(0),
+ .val = BIT(0),
+ .enable = BIT(0),
+ .reg_key = IQS7222_REG_KEY_TAP,
+ },
+ {
+ .name = "event-swipe-x-pos",
+ .link = BIT(2),
+ .mask = BIT(2) | BIT(1),
+ .val = BIT(2),
+ .strict = BIT(4),
+ .enable = BIT(1),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-swipe-y-pos",
+ .link = BIT(3),
+ .mask = BIT(3) | BIT(1),
+ .val = BIT(3),
+ .strict = BIT(3),
+ .enable = BIT(1),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-swipe-x-neg",
+ .link = BIT(4),
+ .mask = BIT(4) | BIT(1),
+ .val = BIT(4),
+ .strict = BIT(4),
+ .enable = BIT(1),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-swipe-y-neg",
+ .link = BIT(5),
+ .mask = BIT(5) | BIT(1),
+ .val = BIT(5),
+ .strict = BIT(3),
+ .enable = BIT(1),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-flick-x-pos",
+ .link = BIT(2),
+ .mask = BIT(2) | BIT(1),
+ .val = BIT(2) | BIT(1),
+ .strict = BIT(4),
+ .enable = BIT(2),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-flick-y-pos",
+ .link = BIT(3),
+ .mask = BIT(3) | BIT(1),
+ .val = BIT(3) | BIT(1),
+ .strict = BIT(3),
+ .enable = BIT(2),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-flick-x-neg",
+ .link = BIT(4),
+ .mask = BIT(4) | BIT(1),
+ .val = BIT(4) | BIT(1),
+ .strict = BIT(4),
+ .enable = BIT(2),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+ {
+ .name = "event-flick-y-neg",
+ .link = BIT(5),
+ .mask = BIT(5) | BIT(1),
+ .val = BIT(5) | BIT(1),
+ .strict = BIT(3),
+ .enable = BIT(2),
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ },
+};
+
struct iqs7222_reg_grp_desc {
u16 base;
int num_row;
},
},
},
+ {
+ .prod_num = IQS7222_PROD_NUM_D,
+ .fw_major = 0,
+ .fw_minor = 37,
+ .touch_link = 1770,
+ .allow_offset = 9,
+ .event_offset = 10,
+ .comms_offset = 11,
+ .reg_grps = {
+ [IQS7222_REG_GRP_STAT] = {
+ .base = IQS7222_SYS_STATUS,
+ .num_row = 1,
+ .num_col = 7,
+ },
+ [IQS7222_REG_GRP_CYCLE] = {
+ .base = 0x8000,
+ .num_row = 7,
+ .num_col = 2,
+ },
+ [IQS7222_REG_GRP_GLBL] = {
+ .base = 0x8700,
+ .num_row = 1,
+ .num_col = 3,
+ },
+ [IQS7222_REG_GRP_BTN] = {
+ .base = 0x9000,
+ .num_row = 14,
+ .num_col = 3,
+ },
+ [IQS7222_REG_GRP_CHAN] = {
+ .base = 0xA000,
+ .num_row = 14,
+ .num_col = 4,
+ },
+ [IQS7222_REG_GRP_FILT] = {
+ .base = 0xAE00,
+ .num_row = 1,
+ .num_col = 2,
+ },
+ [IQS7222_REG_GRP_TPAD] = {
+ .base = 0xB000,
+ .num_row = 1,
+ .num_col = 24,
+ },
+ [IQS7222_REG_GRP_GPIO] = {
+ .base = 0xC000,
+ .num_row = 3,
+ .num_col = 3,
+ },
+ [IQS7222_REG_GRP_SYS] = {
+ .base = IQS7222_SYS_SETUP,
+ .num_row = 1,
+ .num_col = 12,
+ },
+ },
+ },
};
struct iqs7222_prop_desc {
.label = "maximum gesture time",
},
{
+ .name = "azoteq,num-rows",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 0,
+ .reg_shift = 4,
+ .reg_width = 4,
+ .val_min = 1,
+ .val_max = 12,
+ .label = "number of rows",
+ },
+ {
+ .name = "azoteq,num-cols",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 0,
+ .reg_shift = 0,
+ .reg_width = 4,
+ .val_min = 1,
+ .val_max = 12,
+ .label = "number of columns",
+ },
+ {
+ .name = "azoteq,lower-cal-y",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 1,
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "lower vertical calibration",
+ },
+ {
+ .name = "azoteq,lower-cal-x",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 1,
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "lower horizontal calibration",
+ },
+ {
+ .name = "azoteq,upper-cal-y",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 2,
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "upper vertical calibration",
+ },
+ {
+ .name = "azoteq,upper-cal-x",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 2,
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "upper horizontal calibration",
+ },
+ {
+ .name = "azoteq,top-speed",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 3,
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_pitch = 4,
+ .label = "top speed",
+ },
+ {
+ .name = "azoteq,bottom-speed",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_offset = 3,
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "bottom speed",
+ },
+ {
+ .name = "azoteq,gesture-min-ms",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_key = IQS7222_REG_KEY_TAP,
+ .reg_offset = 20,
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_pitch = 16,
+ .label = "minimum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-max-ms",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ .reg_offset = 21,
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_pitch = 16,
+ .label = "maximum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-max-ms",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_key = IQS7222_REG_KEY_TAP,
+ .reg_offset = 21,
+ .reg_shift = 0,
+ .reg_width = 8,
+ .val_pitch = 16,
+ .label = "maximum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_key = IQS7222_REG_KEY_TAP,
+ .reg_offset = 22,
+ .reg_shift = 0,
+ .reg_width = 16,
+ .label = "gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_grp = IQS7222_REG_GRP_TPAD,
+ .reg_key = IQS7222_REG_KEY_AXIAL,
+ .reg_offset = 23,
+ .reg_shift = 0,
+ .reg_width = 16,
+ .label = "gesture distance",
+ },
+ {
.name = "drive-open-drain",
.reg_grp = IQS7222_REG_GRP_GPIO,
.reg_offset = 0,
struct gpio_desc *irq_gpio;
struct i2c_client *client;
struct input_dev *keypad;
+ struct touchscreen_properties prop;
unsigned int kp_type[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
unsigned int kp_code[IQS7222_MAX_CHAN][ARRAY_SIZE(iqs7222_kp_events)];
unsigned int sl_code[IQS7222_MAX_SLDR][ARRAY_SIZE(iqs7222_sl_events)];
unsigned int sl_axis[IQS7222_MAX_SLDR];
+ unsigned int tp_code[ARRAY_SIZE(iqs7222_tp_events)];
u16 cycle_setup[IQS7222_MAX_CHAN / 2][IQS7222_MAX_COLS_CYCLE];
u16 glbl_setup[IQS7222_MAX_COLS_GLBL];
u16 btn_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_BTN];
u16 chan_setup[IQS7222_MAX_CHAN][IQS7222_MAX_COLS_CHAN];
u16 filt_setup[IQS7222_MAX_COLS_FILT];
u16 sldr_setup[IQS7222_MAX_SLDR][IQS7222_MAX_COLS_SLDR];
+ u16 tpad_setup[IQS7222_MAX_COLS_TPAD];
u16 gpio_setup[ARRAY_SIZE(iqs7222_gpio_links)][IQS7222_MAX_COLS_GPIO];
u16 sys_setup[IQS7222_MAX_COLS_SYS];
};
case IQS7222_REG_GRP_SLDR:
return iqs7222->sldr_setup[row];
+ case IQS7222_REG_GRP_TPAD:
+ return iqs7222->tpad_setup;
+
case IQS7222_REG_GRP_GPIO:
return iqs7222->gpio_setup[row];
if (error)
return error;
- sys_setup &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
- sys_setup &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
-
for (i = 0; i < IQS7222_NUM_RETRIES; i++) {
/*
* Trigger ATI from streaming and normal-power modes so that
return error;
}
- if (dir == READ)
+ if (dir == READ) {
+ iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_INTF_MODE_MASK;
+ iqs7222->sys_setup[0] &= ~IQS7222_SYS_SETUP_PWR_MODE_MASK;
return 0;
+ }
return iqs7222_ati_trigger(iqs7222);
}
ref_setup[4] = dev_desc->touch_link;
if (fwnode_property_present(chan_node, "azoteq,use-prox"))
ref_setup[4] -= 2;
+ } else if (dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row &&
+ fwnode_property_present(chan_node,
+ "azoteq,counts-filt-enable")) {
+ /*
+ * In the case of IQS7222D, however, the reference mode field
+ * is partially repurposed as a counts filter enable control.
+ */
+ chan_setup[0] |= IQS7222_CHAN_SETUP_0_REF_MODE_REF;
}
if (fwnode_property_present(chan_node, "azoteq,rx-enable")) {
IQS7222_REG_KEY_NO_WHEEL);
}
+static int iqs7222_parse_tpad(struct iqs7222_private *iqs7222,
+ struct fwnode_handle *tpad_node, int tpad_index)
+{
+ const struct iqs7222_dev_desc *dev_desc = iqs7222->dev_desc;
+ struct touchscreen_properties *prop = &iqs7222->prop;
+ struct i2c_client *client = iqs7222->client;
+ int num_chan = dev_desc->reg_grps[IQS7222_REG_GRP_CHAN].num_row;
+ int count, error, i;
+ u16 *event_mask = &iqs7222->sys_setup[dev_desc->event_offset];
+ u16 *tpad_setup = iqs7222->tpad_setup;
+ unsigned int chan_sel[12];
+
+ error = iqs7222_parse_props(iqs7222, tpad_node, tpad_index,
+ IQS7222_REG_GRP_TPAD,
+ IQS7222_REG_KEY_NONE);
+ if (error)
+ return error;
+
+ count = fwnode_property_count_u32(tpad_node, "azoteq,channel-select");
+ if (count < 0) {
+ dev_err(&client->dev, "Failed to count %s channels: %d\n",
+ fwnode_get_name(tpad_node), count);
+ return count;
+ } else if (!count || count > ARRAY_SIZE(chan_sel)) {
+ dev_err(&client->dev, "Invalid number of %s channels\n",
+ fwnode_get_name(tpad_node));
+ return -EINVAL;
+ }
+
+ error = fwnode_property_read_u32_array(tpad_node,
+ "azoteq,channel-select",
+ chan_sel, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read %s channels: %d\n",
+ fwnode_get_name(tpad_node), error);
+ return error;
+ }
+
+ tpad_setup[6] &= ~GENMASK(num_chan - 1, 0);
+
+ for (i = 0; i < ARRAY_SIZE(chan_sel); i++) {
+ tpad_setup[8 + i] = 0;
+ if (i >= count || chan_sel[i] == U8_MAX)
+ continue;
+
+ if (chan_sel[i] >= num_chan) {
+ dev_err(&client->dev, "Invalid %s channel: %u\n",
+ fwnode_get_name(tpad_node), chan_sel[i]);
+ return -EINVAL;
+ }
+
+ /*
+ * The following fields indicate which channels participate in
+ * the trackpad, as well as each channel's relative placement.
+ */
+ tpad_setup[6] |= BIT(chan_sel[i]);
+ tpad_setup[8 + i] = chan_sel[i] * 34 + 1072;
+ }
+
+ tpad_setup[7] = dev_desc->touch_link;
+ if (fwnode_property_present(tpad_node, "azoteq,use-prox"))
+ tpad_setup[7] -= 2;
+
+ for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++)
+ tpad_setup[20] &= ~(iqs7222_tp_events[i].strict |
+ iqs7222_tp_events[i].enable);
+
+ for (i = 0; i < ARRAY_SIZE(iqs7222_tp_events); i++) {
+ const char *event_name = iqs7222_tp_events[i].name;
+ struct fwnode_handle *event_node;
+
+ event_node = fwnode_get_named_child_node(tpad_node, event_name);
+ if (!event_node)
+ continue;
+
+ if (fwnode_property_present(event_node,
+ "azoteq,gesture-angle-tighten"))
+ tpad_setup[20] |= iqs7222_tp_events[i].strict;
+
+ tpad_setup[20] |= iqs7222_tp_events[i].enable;
+
+ error = iqs7222_parse_event(iqs7222, event_node, tpad_index,
+ IQS7222_REG_GRP_TPAD,
+ iqs7222_tp_events[i].reg_key,
+ iqs7222_tp_events[i].link, 1566,
+ NULL,
+ &iqs7222->tp_code[i]);
+ fwnode_handle_put(event_node);
+ if (error)
+ return error;
+
+ if (!dev_desc->event_offset)
+ continue;
+
+ /*
+ * The press/release event is determined based on whether the
+ * coordinate fields report 0xFFFF and solely relies on touch
+ * or proximity interrupts to be unmasked.
+ */
+ if (i)
+ *event_mask |= IQS7222_EVENT_MASK_TPAD;
+ else if (tpad_setup[7] == dev_desc->touch_link)
+ *event_mask |= IQS7222_EVENT_MASK_TOUCH;
+ else
+ *event_mask |= IQS7222_EVENT_MASK_PROX;
+ }
+
+ if (!iqs7222->tp_code[0])
+ return 0;
+
+ input_set_abs_params(iqs7222->keypad, ABS_X,
+ 0, (tpad_setup[4] ? : 1) - 1, 0, 0);
+
+ input_set_abs_params(iqs7222->keypad, ABS_Y,
+ 0, (tpad_setup[5] ? : 1) - 1, 0, 0);
+
+ touchscreen_parse_properties(iqs7222->keypad, false, prop);
+
+ if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+ dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+ prop->max_x, prop->max_y);
+ return -EINVAL;
+ }
+
+ tpad_setup[4] = prop->max_x + 1;
+ tpad_setup[5] = prop->max_y + 1;
+
+ return 0;
+}
+
static int (*iqs7222_parse_extra[IQS7222_NUM_REG_GRPS])
(struct iqs7222_private *iqs7222,
struct fwnode_handle *reg_grp_node,
[IQS7222_REG_GRP_CYCLE] = iqs7222_parse_cycle,
[IQS7222_REG_GRP_CHAN] = iqs7222_parse_chan,
[IQS7222_REG_GRP_SLDR] = iqs7222_parse_sldr,
+ [IQS7222_REG_GRP_TPAD] = iqs7222_parse_tpad,
};
static int iqs7222_parse_reg_grp(struct iqs7222_private *iqs7222,
if (iqs7222_reg_grp_names[reg_grp]) {
char reg_grp_name[16];
- snprintf(reg_grp_name, sizeof(reg_grp_name), "%s-%d",
+ snprintf(reg_grp_name, sizeof(reg_grp_name),
iqs7222_reg_grp_names[reg_grp], reg_grp_index);
reg_grp_node = device_get_named_child_node(&client->dev,
continue;
/*
- * The IQS7222C exposes multiple GPIO and must be informed
- * as to which GPIO this group represents.
+ * The IQS7222C and IQS7222D expose multiple GPIO and must be
+ * informed as to which GPIO this group represents.
*/
for (j = 0; j < ARRAY_SIZE(iqs7222_gpio_links); j++)
gpio_setup[0] &= ~BIT(iqs7222_gpio_links[j]);
iqs7222->sl_code[i][j], 0);
}
+ for (i = 0; i < dev_desc->reg_grps[IQS7222_REG_GRP_TPAD].num_row; i++) {
+ u16 tpad_pos_x = le16_to_cpu(status[4]);
+ u16 tpad_pos_y = le16_to_cpu(status[5]);
+ u16 state = le16_to_cpu(status[6]);
+
+ input_report_key(iqs7222->keypad, iqs7222->tp_code[0],
+ tpad_pos_x < U16_MAX);
+
+ if (tpad_pos_x < U16_MAX)
+ touchscreen_report_pos(iqs7222->keypad, &iqs7222->prop,
+ tpad_pos_x, tpad_pos_y, false);
+
+ if (!(le16_to_cpu(status[1]) & IQS7222_EVENT_MASK_TPAD))
+ continue;
+
+ /*
+ * Skip the press/release event, as it does not have separate
+ * status fields and is handled separately.
+ */
+ for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++) {
+ u16 mask = iqs7222_tp_events[j].mask;
+ u16 val = iqs7222_tp_events[j].val;
+
+ input_report_key(iqs7222->keypad,
+ iqs7222->tp_code[j],
+ (state & mask) == val);
+ }
+
+ input_sync(iqs7222->keypad);
+
+ for (j = 1; j < ARRAY_SIZE(iqs7222_tp_events); j++)
+ input_report_key(iqs7222->keypad,
+ iqs7222->tp_code[j], 0);
+ }
+
input_sync(iqs7222->keypad);
return 0;
{ .compatible = "azoteq,iqs7222a" },
{ .compatible = "azoteq,iqs7222b" },
{ .compatible = "azoteq,iqs7222c" },
+ { .compatible = "azoteq,iqs7222d" },
{ }
};
MODULE_DEVICE_TABLE(of, iqs7222_of_match);
module_i2c_driver(iqs7222_i2c_driver);
MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
-MODULE_DESCRIPTION("Azoteq IQS7222A/B/C Capacitive Touch Controller");
+MODULE_DESCRIPTION("Azoteq IQS7222A/B/C/D Capacitive Touch Controller");
MODULE_LICENSE("GPL");
#include <linux/delay.h>
#include <linux/i2c.h>
#include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
#define MMA8450_DRV_NAME "mma8450"
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/regmap.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <linux/regmap.h>
#include <linux/log2.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#define PON_CNTL_1 0x1C
#define PON_CNTL_PULL_UP BIT(7)
return -ENOMEM;
beeper->pwm = devm_pwm_get(dev, NULL);
- if (IS_ERR(beeper->pwm)) {
- error = PTR_ERR(beeper->pwm);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to request PWM device: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(beeper->pwm))
+ return dev_err_probe(dev, PTR_ERR(beeper->pwm), "Failed to request PWM device\n");
/* Sync up PWM state and ensure it is off. */
pwm_init_state(beeper->pwm, &state);
}
beeper->amplifier = devm_regulator_get(dev, "amp");
- if (IS_ERR(beeper->amplifier)) {
- error = PTR_ERR(beeper->amplifier);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get 'amp' regulator: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(beeper->amplifier))
+ return dev_err_probe(dev, PTR_ERR(beeper->amplifier),
+ "Failed to get 'amp' regulator\n");
INIT_WORK(&beeper->work, pwm_beeper_work);
#include <linux/input.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <linux/pwm.h>
return -ENOMEM;
vibrator->vcc = devm_regulator_get(&pdev->dev, "vcc");
- err = PTR_ERR_OR_ZERO(vibrator->vcc);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to request regulator: %d\n",
- err);
- return err;
- }
+ if (IS_ERR(vibrator->vcc))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->vcc),
+ "Failed to request regulator\n");
vibrator->enable_gpio = devm_gpiod_get_optional(&pdev->dev, "enable",
GPIOD_OUT_LOW);
- err = PTR_ERR_OR_ZERO(vibrator->enable_gpio);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to request enable gpio: %d\n",
- err);
- return err;
- }
+ if (IS_ERR(vibrator->enable_gpio))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->enable_gpio),
+ "Failed to request enable gpio\n");
vibrator->pwm = devm_pwm_get(&pdev->dev, "enable");
- err = PTR_ERR_OR_ZERO(vibrator->pwm);
- if (err) {
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "Failed to request main pwm: %d\n",
- err);
- return err;
- }
+ if (IS_ERR(vibrator->pwm))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vibrator->pwm),
+ "Failed to request main pwm\n");
INIT_WORK(&vibrator->play_work, pwm_vibrator_play_work);
device_property_read_bool(dev, "rotary-encoder,relative-axis");
encoder->gpios = devm_gpiod_get_array(dev, NULL, GPIOD_IN);
- if (IS_ERR(encoder->gpios)) {
- err = PTR_ERR(encoder->gpios);
- if (err != -EPROBE_DEFER)
- dev_err(dev, "unable to get gpios: %d\n", err);
- return err;
- }
+ if (IS_ERR(encoder->gpios))
+ return dev_err_probe(dev, PTR_ERR(encoder->gpios), "unable to get gpios\n");
if (encoder->gpios->ndescs < 2) {
dev_err(dev, "not enough gpios found\n");
return -EINVAL;
input->name = pdev->name;
input->id.bustype = BUS_HOST;
- input->dev.parent = dev;
if (encoder->relative_axis)
input_set_capability(input, EV_REL, encoder->axis);
#include <linux/module.h>
#include <linux/init.h>
#include <linux/input.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
#include <asm/io.h>
mutex_init(&data->sysfs_mutex);
data->vcc = devm_regulator_get(dev, "vcc");
- if (IS_ERR(data->vcc)) {
- error = PTR_ERR(data->vcc);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get 'vcc' regulator: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(data->vcc))
+ return dev_err_probe(dev, PTR_ERR(data->vcc), "Failed to get 'vcc' regulator\n");
error = regulator_enable(data->vcc);
if (error) {
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/libps2.h>
return PSMOUSE_FULL_PACKET;
}
-static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+static void psmouse_activate_smbus_mode(struct psmouse_smbus_dev *smbdev)
{
- struct psmouse_smbus_dev *smbdev = psmouse->private;
-
- if (smbdev->need_deactivate)
- psmouse_deactivate(psmouse);
+ if (smbdev->need_deactivate) {
+ psmouse_deactivate(smbdev->psmouse);
+ /* Give the device time to switch into SMBus mode */
+ msleep(30);
+ }
+}
+static int psmouse_smbus_reconnect(struct psmouse *psmouse)
+{
+ psmouse_activate_smbus_mode(psmouse->private);
return 0;
}
}
}
- if (need_deactivate)
- psmouse_deactivate(psmouse);
+ psmouse_activate_smbus_mode(smbdev);
psmouse->private = smbdev;
psmouse->protocol_handler = psmouse_smbus_process_byte;
* Contributors: Daniel Hellstrom <daniel@gaisler.com>
*/
#include <linux/platform_device.h>
-#include <linux/of_device.h>
#include <linux/module.h>
#include <linux/serio.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/device.h>
#include <linux/delay.h>
.driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS |
SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP)
},
+ /* See comment on TUXEDO InfinityBook S17 Gen6 / Clevo NS70MU above */
+ {
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "PD5x_7xPNP_PNR_PNN_PNT"),
+ },
+ .driver_data = (void *)(SERIO_QUIRK_NOAUX)
+ },
{
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "X170SM"),
#ifndef _I8042_SPARCIO_H
#define _I8042_SPARCIO_H
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <asm/io.h>
int tx_irq, rx_irq;
rx_irq = platform_get_irq(dev, 0);
- if (rx_irq <= 0)
- return rx_irq < 0 ? rx_irq : -ENXIO;
+ if (rx_irq < 0)
+ return rx_irq;
tx_irq = platform_get_irq(dev, 1);
- if (tx_irq <= 0)
- return tx_irq < 0 ? tx_irq : -ENXIO;
+ if (tx_irq < 0)
+ return tx_irq;
serio = kzalloc(sizeof(struct serio), GFP_KERNEL);
rpckbd = kzalloc(sizeof(*rpckbd), GFP_KERNEL);
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#define DRIVER_NAME "xilinx_ps2"
module will be called mtouch.
config TOUCHSCREEN_NOVATEK_NVT_TS
- tristate "Novatek NVT-ts touchscreen support"
+ tristate "Novatek NT11205 touchscreen support"
depends on I2C
help
- Say Y here if you have a Novatek NVT-ts touchscreen.
+ Say Y here if you have a Novatek NT11205 touchscreen.
If unsure, say N.
To compile this driver as a module, choose M here: the
To compile this driver as a module, choose M here: the
module will be called iqs5xx.
+config TOUCHSCREEN_IQS7211
+ tristate "Azoteq IQS7210A/7211A/E trackpad/touchscreen controller"
+ depends on I2C
+ help
+ Say Y to enable support for the Azoteq IQS7210A/7211A/E
+ family of trackpad/touchscreen controllers.
+
+ To compile this driver as a module, choose M here: the
+ module will be called iqs7211.
+
config TOUCHSCREEN_ZINITIX
tristate "Zinitix touchscreen support"
depends on I2C
obj-$(CONFIG_TOUCHSCREEN_ROHM_BU21023) += rohm_bu21023.o
obj-$(CONFIG_TOUCHSCREEN_RASPBERRYPI_FW) += raspberrypi-ts.o
obj-$(CONFIG_TOUCHSCREEN_IQS5XX) += iqs5xx.o
+obj-$(CONFIG_TOUCHSCREEN_IQS7211) += iqs7211.o
obj-$(CONFIG_TOUCHSCREEN_ZINITIX) += zinitix.o
obj-$(CONFIG_TOUCHSCREEN_HIMAX_HX83112B) += himax_hx83112b.o
struct input_dev *in_dev;
struct input_absinfo *info;
u32 max_x = 0, max_y = 0;
+ struct device *dev = &client->dev;
int error;
if (!i2c_check_functionality(client->adapter,
I2C_FUNC_SMBUS_BYTE_DATA)) {
- dev_err(&client->dev, "i2c smbus byte data not supported\n");
+ dev_err(dev, "i2c smbus byte data not supported\n");
return -EIO;
}
if (!client->irq) {
- dev_err(&client->dev, "No IRQ set up\n");
+ dev_err(dev, "No IRQ set up\n");
return -EINVAL;
}
- ts = devm_kzalloc(&client->dev, sizeof(*ts), GFP_KERNEL);
+ ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL);
if (!ts)
return -ENOMEM;
ts->client = client;
- ts->x_flip = device_property_read_bool(&client->dev, "rohm,flip-x");
- ts->y_flip = device_property_read_bool(&client->dev, "rohm,flip-y");
+ ts->x_flip = device_property_read_bool(dev, "rohm,flip-x");
+ ts->y_flip = device_property_read_bool(dev, "rohm,flip-y");
- in_dev = devm_input_allocate_device(&client->dev);
+ in_dev = devm_input_allocate_device(dev);
if (!in_dev) {
- dev_err(&client->dev, "device memory alloc failed\n");
+ dev_err(dev, "device memory alloc failed\n");
return -ENOMEM;
}
ts->in_dev = in_dev;
in_dev->name = DRIVER_TP;
in_dev->id.bustype = BUS_I2C;
- device_property_read_u32(&client->dev, "rohm,touch-max-x", &max_x);
- device_property_read_u32(&client->dev, "rohm,touch-max-y", &max_y);
+ device_property_read_u32(dev, "rohm,touch-max-x", &max_x);
+ device_property_read_u32(dev, "rohm,touch-max-y", &max_y);
input_set_abs_params(in_dev, ABS_MT_POSITION_X, 0, max_x, 0, 0);
input_set_abs_params(in_dev, ABS_MT_POSITION_Y, 0, max_y, 0, 0);
/* Adjust for the legacy "flip" properties, if present */
if (!ts->props.invert_x &&
- device_property_read_bool(&client->dev, "rohm,flip-x")) {
+ device_property_read_bool(dev, "rohm,flip-x")) {
info = &in_dev->absinfo[ABS_MT_POSITION_X];
info->maximum -= info->minimum;
info->minimum = 0;
}
if (!ts->props.invert_y &&
- device_property_read_bool(&client->dev, "rohm,flip-y")) {
+ device_property_read_bool(dev, "rohm,flip-y")) {
info = &in_dev->absinfo[ABS_MT_POSITION_Y];
info->maximum -= info->minimum;
info->minimum = 0;
INPUT_MT_DIRECT | INPUT_MT_TRACK |
INPUT_MT_DROP_UNUSED);
if (error) {
- dev_err(&client->dev, "failed to initialize MT slots");
+ dev_err(dev, "failed to initialize MT slots");
return error;
}
- ts->regulator = devm_regulator_get(&client->dev, "avdd");
+ ts->regulator = devm_regulator_get(dev, "avdd");
if (IS_ERR(ts->regulator)) {
- dev_err(&client->dev, "regulator_get failed\n");
+ dev_err(dev, "regulator_get failed\n");
return PTR_ERR(ts->regulator);
}
error = regulator_enable(ts->regulator);
if (error) {
- dev_err(&client->dev, "regulator enable failed\n");
+ dev_err(dev, "regulator enable failed\n");
return error;
}
- error = devm_add_action_or_reset(&client->dev, bu21013_power_off, ts);
+ error = devm_add_action_or_reset(dev, bu21013_power_off, ts);
if (error) {
- dev_err(&client->dev, "failed to install power off handler\n");
+ dev_err(dev, "failed to install power off handler\n");
return error;
}
/* Named "CS" on the chip, DT binding is "reset" */
- ts->cs_gpiod = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
- error = PTR_ERR_OR_ZERO(ts->cs_gpiod);
- if (error) {
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev, "failed to get CS GPIO\n");
- return error;
- }
+ ts->cs_gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(ts->cs_gpiod))
+ return dev_err_probe(dev, PTR_ERR(ts->cs_gpiod), "failed to get CS GPIO\n");
+
gpiod_set_consumer_name(ts->cs_gpiod, "BU21013 CS");
- error = devm_add_action_or_reset(&client->dev,
- bu21013_disable_chip, ts);
+ error = devm_add_action_or_reset(dev, bu21013_disable_chip, ts);
if (error) {
- dev_err(&client->dev,
- "failed to install chip disable handler\n");
+ dev_err(dev, "failed to install chip disable handler\n");
return error;
}
/* Named "INT" on the chip, DT binding is "touch" */
- ts->int_gpiod = devm_gpiod_get_optional(&client->dev,
- "touch", GPIOD_IN);
+ ts->int_gpiod = devm_gpiod_get_optional(dev, "touch", GPIOD_IN);
error = PTR_ERR_OR_ZERO(ts->int_gpiod);
- if (error) {
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev, "failed to get INT GPIO\n");
- return error;
- }
+ if (error)
+ return dev_err_probe(dev, error, "failed to get INT GPIO\n");
if (ts->int_gpiod)
gpiod_set_consumer_name(ts->int_gpiod, "BU21013 INT");
/* configure the touch panel controller */
error = bu21013_init_chip(ts);
if (error) {
- dev_err(&client->dev, "error in bu21013 config\n");
+ dev_err(dev, "error in bu21013 config\n");
return error;
}
- error = devm_request_threaded_irq(&client->dev, client->irq,
- NULL, bu21013_gpio_irq,
+ error = devm_request_threaded_irq(dev, client->irq, NULL, bu21013_gpio_irq,
IRQF_ONESHOT, DRIVER_TP, ts);
if (error) {
- dev_err(&client->dev, "request irq %d failed\n",
- client->irq);
+ dev_err(dev, "request irq %d failed\n", client->irq);
return error;
}
error = input_register_device(in_dev);
if (error) {
- dev_err(&client->dev, "failed to register input device\n");
+ dev_err(dev, "failed to register input device\n");
return error;
}
static int bu21029_probe(struct i2c_client *client)
{
+ struct device *dev = &client->dev;
struct bu21029_ts_data *bu21029;
struct input_dev *in_dev;
int error;
I2C_FUNC_SMBUS_WRITE_BYTE |
I2C_FUNC_SMBUS_WRITE_BYTE_DATA |
I2C_FUNC_SMBUS_READ_I2C_BLOCK)) {
- dev_err(&client->dev,
- "i2c functionality support is not sufficient\n");
+ dev_err(dev, "i2c functionality support is not sufficient\n");
return -EIO;
}
- bu21029 = devm_kzalloc(&client->dev, sizeof(*bu21029), GFP_KERNEL);
+ bu21029 = devm_kzalloc(dev, sizeof(*bu21029), GFP_KERNEL);
if (!bu21029)
return -ENOMEM;
- error = device_property_read_u32(&client->dev, "rohm,x-plate-ohms",
- &bu21029->x_plate_ohms);
+ error = device_property_read_u32(dev, "rohm,x-plate-ohms", &bu21029->x_plate_ohms);
if (error) {
- dev_err(&client->dev,
- "invalid 'x-plate-ohms' supplied: %d\n", error);
+ dev_err(dev, "invalid 'x-plate-ohms' supplied: %d\n", error);
return error;
}
- bu21029->vdd = devm_regulator_get(&client->dev, "vdd");
- if (IS_ERR(bu21029->vdd)) {
- error = PTR_ERR(bu21029->vdd);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "failed to acquire 'vdd' supply: %d\n", error);
- return error;
- }
+ bu21029->vdd = devm_regulator_get(dev, "vdd");
+ if (IS_ERR(bu21029->vdd))
+ return dev_err_probe(dev, PTR_ERR(bu21029->vdd),
+ "failed to acquire 'vdd' supply\n");
- bu21029->reset_gpios = devm_gpiod_get_optional(&client->dev,
- "reset", GPIOD_OUT_HIGH);
- if (IS_ERR(bu21029->reset_gpios)) {
- error = PTR_ERR(bu21029->reset_gpios);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "failed to acquire 'reset' gpio: %d\n", error);
- return error;
- }
+ bu21029->reset_gpios = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+ if (IS_ERR(bu21029->reset_gpios))
+ return dev_err_probe(dev, PTR_ERR(bu21029->reset_gpios),
+ "failed to acquire 'reset' gpio\n");
- in_dev = devm_input_allocate_device(&client->dev);
+ in_dev = devm_input_allocate_device(dev);
if (!in_dev) {
- dev_err(&client->dev, "unable to allocate input device\n");
+ dev_err(dev, "unable to allocate input device\n");
return -ENOMEM;
}
input_set_drvdata(in_dev, bu21029);
- error = devm_request_threaded_irq(&client->dev, client->irq,
- NULL, bu21029_touch_soft_irq,
+ error = devm_request_threaded_irq(dev, client->irq, NULL,
+ bu21029_touch_soft_irq,
IRQF_ONESHOT | IRQF_NO_AUTOEN,
DRIVER_NAME, bu21029);
if (error) {
- dev_err(&client->dev,
- "unable to request touch irq: %d\n", error);
+ dev_err(dev, "unable to request touch irq: %d\n", error);
return error;
}
error = input_register_device(in_dev);
if (error) {
- dev_err(&client->dev,
- "unable to register input device: %d\n", error);
+ dev_err(dev, "unable to register input device: %d\n", error);
return error;
}
return -ENOMEM;
data->wake_gpio = devm_gpiod_get(dev, "wake", GPIOD_OUT_LOW);
- if (IS_ERR(data->wake_gpio)) {
- error = PTR_ERR(data->wake_gpio);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Error getting wake gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(data->wake_gpio))
+ return dev_err_probe(dev, PTR_ERR(data->wake_gpio), "Error getting wake gpio\n");
input = devm_input_allocate_device(dev);
if (!input)
ts->regulators[1].supply = "vdd";
error = devm_regulator_bulk_get(dev, ARRAY_SIZE(ts->regulators),
ts->regulators);
- if (error) {
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get regulators %d\n",
- error);
- return error;
- }
+ if (error)
+ return dev_err_probe(dev, error, "Failed to get regulators\n");
error = cy8ctma140_power_up(ts);
if (error)
#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/regmap.h>
#include <asm/unaligned.h>
tsdata->max_support_points = chip_data->max_support_points;
tsdata->vcc = devm_regulator_get(&client->dev, "vcc");
- if (IS_ERR(tsdata->vcc)) {
- error = PTR_ERR(tsdata->vcc);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "failed to request regulator: %d\n", error);
- return error;
- }
+ if (IS_ERR(tsdata->vcc))
+ return dev_err_probe(&client->dev, PTR_ERR(tsdata->vcc),
+ "failed to request regulator\n");
tsdata->iovcc = devm_regulator_get(&client->dev, "iovcc");
if (IS_ERR(tsdata->iovcc)) {
/* This requests the gpio *and* turns on the touchscreen controller */
ts->power_gpios = devm_gpiod_get(dev, "power", GPIOD_OUT_HIGH);
- if (IS_ERR(ts->power_gpios)) {
- error = PTR_ERR(ts->power_gpios);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Error getting power gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->power_gpios))
+ return dev_err_probe(dev, PTR_ERR(ts->power_gpios), "Error getting power gpio\n");
input = devm_input_allocate_device(dev);
if (!input)
i2c_set_clientdata(client, ts);
ts->vcc33 = devm_regulator_get(&client->dev, "vcc33");
- if (IS_ERR(ts->vcc33)) {
- error = PTR_ERR(ts->vcc33);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get 'vcc33' regulator: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(ts->vcc33))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->vcc33),
+ "Failed to get 'vcc33' regulator\n");
ts->vccio = devm_regulator_get(&client->dev, "vccio");
- if (IS_ERR(ts->vccio)) {
- error = PTR_ERR(ts->vccio);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get 'vccio' regulator: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(ts->vccio))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+ "Failed to get 'vccio' regulator\n");
ts->reset_gpio = devm_gpiod_get(&client->dev, "reset", GPIOD_OUT_HIGH);
if (IS_ERR(ts->reset_gpio)) {
* minimal implementation based on egalax_ts.c and egalax_i2c.c
*/
+#include <linux/acpi.h>
#include <linux/bitops.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/regulator/consumer.h>
#include <linux/sizes.h>
#include <linux/timer.h>
#include <asm/unaligned.h>
if (IS_ERR(data->reset))
return PTR_ERR(data->reset);
+ /* For proper reset sequence, enable power while reset asserted */
+ error = devm_regulator_get_enable(&client->dev, "vdd");
+ if (error && error != -ENODEV)
+ return dev_err_probe(&client->dev, error,
+ "failed to request vdd regulator\n");
+
if (data->reset) {
msleep(EXC3000_RESET_MS);
gpiod_set_value_cansleep(data->reset, 0);
MODULE_DEVICE_TABLE(of, exc3000_of_match);
#endif
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id exc3000_acpi_match[] = {
+ { "EGA00001", .driver_data = (kernel_ulong_t)&exc3000_info[EETI_EXC80H60] },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, exc3000_acpi_match);
+#endif
+
static struct i2c_driver exc3000_driver = {
.driver = {
.name = "exc3000",
.of_match_table = of_match_ptr(exc3000_of_match),
+ .acpi_match_table = ACPI_PTR(exc3000_acpi_match),
},
.id_table = exc3000_id,
.probe = exc3000_probe,
*/
static int goodix_get_gpio_config(struct goodix_ts_data *ts)
{
- int error;
struct device *dev;
struct gpio_desc *gpiod;
bool added_acpi_mappings = false;
ts->gpiod_rst_flags = GPIOD_IN;
ts->avdd28 = devm_regulator_get(dev, "AVDD28");
- if (IS_ERR(ts->avdd28)) {
- error = PTR_ERR(ts->avdd28);
- if (error != -EPROBE_DEFER)
- dev_err(dev,
- "Failed to get AVDD28 regulator: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->avdd28))
+ return dev_err_probe(dev, PTR_ERR(ts->avdd28), "Failed to get AVDD28 regulator\n");
ts->vddio = devm_regulator_get(dev, "VDDIO");
- if (IS_ERR(ts->vddio)) {
- error = PTR_ERR(ts->vddio);
- if (error != -EPROBE_DEFER)
- dev_err(dev,
- "Failed to get VDDIO regulator: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->vddio))
+ return dev_err_probe(dev, PTR_ERR(ts->vddio), "Failed to get VDDIO regulator\n");
retry_get_irq_gpio:
/* Get the interrupt GPIO pin number */
gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_INT_NAME, GPIOD_IN);
- if (IS_ERR(gpiod)) {
- error = PTR_ERR(gpiod);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get %s GPIO: %d\n",
- GOODIX_GPIO_INT_NAME, error);
- return error;
- }
+ if (IS_ERR(gpiod))
+ return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+ GOODIX_GPIO_INT_NAME);
+
if (!gpiod && has_acpi_companion(dev) && !added_acpi_mappings) {
added_acpi_mappings = true;
if (goodix_add_acpi_gpio_mappings(ts) == 0)
/* Get the reset line GPIO pin number */
gpiod = devm_gpiod_get_optional(dev, GOODIX_GPIO_RST_NAME, ts->gpiod_rst_flags);
- if (IS_ERR(gpiod)) {
- error = PTR_ERR(gpiod);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get %s GPIO: %d\n",
- GOODIX_GPIO_RST_NAME, error);
- return error;
- }
+ if (IS_ERR(gpiod))
+ return dev_err_probe(dev, PTR_ERR(gpiod), "Failed to get %s GPIO\n",
+ GOODIX_GPIO_RST_NAME);
ts->gpiod_rst = gpiod;
static const struct acpi_device_id goodix_acpi_match[] = {
{ "GDIX1001", 0 },
{ "GDIX1002", 0 },
+ { "GDX9110", 0 },
{ }
};
MODULE_DEVICE_TABLE(acpi, goodix_acpi_match);
#include <linux/input/mt.h>
#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller
+ *
+ * Copyright (C) 2023 Jeff LaBundy <jeff@labundy.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/slab.h>
+#include <asm/unaligned.h>
+
+#define IQS7211_PROD_NUM 0x00
+
+#define IQS7211_EVENT_MASK_ALL GENMASK(14, 8)
+#define IQS7211_EVENT_MASK_ALP BIT(13)
+#define IQS7211_EVENT_MASK_BTN BIT(12)
+#define IQS7211_EVENT_MASK_ATI BIT(11)
+#define IQS7211_EVENT_MASK_MOVE BIT(10)
+#define IQS7211_EVENT_MASK_GSTR BIT(9)
+#define IQS7211_EVENT_MODE BIT(8)
+
+#define IQS7211_COMMS_ERROR 0xEEEE
+#define IQS7211_COMMS_RETRY_MS 50
+#define IQS7211_COMMS_SLEEP_US 100
+#define IQS7211_COMMS_TIMEOUT_US (100 * USEC_PER_MSEC)
+#define IQS7211_RESET_TIMEOUT_MS 150
+#define IQS7211_START_TIMEOUT_US (1 * USEC_PER_SEC)
+
+#define IQS7211_NUM_RETRIES 5
+#define IQS7211_NUM_CRX 8
+#define IQS7211_MAX_CTX 13
+
+#define IQS7211_MAX_CONTACTS 2
+#define IQS7211_MAX_CYCLES 21
+
+/*
+ * The following delay is used during instances that must wait for the open-
+ * drain RDY pin to settle. Its value is calculated as 5*R*C, where R and C
+ * represent typical datasheet values of 4.7k and 100 nF, respectively.
+ */
+#define iqs7211_irq_wait() usleep_range(2500, 2600)
+
+enum iqs7211_dev_id {
+ IQS7210A,
+ IQS7211A,
+ IQS7211E,
+};
+
+enum iqs7211_comms_mode {
+ IQS7211_COMMS_MODE_WAIT,
+ IQS7211_COMMS_MODE_FREE,
+ IQS7211_COMMS_MODE_FORCE,
+};
+
+struct iqs7211_reg_field_desc {
+ struct list_head list;
+ u8 addr;
+ u16 mask;
+ u16 val;
+};
+
+enum iqs7211_reg_key_id {
+ IQS7211_REG_KEY_NONE,
+ IQS7211_REG_KEY_PROX,
+ IQS7211_REG_KEY_TOUCH,
+ IQS7211_REG_KEY_TAP,
+ IQS7211_REG_KEY_HOLD,
+ IQS7211_REG_KEY_PALM,
+ IQS7211_REG_KEY_AXIAL_X,
+ IQS7211_REG_KEY_AXIAL_Y,
+ IQS7211_REG_KEY_RESERVED
+};
+
+enum iqs7211_reg_grp_id {
+ IQS7211_REG_GRP_TP,
+ IQS7211_REG_GRP_BTN,
+ IQS7211_REG_GRP_ALP,
+ IQS7211_REG_GRP_SYS,
+ IQS7211_NUM_REG_GRPS
+};
+
+static const char * const iqs7211_reg_grp_names[IQS7211_NUM_REG_GRPS] = {
+ [IQS7211_REG_GRP_TP] = "trackpad",
+ [IQS7211_REG_GRP_BTN] = "button",
+ [IQS7211_REG_GRP_ALP] = "alp",
+};
+
+static const u16 iqs7211_reg_grp_masks[IQS7211_NUM_REG_GRPS] = {
+ [IQS7211_REG_GRP_TP] = IQS7211_EVENT_MASK_GSTR,
+ [IQS7211_REG_GRP_BTN] = IQS7211_EVENT_MASK_BTN,
+ [IQS7211_REG_GRP_ALP] = IQS7211_EVENT_MASK_ALP,
+};
+
+struct iqs7211_event_desc {
+ const char *name;
+ u16 mask;
+ u16 enable;
+ enum iqs7211_reg_grp_id reg_grp;
+ enum iqs7211_reg_key_id reg_key;
+};
+
+static const struct iqs7211_event_desc iqs7210a_kp_events[] = {
+ {
+ .mask = BIT(10),
+ .enable = BIT(13) | BIT(12),
+ .reg_grp = IQS7211_REG_GRP_ALP,
+ },
+ {
+ .name = "event-prox",
+ .mask = BIT(2),
+ .enable = BIT(5) | BIT(4),
+ .reg_grp = IQS7211_REG_GRP_BTN,
+ .reg_key = IQS7211_REG_KEY_PROX,
+ },
+ {
+ .name = "event-touch",
+ .mask = BIT(3),
+ .enable = BIT(5) | BIT(4),
+ .reg_grp = IQS7211_REG_GRP_BTN,
+ .reg_key = IQS7211_REG_KEY_TOUCH,
+ },
+ {
+ .name = "event-tap",
+ .mask = BIT(0),
+ .enable = BIT(0),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_TAP,
+ },
+ {
+ .name = "event-hold",
+ .mask = BIT(1),
+ .enable = BIT(1),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-swipe-x-neg",
+ .mask = BIT(2),
+ .enable = BIT(2),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-x-pos",
+ .mask = BIT(3),
+ .enable = BIT(3),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-y-pos",
+ .mask = BIT(4),
+ .enable = BIT(4),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+ {
+ .name = "event-swipe-y-neg",
+ .mask = BIT(5),
+ .enable = BIT(5),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+};
+
+static const struct iqs7211_event_desc iqs7211a_kp_events[] = {
+ {
+ .mask = BIT(14),
+ .reg_grp = IQS7211_REG_GRP_ALP,
+ },
+ {
+ .name = "event-tap",
+ .mask = BIT(0),
+ .enable = BIT(0),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_TAP,
+ },
+ {
+ .name = "event-hold",
+ .mask = BIT(1),
+ .enable = BIT(1),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-swipe-x-neg",
+ .mask = BIT(2),
+ .enable = BIT(2),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-x-pos",
+ .mask = BIT(3),
+ .enable = BIT(3),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-y-pos",
+ .mask = BIT(4),
+ .enable = BIT(4),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+ {
+ .name = "event-swipe-y-neg",
+ .mask = BIT(5),
+ .enable = BIT(5),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+};
+
+static const struct iqs7211_event_desc iqs7211e_kp_events[] = {
+ {
+ .mask = BIT(14),
+ .reg_grp = IQS7211_REG_GRP_ALP,
+ },
+ {
+ .name = "event-tap",
+ .mask = BIT(0),
+ .enable = BIT(0),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_TAP,
+ },
+ {
+ .name = "event-tap-double",
+ .mask = BIT(1),
+ .enable = BIT(1),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_TAP,
+ },
+ {
+ .name = "event-tap-triple",
+ .mask = BIT(2),
+ .enable = BIT(2),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_TAP,
+ },
+ {
+ .name = "event-hold",
+ .mask = BIT(3),
+ .enable = BIT(3),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-palm",
+ .mask = BIT(4),
+ .enable = BIT(4),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_PALM,
+ },
+ {
+ .name = "event-swipe-x-pos",
+ .mask = BIT(8),
+ .enable = BIT(8),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-x-neg",
+ .mask = BIT(9),
+ .enable = BIT(9),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ },
+ {
+ .name = "event-swipe-y-pos",
+ .mask = BIT(10),
+ .enable = BIT(10),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+ {
+ .name = "event-swipe-y-neg",
+ .mask = BIT(11),
+ .enable = BIT(11),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ },
+ {
+ .name = "event-swipe-x-pos-hold",
+ .mask = BIT(12),
+ .enable = BIT(12),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-swipe-x-neg-hold",
+ .mask = BIT(13),
+ .enable = BIT(13),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-swipe-y-pos-hold",
+ .mask = BIT(14),
+ .enable = BIT(14),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+ {
+ .name = "event-swipe-y-neg-hold",
+ .mask = BIT(15),
+ .enable = BIT(15),
+ .reg_grp = IQS7211_REG_GRP_TP,
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ },
+};
+
+struct iqs7211_dev_desc {
+ const char *tp_name;
+ const char *kp_name;
+ u16 prod_num;
+ u16 show_reset;
+ u16 ati_error[IQS7211_NUM_REG_GRPS];
+ u16 ati_start[IQS7211_NUM_REG_GRPS];
+ u16 suspend;
+ u16 ack_reset;
+ u16 comms_end;
+ u16 comms_req;
+ int charge_shift;
+ int info_offs;
+ int gesture_offs;
+ int contact_offs;
+ u8 sys_stat;
+ u8 sys_ctrl;
+ u8 alp_config;
+ u8 tp_config;
+ u8 exp_file;
+ u8 kp_enable[IQS7211_NUM_REG_GRPS];
+ u8 gesture_angle;
+ u8 rx_tx_map;
+ u8 cycle_alloc[2];
+ u8 cycle_limit[2];
+ const struct iqs7211_event_desc *kp_events;
+ int num_kp_events;
+ int min_crx_alp;
+ int num_ctx;
+};
+
+static const struct iqs7211_dev_desc iqs7211_devs[] = {
+ [IQS7210A] = {
+ .tp_name = "iqs7210a_trackpad",
+ .kp_name = "iqs7210a_keys",
+ .prod_num = 944,
+ .show_reset = BIT(15),
+ .ati_error = {
+ [IQS7211_REG_GRP_TP] = BIT(12),
+ [IQS7211_REG_GRP_BTN] = BIT(0),
+ [IQS7211_REG_GRP_ALP] = BIT(8),
+ },
+ .ati_start = {
+ [IQS7211_REG_GRP_TP] = BIT(13),
+ [IQS7211_REG_GRP_BTN] = BIT(1),
+ [IQS7211_REG_GRP_ALP] = BIT(9),
+ },
+ .suspend = BIT(11),
+ .ack_reset = BIT(7),
+ .comms_end = BIT(2),
+ .comms_req = BIT(1),
+ .charge_shift = 4,
+ .info_offs = 0,
+ .gesture_offs = 1,
+ .contact_offs = 4,
+ .sys_stat = 0x0A,
+ .sys_ctrl = 0x35,
+ .alp_config = 0x39,
+ .tp_config = 0x4E,
+ .exp_file = 0x57,
+ .kp_enable = {
+ [IQS7211_REG_GRP_TP] = 0x58,
+ [IQS7211_REG_GRP_BTN] = 0x37,
+ [IQS7211_REG_GRP_ALP] = 0x37,
+ },
+ .gesture_angle = 0x5F,
+ .rx_tx_map = 0x60,
+ .cycle_alloc = { 0x66, 0x75, },
+ .cycle_limit = { 10, 6, },
+ .kp_events = iqs7210a_kp_events,
+ .num_kp_events = ARRAY_SIZE(iqs7210a_kp_events),
+ .min_crx_alp = 4,
+ .num_ctx = IQS7211_MAX_CTX - 1,
+ },
+ [IQS7211A] = {
+ .tp_name = "iqs7211a_trackpad",
+ .kp_name = "iqs7211a_keys",
+ .prod_num = 763,
+ .show_reset = BIT(7),
+ .ati_error = {
+ [IQS7211_REG_GRP_TP] = BIT(3),
+ [IQS7211_REG_GRP_ALP] = BIT(5),
+ },
+ .ati_start = {
+ [IQS7211_REG_GRP_TP] = BIT(5),
+ [IQS7211_REG_GRP_ALP] = BIT(6),
+ },
+ .ack_reset = BIT(7),
+ .comms_req = BIT(4),
+ .charge_shift = 0,
+ .info_offs = 0,
+ .gesture_offs = 1,
+ .contact_offs = 4,
+ .sys_stat = 0x10,
+ .sys_ctrl = 0x50,
+ .tp_config = 0x60,
+ .alp_config = 0x72,
+ .exp_file = 0x74,
+ .kp_enable = {
+ [IQS7211_REG_GRP_TP] = 0x80,
+ },
+ .gesture_angle = 0x87,
+ .rx_tx_map = 0x90,
+ .cycle_alloc = { 0xA0, 0xB0, },
+ .cycle_limit = { 10, 8, },
+ .kp_events = iqs7211a_kp_events,
+ .num_kp_events = ARRAY_SIZE(iqs7211a_kp_events),
+ .num_ctx = IQS7211_MAX_CTX - 1,
+ },
+ [IQS7211E] = {
+ .tp_name = "iqs7211e_trackpad",
+ .kp_name = "iqs7211e_keys",
+ .prod_num = 1112,
+ .show_reset = BIT(7),
+ .ati_error = {
+ [IQS7211_REG_GRP_TP] = BIT(3),
+ [IQS7211_REG_GRP_ALP] = BIT(5),
+ },
+ .ati_start = {
+ [IQS7211_REG_GRP_TP] = BIT(5),
+ [IQS7211_REG_GRP_ALP] = BIT(6),
+ },
+ .suspend = BIT(11),
+ .ack_reset = BIT(7),
+ .comms_end = BIT(6),
+ .comms_req = BIT(4),
+ .charge_shift = 0,
+ .info_offs = 1,
+ .gesture_offs = 0,
+ .contact_offs = 2,
+ .sys_stat = 0x0E,
+ .sys_ctrl = 0x33,
+ .tp_config = 0x41,
+ .alp_config = 0x36,
+ .exp_file = 0x4A,
+ .kp_enable = {
+ [IQS7211_REG_GRP_TP] = 0x4B,
+ },
+ .gesture_angle = 0x55,
+ .rx_tx_map = 0x56,
+ .cycle_alloc = { 0x5D, 0x6C, },
+ .cycle_limit = { 10, 11, },
+ .kp_events = iqs7211e_kp_events,
+ .num_kp_events = ARRAY_SIZE(iqs7211e_kp_events),
+ .num_ctx = IQS7211_MAX_CTX,
+ },
+};
+
+struct iqs7211_prop_desc {
+ const char *name;
+ enum iqs7211_reg_key_id reg_key;
+ u8 reg_addr[IQS7211_NUM_REG_GRPS][ARRAY_SIZE(iqs7211_devs)];
+ int reg_shift;
+ int reg_width;
+ int val_pitch;
+ int val_min;
+ int val_max;
+ const char *label;
+};
+
+static const struct iqs7211_prop_desc iqs7211_props[] = {
+ {
+ .name = "azoteq,ati-frac-div-fine",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x1E,
+ [IQS7211A] = 0x30,
+ [IQS7211E] = 0x21,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x22,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x23,
+ [IQS7211A] = 0x36,
+ [IQS7211E] = 0x25,
+ },
+ },
+ .reg_shift = 9,
+ .reg_width = 5,
+ .label = "ATI fine fractional divider",
+ },
+ {
+ .name = "azoteq,ati-frac-mult-coarse",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x1E,
+ [IQS7211A] = 0x30,
+ [IQS7211E] = 0x21,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x22,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x23,
+ [IQS7211A] = 0x36,
+ [IQS7211E] = 0x25,
+ },
+ },
+ .reg_shift = 5,
+ .reg_width = 4,
+ .label = "ATI coarse fractional multiplier",
+ },
+ {
+ .name = "azoteq,ati-frac-div-coarse",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x1E,
+ [IQS7211A] = 0x30,
+ [IQS7211E] = 0x21,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x22,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x23,
+ [IQS7211A] = 0x36,
+ [IQS7211E] = 0x25,
+ },
+ },
+ .reg_shift = 0,
+ .reg_width = 5,
+ .label = "ATI coarse fractional divider",
+ },
+ {
+ .name = "azoteq,ati-comp-div",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x1F,
+ [IQS7211E] = 0x22,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x24,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7211E] = 0x26,
+ },
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .val_max = 31,
+ .label = "ATI compensation divider",
+ },
+ {
+ .name = "azoteq,ati-comp-div",
+ .reg_addr = {
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x24,
+ },
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_max = 31,
+ .label = "ATI compensation divider",
+ },
+ {
+ .name = "azoteq,ati-comp-div",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7211A] = 0x31,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7211A] = 0x37,
+ },
+ },
+ .val_max = 31,
+ .label = "ATI compensation divider",
+ },
+ {
+ .name = "azoteq,ati-target",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x20,
+ [IQS7211A] = 0x32,
+ [IQS7211E] = 0x23,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x27,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x28,
+ [IQS7211A] = 0x38,
+ [IQS7211E] = 0x27,
+ },
+ },
+ .label = "ATI target",
+ },
+ {
+ .name = "azoteq,ati-base",
+ .reg_addr[IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x26,
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_pitch = 8,
+ .label = "ATI base",
+ },
+ {
+ .name = "azoteq,ati-base",
+ .reg_addr[IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x26,
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .val_pitch = 8,
+ .label = "ATI base",
+ },
+ {
+ .name = "azoteq,rate-active-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x29,
+ [IQS7211A] = 0x40,
+ [IQS7211E] = 0x28,
+ },
+ .label = "active mode report rate",
+ },
+ {
+ .name = "azoteq,rate-touch-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2A,
+ [IQS7211A] = 0x41,
+ [IQS7211E] = 0x29,
+ },
+ .label = "idle-touch mode report rate",
+ },
+ {
+ .name = "azoteq,rate-idle-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2B,
+ [IQS7211A] = 0x42,
+ [IQS7211E] = 0x2A,
+ },
+ .label = "idle mode report rate",
+ },
+ {
+ .name = "azoteq,rate-lp1-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2C,
+ [IQS7211A] = 0x43,
+ [IQS7211E] = 0x2B,
+ },
+ .label = "low-power mode 1 report rate",
+ },
+ {
+ .name = "azoteq,rate-lp2-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2D,
+ [IQS7211A] = 0x44,
+ [IQS7211E] = 0x2C,
+ },
+ .label = "low-power mode 2 report rate",
+ },
+ {
+ .name = "azoteq,timeout-active-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2E,
+ [IQS7211A] = 0x45,
+ [IQS7211E] = 0x2D,
+ },
+ .val_pitch = 1000,
+ .label = "active mode timeout",
+ },
+ {
+ .name = "azoteq,timeout-touch-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x2F,
+ [IQS7211A] = 0x46,
+ [IQS7211E] = 0x2E,
+ },
+ .val_pitch = 1000,
+ .label = "idle-touch mode timeout",
+ },
+ {
+ .name = "azoteq,timeout-idle-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x30,
+ [IQS7211A] = 0x47,
+ [IQS7211E] = 0x2F,
+ },
+ .val_pitch = 1000,
+ .label = "idle mode timeout",
+ },
+ {
+ .name = "azoteq,timeout-lp1-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x31,
+ [IQS7211A] = 0x48,
+ [IQS7211E] = 0x30,
+ },
+ .val_pitch = 1000,
+ .label = "low-power mode 1 timeout",
+ },
+ {
+ .name = "azoteq,timeout-lp2-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x32,
+ [IQS7211E] = 0x31,
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_pitch = 1000,
+ .val_max = 60000,
+ .label = "trackpad reference value update rate",
+ },
+ {
+ .name = "azoteq,timeout-lp2-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7211A] = 0x49,
+ },
+ .val_pitch = 1000,
+ .val_max = 60000,
+ .label = "trackpad reference value update rate",
+ },
+ {
+ .name = "azoteq,timeout-ati-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x32,
+ [IQS7211E] = 0x31,
+ },
+ .reg_width = 8,
+ .val_pitch = 1000,
+ .val_max = 60000,
+ .label = "ATI error timeout",
+ },
+ {
+ .name = "azoteq,timeout-ati-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7211A] = 0x35,
+ },
+ .val_pitch = 1000,
+ .val_max = 60000,
+ .label = "ATI error timeout",
+ },
+ {
+ .name = "azoteq,timeout-comms-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x33,
+ [IQS7211A] = 0x4A,
+ [IQS7211E] = 0x32,
+ },
+ .label = "communication timeout",
+ },
+ {
+ .name = "azoteq,timeout-press-ms",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x34,
+ },
+ .reg_width = 8,
+ .val_pitch = 1000,
+ .val_max = 60000,
+ .label = "press timeout",
+ },
+ {
+ .name = "azoteq,ati-mode",
+ .reg_addr[IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x37,
+ },
+ .reg_shift = 15,
+ .reg_width = 1,
+ .label = "ATI mode",
+ },
+ {
+ .name = "azoteq,ati-mode",
+ .reg_addr[IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x37,
+ },
+ .reg_shift = 7,
+ .reg_width = 1,
+ .label = "ATI mode",
+ },
+ {
+ .name = "azoteq,sense-mode",
+ .reg_addr[IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x37,
+ [IQS7211A] = 0x72,
+ [IQS7211E] = 0x36,
+ },
+ .reg_shift = 8,
+ .reg_width = 1,
+ .label = "sensing mode",
+ },
+ {
+ .name = "azoteq,sense-mode",
+ .reg_addr[IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x37,
+ },
+ .reg_shift = 0,
+ .reg_width = 2,
+ .val_max = 2,
+ .label = "sensing mode",
+ },
+ {
+ .name = "azoteq,fosc-freq",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x38,
+ [IQS7211A] = 0x52,
+ [IQS7211E] = 0x35,
+ },
+ .reg_shift = 4,
+ .reg_width = 1,
+ .label = "core clock frequency selection",
+ },
+ {
+ .name = "azoteq,fosc-trim",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x38,
+ [IQS7211A] = 0x52,
+ [IQS7211E] = 0x35,
+ },
+ .reg_shift = 0,
+ .reg_width = 4,
+ .label = "core clock frequency trim",
+ },
+ {
+ .name = "azoteq,touch-exit",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x3B,
+ [IQS7211A] = 0x53,
+ [IQS7211E] = 0x38,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x3E,
+ },
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "touch exit factor",
+ },
+ {
+ .name = "azoteq,touch-enter",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x3B,
+ [IQS7211A] = 0x53,
+ [IQS7211E] = 0x38,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x3E,
+ },
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "touch entrance factor",
+ },
+ {
+ .name = "azoteq,thresh",
+ .reg_addr = {
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x3C,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x3D,
+ [IQS7211A] = 0x54,
+ [IQS7211E] = 0x39,
+ },
+ },
+ .label = "threshold",
+ },
+ {
+ .name = "azoteq,debounce-exit",
+ .reg_addr = {
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x3F,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x40,
+ [IQS7211A] = 0x56,
+ [IQS7211E] = 0x3A,
+ },
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "debounce exit factor",
+ },
+ {
+ .name = "azoteq,debounce-enter",
+ .reg_addr = {
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x3F,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x40,
+ [IQS7211A] = 0x56,
+ [IQS7211E] = 0x3A,
+ },
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "debounce entrance factor",
+ },
+ {
+ .name = "azoteq,conv-frac",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x48,
+ [IQS7211A] = 0x58,
+ [IQS7211E] = 0x3D,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x49,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x4A,
+ [IQS7211A] = 0x59,
+ [IQS7211E] = 0x3E,
+ },
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "conversion frequency fractional divider",
+ },
+ {
+ .name = "azoteq,conv-period",
+ .reg_addr = {
+ [IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x48,
+ [IQS7211A] = 0x58,
+ [IQS7211E] = 0x3D,
+ },
+ [IQS7211_REG_GRP_BTN] = {
+ [IQS7210A] = 0x49,
+ },
+ [IQS7211_REG_GRP_ALP] = {
+ [IQS7210A] = 0x4A,
+ [IQS7211A] = 0x59,
+ [IQS7211E] = 0x3E,
+ },
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "conversion period",
+ },
+ {
+ .name = "azoteq,thresh",
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x55,
+ [IQS7211A] = 0x67,
+ [IQS7211E] = 0x48,
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "threshold",
+ },
+ {
+ .name = "azoteq,contact-split",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x55,
+ [IQS7211A] = 0x67,
+ [IQS7211E] = 0x48,
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "contact split factor",
+ },
+ {
+ .name = "azoteq,trim-x",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x56,
+ [IQS7211E] = 0x49,
+ },
+ .reg_shift = 0,
+ .reg_width = 8,
+ .label = "horizontal trim width",
+ },
+ {
+ .name = "azoteq,trim-x",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7211A] = 0x68,
+ },
+ .label = "horizontal trim width",
+ },
+ {
+ .name = "azoteq,trim-y",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7210A] = 0x56,
+ [IQS7211E] = 0x49,
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .label = "vertical trim height",
+ },
+ {
+ .name = "azoteq,trim-y",
+ .reg_addr[IQS7211_REG_GRP_SYS] = {
+ [IQS7211A] = 0x69,
+ },
+ .label = "vertical trim height",
+ },
+ {
+ .name = "azoteq,gesture-max-ms",
+ .reg_key = IQS7211_REG_KEY_TAP,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x59,
+ [IQS7211A] = 0x81,
+ [IQS7211E] = 0x4C,
+ },
+ .label = "maximum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-mid-ms",
+ .reg_key = IQS7211_REG_KEY_TAP,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7211E] = 0x4D,
+ },
+ .label = "repeated gesture time",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_key = IQS7211_REG_KEY_TAP,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5A,
+ [IQS7211A] = 0x82,
+ [IQS7211E] = 0x4E,
+ },
+ .label = "gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5A,
+ [IQS7211A] = 0x82,
+ [IQS7211E] = 0x4E,
+ },
+ .label = "gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-min-ms",
+ .reg_key = IQS7211_REG_KEY_HOLD,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5B,
+ [IQS7211A] = 0x83,
+ [IQS7211E] = 0x4F,
+ },
+ .label = "minimum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-max-ms",
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5C,
+ [IQS7211A] = 0x84,
+ [IQS7211E] = 0x50,
+ },
+ .label = "maximum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-max-ms",
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5C,
+ [IQS7211A] = 0x84,
+ [IQS7211E] = 0x50,
+ },
+ .label = "maximum gesture time",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5D,
+ [IQS7211A] = 0x85,
+ [IQS7211E] = 0x51,
+ },
+ .label = "gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-dist",
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7210A] = 0x5E,
+ [IQS7211A] = 0x86,
+ [IQS7211E] = 0x52,
+ },
+ .label = "gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-dist-rep",
+ .reg_key = IQS7211_REG_KEY_AXIAL_X,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7211E] = 0x53,
+ },
+ .label = "repeated gesture distance",
+ },
+ {
+ .name = "azoteq,gesture-dist-rep",
+ .reg_key = IQS7211_REG_KEY_AXIAL_Y,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7211E] = 0x54,
+ },
+ .label = "repeated gesture distance",
+ },
+ {
+ .name = "azoteq,thresh",
+ .reg_key = IQS7211_REG_KEY_PALM,
+ .reg_addr[IQS7211_REG_GRP_TP] = {
+ [IQS7211E] = 0x55,
+ },
+ .reg_shift = 8,
+ .reg_width = 8,
+ .val_max = 42,
+ .label = "threshold",
+ },
+};
+
+static const u8 iqs7211_gesture_angle[] = {
+ 0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x06, 0x07, 0x08,
+ 0x09, 0x0A, 0x0B, 0x0C,
+ 0x0E, 0x0F, 0x10, 0x11,
+ 0x12, 0x14, 0x15, 0x16,
+ 0x17, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1E, 0x1F, 0x21,
+ 0x22, 0x23, 0x25, 0x26,
+ 0x28, 0x2A, 0x2B, 0x2D,
+ 0x2E, 0x30, 0x32, 0x34,
+ 0x36, 0x38, 0x3A, 0x3C,
+ 0x3E, 0x40, 0x42, 0x45,
+ 0x47, 0x4A, 0x4C, 0x4F,
+ 0x52, 0x55, 0x58, 0x5B,
+ 0x5F, 0x63, 0x66, 0x6B,
+ 0x6F, 0x73, 0x78, 0x7E,
+ 0x83, 0x89, 0x90, 0x97,
+ 0x9E, 0xA7, 0xB0, 0xBA,
+ 0xC5, 0xD1, 0xDF, 0xEF,
+};
+
+struct iqs7211_ver_info {
+ __le16 prod_num;
+ __le16 major;
+ __le16 minor;
+ __le32 patch;
+} __packed;
+
+struct iqs7211_touch_data {
+ __le16 abs_x;
+ __le16 abs_y;
+ __le16 pressure;
+ __le16 area;
+} __packed;
+
+struct iqs7211_tp_config {
+ u8 tp_settings;
+ u8 total_rx;
+ u8 total_tx;
+ u8 num_contacts;
+ __le16 max_x;
+ __le16 max_y;
+} __packed;
+
+struct iqs7211_private {
+ const struct iqs7211_dev_desc *dev_desc;
+ struct gpio_desc *reset_gpio;
+ struct gpio_desc *irq_gpio;
+ struct i2c_client *client;
+ struct input_dev *tp_idev;
+ struct input_dev *kp_idev;
+ struct iqs7211_ver_info ver_info;
+ struct iqs7211_tp_config tp_config;
+ struct touchscreen_properties prop;
+ struct list_head reg_field_head;
+ enum iqs7211_comms_mode comms_init;
+ enum iqs7211_comms_mode comms_mode;
+ unsigned int num_contacts;
+ unsigned int kp_code[ARRAY_SIZE(iqs7211e_kp_events)];
+ u8 rx_tx_map[IQS7211_MAX_CTX + 1];
+ u8 cycle_alloc[2][33];
+ u8 exp_file[2];
+ u16 event_mask;
+ u16 ati_start;
+ u16 gesture_cache;
+};
+
+static int iqs7211_irq_poll(struct iqs7211_private *iqs7211, u64 timeout_us)
+{
+ int error, val;
+
+ error = readx_poll_timeout(gpiod_get_value_cansleep, iqs7211->irq_gpio,
+ val, val, IQS7211_COMMS_SLEEP_US, timeout_us);
+
+ return val < 0 ? val : error;
+}
+
+static int iqs7211_hard_reset(struct iqs7211_private *iqs7211)
+{
+ if (!iqs7211->reset_gpio)
+ return 0;
+
+ gpiod_set_value_cansleep(iqs7211->reset_gpio, 1);
+
+ /*
+ * The following delay ensures the shared RDY/MCLR pin is sampled in
+ * between periodic assertions by the device and assumes the default
+ * communication timeout has not been overwritten in OTP memory.
+ */
+ if (iqs7211->reset_gpio == iqs7211->irq_gpio)
+ msleep(IQS7211_RESET_TIMEOUT_MS);
+ else
+ usleep_range(1000, 1100);
+
+ gpiod_set_value_cansleep(iqs7211->reset_gpio, 0);
+ if (iqs7211->reset_gpio == iqs7211->irq_gpio)
+ iqs7211_irq_wait();
+
+ return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+}
+
+static int iqs7211_force_comms(struct iqs7211_private *iqs7211)
+{
+ u8 msg_buf[] = { 0xFF, };
+ int ret;
+
+ switch (iqs7211->comms_mode) {
+ case IQS7211_COMMS_MODE_WAIT:
+ return iqs7211_irq_poll(iqs7211, IQS7211_START_TIMEOUT_US);
+
+ case IQS7211_COMMS_MODE_FREE:
+ return 0;
+
+ case IQS7211_COMMS_MODE_FORCE:
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * The device cannot communicate until it asserts its interrupt (RDY)
+ * pin. Attempts to do so while RDY is deasserted return an ACK; how-
+ * ever all write data is ignored, and all read data returns 0xEE.
+ *
+ * Unsolicited communication must be preceded by a special force com-
+ * munication command, after which the device eventually asserts its
+ * RDY pin and agrees to communicate.
+ *
+ * Regardless of whether communication is forced or the result of an
+ * interrupt, the device automatically deasserts its RDY pin once it
+ * detects an I2C stop condition, or a timeout expires.
+ */
+ ret = gpiod_get_value_cansleep(iqs7211->irq_gpio);
+ if (ret < 0)
+ return ret;
+ else if (ret > 0)
+ return 0;
+
+ ret = i2c_master_send(iqs7211->client, msg_buf, sizeof(msg_buf));
+ if (ret < (int)sizeof(msg_buf)) {
+ if (ret >= 0)
+ ret = -EIO;
+
+ msleep(IQS7211_COMMS_RETRY_MS);
+ return ret;
+ }
+
+ iqs7211_irq_wait();
+
+ return iqs7211_irq_poll(iqs7211, IQS7211_COMMS_TIMEOUT_US);
+}
+
+static int iqs7211_read_burst(struct iqs7211_private *iqs7211,
+ u8 reg, void *val, u16 val_len)
+{
+ int ret, i;
+ struct i2c_client *client = iqs7211->client;
+ struct i2c_msg msg[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = sizeof(reg),
+ .buf = ®,
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = val_len,
+ .buf = (u8 *)val,
+ },
+ };
+
+ /*
+ * The following loop protects against an edge case in which the RDY
+ * pin is automatically deasserted just as the read is initiated. In
+ * that case, the read must be retried using forced communication.
+ */
+ for (i = 0; i < IQS7211_NUM_RETRIES; i++) {
+ ret = iqs7211_force_comms(iqs7211);
+ if (ret < 0)
+ continue;
+
+ ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+ if (ret < (int)ARRAY_SIZE(msg)) {
+ if (ret >= 0)
+ ret = -EIO;
+
+ msleep(IQS7211_COMMS_RETRY_MS);
+ continue;
+ }
+
+ if (get_unaligned_le16(msg[1].buf) == IQS7211_COMMS_ERROR) {
+ ret = -ENODATA;
+ continue;
+ }
+
+ ret = 0;
+ break;
+ }
+
+ iqs7211_irq_wait();
+
+ if (ret < 0)
+ dev_err(&client->dev,
+ "Failed to read from address 0x%02X: %d\n", reg, ret);
+
+ return ret;
+}
+
+static int iqs7211_read_word(struct iqs7211_private *iqs7211, u8 reg, u16 *val)
+{
+ __le16 val_buf;
+ int error;
+
+ error = iqs7211_read_burst(iqs7211, reg, &val_buf, sizeof(val_buf));
+ if (error)
+ return error;
+
+ *val = le16_to_cpu(val_buf);
+
+ return 0;
+}
+
+static int iqs7211_write_burst(struct iqs7211_private *iqs7211,
+ u8 reg, const void *val, u16 val_len)
+{
+ int msg_len = sizeof(reg) + val_len;
+ int ret, i;
+ struct i2c_client *client = iqs7211->client;
+ u8 *msg_buf;
+
+ msg_buf = kzalloc(msg_len, GFP_KERNEL);
+ if (!msg_buf)
+ return -ENOMEM;
+
+ *msg_buf = reg;
+ memcpy(msg_buf + sizeof(reg), val, val_len);
+
+ /*
+ * The following loop protects against an edge case in which the RDY
+ * pin is automatically asserted just before the force communication
+ * command is sent.
+ *
+ * In that case, the subsequent I2C stop condition tricks the device
+ * into preemptively deasserting the RDY pin and the command must be
+ * sent again.
+ */
+ for (i = 0; i < IQS7211_NUM_RETRIES; i++) {
+ ret = iqs7211_force_comms(iqs7211);
+ if (ret < 0)
+ continue;
+
+ ret = i2c_master_send(client, msg_buf, msg_len);
+ if (ret < msg_len) {
+ if (ret >= 0)
+ ret = -EIO;
+
+ msleep(IQS7211_COMMS_RETRY_MS);
+ continue;
+ }
+
+ ret = 0;
+ break;
+ }
+
+ kfree(msg_buf);
+
+ iqs7211_irq_wait();
+
+ if (ret < 0)
+ dev_err(&client->dev,
+ "Failed to write to address 0x%02X: %d\n", reg, ret);
+
+ return ret;
+}
+
+static int iqs7211_write_word(struct iqs7211_private *iqs7211, u8 reg, u16 val)
+{
+ __le16 val_buf = cpu_to_le16(val);
+
+ return iqs7211_write_burst(iqs7211, reg, &val_buf, sizeof(val_buf));
+}
+
+static int iqs7211_start_comms(struct iqs7211_private *iqs7211)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ bool forced_comms;
+ unsigned int val;
+ u16 comms_setup;
+ int error;
+
+ /*
+ * Until forced communication can be enabled, the host must wait for a
+ * communication window each time it intends to elicit a response from
+ * the device.
+ *
+ * Forced communication is not necessary, however, if the host adapter
+ * can support clock stretching. In that case, the device freely clock
+ * stretches until all pending conversions are complete.
+ */
+ forced_comms = device_property_present(&client->dev,
+ "azoteq,forced-comms");
+
+ error = device_property_read_u32(&client->dev,
+ "azoteq,forced-comms-default", &val);
+ if (error == -EINVAL) {
+ iqs7211->comms_init = IQS7211_COMMS_MODE_WAIT;
+ } else if (error) {
+ dev_err(&client->dev,
+ "Failed to read default communication mode: %d\n",
+ error);
+ return error;
+ } else if (val) {
+ iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_FORCE
+ : IQS7211_COMMS_MODE_WAIT;
+ } else {
+ iqs7211->comms_init = forced_comms ? IQS7211_COMMS_MODE_WAIT
+ : IQS7211_COMMS_MODE_FREE;
+ }
+
+ iqs7211->comms_mode = iqs7211->comms_init;
+
+ error = iqs7211_hard_reset(iqs7211);
+ if (error) {
+ dev_err(&client->dev, "Failed to reset device: %d\n", error);
+ return error;
+ }
+
+ error = iqs7211_read_burst(iqs7211, IQS7211_PROD_NUM,
+ &iqs7211->ver_info,
+ sizeof(iqs7211->ver_info));
+ if (error)
+ return error;
+
+ if (le16_to_cpu(iqs7211->ver_info.prod_num) != dev_desc->prod_num) {
+ dev_err(&client->dev, "Invalid product number: %u\n",
+ le16_to_cpu(iqs7211->ver_info.prod_num));
+ return -EINVAL;
+ }
+
+ error = iqs7211_read_word(iqs7211, dev_desc->sys_ctrl + 1,
+ &comms_setup);
+ if (error)
+ return error;
+
+ if (forced_comms)
+ comms_setup |= dev_desc->comms_req;
+ else
+ comms_setup &= ~dev_desc->comms_req;
+
+ error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+ comms_setup | dev_desc->comms_end);
+ if (error)
+ return error;
+
+ if (forced_comms)
+ iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+ else
+ iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+ error = iqs7211_read_burst(iqs7211, dev_desc->exp_file,
+ iqs7211->exp_file,
+ sizeof(iqs7211->exp_file));
+ if (error)
+ return error;
+
+ error = iqs7211_read_burst(iqs7211, dev_desc->tp_config,
+ &iqs7211->tp_config,
+ sizeof(iqs7211->tp_config));
+ if (error)
+ return error;
+
+ error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+ comms_setup);
+ if (error)
+ return error;
+
+ iqs7211->event_mask = comms_setup & ~IQS7211_EVENT_MASK_ALL;
+ iqs7211->event_mask |= (IQS7211_EVENT_MASK_ATI | IQS7211_EVENT_MODE);
+
+ return 0;
+}
+
+static int iqs7211_init_device(struct iqs7211_private *iqs7211)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct iqs7211_reg_field_desc *reg_field;
+ __le16 sys_ctrl[] = {
+ cpu_to_le16(dev_desc->ack_reset),
+ cpu_to_le16(iqs7211->event_mask),
+ };
+ int error, i;
+
+ /*
+ * Acknowledge reset before writing any registers in case the device
+ * suffers a spurious reset during initialization. The communication
+ * mode is configured at this time as well.
+ */
+ error = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+ sizeof(sys_ctrl));
+ if (error)
+ return error;
+
+ if (iqs7211->event_mask & dev_desc->comms_req)
+ iqs7211->comms_mode = IQS7211_COMMS_MODE_FORCE;
+ else
+ iqs7211->comms_mode = IQS7211_COMMS_MODE_FREE;
+
+ /*
+ * Take advantage of the stop-bit disable function, if available, to
+ * save the trouble of having to reopen a communication window after
+ * each read or write.
+ */
+ error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl + 1,
+ iqs7211->event_mask | dev_desc->comms_end);
+ if (error)
+ return error;
+
+ list_for_each_entry(reg_field, &iqs7211->reg_field_head, list) {
+ u16 new_val = reg_field->val;
+
+ if (reg_field->mask < U16_MAX) {
+ u16 old_val;
+
+ error = iqs7211_read_word(iqs7211, reg_field->addr,
+ &old_val);
+ if (error)
+ return error;
+
+ new_val = old_val & ~reg_field->mask;
+ new_val |= reg_field->val;
+
+ if (new_val == old_val)
+ continue;
+ }
+
+ error = iqs7211_write_word(iqs7211, reg_field->addr, new_val);
+ if (error)
+ return error;
+ }
+
+ error = iqs7211_write_burst(iqs7211, dev_desc->tp_config,
+ &iqs7211->tp_config,
+ sizeof(iqs7211->tp_config));
+ if (error)
+ return error;
+
+ if (**iqs7211->cycle_alloc) {
+ error = iqs7211_write_burst(iqs7211, dev_desc->rx_tx_map,
+ &iqs7211->rx_tx_map,
+ dev_desc->num_ctx);
+ if (error)
+ return error;
+
+ for (i = 0; i < sizeof(dev_desc->cycle_limit); i++) {
+ error = iqs7211_write_burst(iqs7211,
+ dev_desc->cycle_alloc[i],
+ iqs7211->cycle_alloc[i],
+ dev_desc->cycle_limit[i] * 3);
+ if (error)
+ return error;
+ }
+ }
+
+ *sys_ctrl = cpu_to_le16(iqs7211->ati_start);
+
+ return iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+ sizeof(sys_ctrl));
+}
+
+static int iqs7211_add_field(struct iqs7211_private *iqs7211,
+ struct iqs7211_reg_field_desc new_field)
+{
+ struct i2c_client *client = iqs7211->client;
+ struct iqs7211_reg_field_desc *reg_field;
+
+ if (!new_field.addr)
+ return 0;
+
+ list_for_each_entry(reg_field, &iqs7211->reg_field_head, list) {
+ if (reg_field->addr != new_field.addr)
+ continue;
+
+ reg_field->mask |= new_field.mask;
+ reg_field->val |= new_field.val;
+ return 0;
+ }
+
+ reg_field = devm_kzalloc(&client->dev, sizeof(*reg_field), GFP_KERNEL);
+ if (!reg_field)
+ return -ENOMEM;
+
+ reg_field->addr = new_field.addr;
+ reg_field->mask = new_field.mask;
+ reg_field->val = new_field.val;
+
+ list_add(®_field->list, &iqs7211->reg_field_head);
+
+ return 0;
+}
+
+static int iqs7211_parse_props(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *reg_grp_node,
+ enum iqs7211_reg_grp_id reg_grp,
+ enum iqs7211_reg_key_id reg_key)
+{
+ struct i2c_client *client = iqs7211->client;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(iqs7211_props); i++) {
+ const char *name = iqs7211_props[i].name;
+ u8 reg_addr = iqs7211_props[i].reg_addr[reg_grp]
+ [iqs7211->dev_desc -
+ iqs7211_devs];
+ int reg_shift = iqs7211_props[i].reg_shift;
+ int reg_width = iqs7211_props[i].reg_width ? : 16;
+ int val_pitch = iqs7211_props[i].val_pitch ? : 1;
+ int val_min = iqs7211_props[i].val_min;
+ int val_max = iqs7211_props[i].val_max;
+ const char *label = iqs7211_props[i].label ? : name;
+ struct iqs7211_reg_field_desc reg_field;
+ unsigned int val;
+ int error;
+
+ if (iqs7211_props[i].reg_key != reg_key)
+ continue;
+
+ if (!reg_addr)
+ continue;
+
+ error = fwnode_property_read_u32(reg_grp_node, name, &val);
+ if (error == -EINVAL) {
+ continue;
+ } else if (error) {
+ dev_err(&client->dev, "Failed to read %s %s: %d\n",
+ fwnode_get_name(reg_grp_node), label, error);
+ return error;
+ }
+
+ if (!val_max)
+ val_max = GENMASK(reg_width - 1, 0) * val_pitch;
+
+ if (val < val_min || val > val_max) {
+ dev_err(&client->dev, "Invalid %s: %u\n", label, val);
+ return -EINVAL;
+ }
+
+ reg_field.addr = reg_addr;
+ reg_field.mask = GENMASK(reg_shift + reg_width - 1, reg_shift);
+ reg_field.val = val / val_pitch << reg_shift;
+
+ error = iqs7211_add_field(iqs7211, reg_field);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+static int iqs7211_parse_event(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *event_node,
+ enum iqs7211_reg_grp_id reg_grp,
+ enum iqs7211_reg_key_id reg_key,
+ unsigned int *event_code)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ struct iqs7211_reg_field_desc reg_field;
+ unsigned int val;
+ int error;
+
+ error = iqs7211_parse_props(iqs7211, event_node, reg_grp, reg_key);
+ if (error)
+ return error;
+
+ if (reg_key == IQS7211_REG_KEY_AXIAL_X ||
+ reg_key == IQS7211_REG_KEY_AXIAL_Y) {
+ error = fwnode_property_read_u32(event_node,
+ "azoteq,gesture-angle", &val);
+ if (!error) {
+ if (val >= ARRAY_SIZE(iqs7211_gesture_angle)) {
+ dev_err(&client->dev,
+ "Invalid %s gesture angle: %u\n",
+ fwnode_get_name(event_node), val);
+ return -EINVAL;
+ }
+
+ reg_field.addr = dev_desc->gesture_angle;
+ reg_field.mask = U8_MAX;
+ reg_field.val = iqs7211_gesture_angle[val];
+
+ error = iqs7211_add_field(iqs7211, reg_field);
+ if (error)
+ return error;
+ } else if (error != -EINVAL) {
+ dev_err(&client->dev,
+ "Failed to read %s gesture angle: %d\n",
+ fwnode_get_name(event_node), error);
+ return error;
+ }
+ }
+
+ error = fwnode_property_read_u32(event_node, "linux,code", event_code);
+ if (error == -EINVAL)
+ error = 0;
+ else if (error)
+ dev_err(&client->dev, "Failed to read %s code: %d\n",
+ fwnode_get_name(event_node), error);
+
+ return error;
+}
+
+static int iqs7211_parse_cycles(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *tp_node)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ int num_cycles = dev_desc->cycle_limit[0] + dev_desc->cycle_limit[1];
+ int error, count, i, j, k, cycle_start;
+ unsigned int cycle_alloc[IQS7211_MAX_CYCLES][2];
+ u8 total_rx = iqs7211->tp_config.total_rx;
+ u8 total_tx = iqs7211->tp_config.total_tx;
+
+ for (i = 0; i < IQS7211_MAX_CYCLES * 2; i++)
+ *(cycle_alloc[0] + i) = U8_MAX;
+
+ count = fwnode_property_count_u32(tp_node, "azoteq,channel-select");
+ if (count == -EINVAL) {
+ /*
+ * Assign each sensing cycle's slots (0 and 1) to a channel,
+ * defined as the intersection between two CRx and CTx pins.
+ * A channel assignment of 255 means the slot is unused.
+ */
+ for (i = 0, cycle_start = 0; i < total_tx; i++) {
+ int cycle_stop = 0;
+
+ for (j = 0; j < total_rx; j++) {
+ /*
+ * Channels formed by CRx0-3 and CRx4-7 are
+ * bound to slots 0 and 1, respectively.
+ */
+ int slot = iqs7211->rx_tx_map[j] < 4 ? 0 : 1;
+ int chan = i * total_rx + j;
+
+ for (k = cycle_start; k < num_cycles; k++) {
+ if (cycle_alloc[k][slot] < U8_MAX)
+ continue;
+
+ cycle_alloc[k][slot] = chan;
+ break;
+ }
+
+ if (k < num_cycles) {
+ cycle_stop = max(k, cycle_stop);
+ continue;
+ }
+
+ dev_err(&client->dev,
+ "Insufficient number of cycles\n");
+ return -EINVAL;
+ }
+
+ /*
+ * Sensing cycles cannot straddle more than one CTx
+ * pin. As such, the next row's starting cycle must
+ * be greater than the previous row's highest cycle.
+ */
+ cycle_start = cycle_stop + 1;
+ }
+ } else if (count < 0) {
+ dev_err(&client->dev, "Failed to count channels: %d\n", count);
+ return count;
+ } else if (count > num_cycles * 2) {
+ dev_err(&client->dev, "Insufficient number of cycles\n");
+ return -EINVAL;
+ } else if (count > 0) {
+ error = fwnode_property_read_u32_array(tp_node,
+ "azoteq,channel-select",
+ cycle_alloc[0], count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read channels: %d\n",
+ error);
+ return error;
+ }
+
+ for (i = 0; i < count; i++) {
+ int chan = *(cycle_alloc[0] + i);
+
+ if (chan == U8_MAX)
+ continue;
+
+ if (chan >= total_rx * total_tx) {
+ dev_err(&client->dev, "Invalid channel: %d\n",
+ chan);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < count; j++) {
+ if (j == i || *(cycle_alloc[0] + j) != chan)
+ continue;
+
+ dev_err(&client->dev, "Duplicate channel: %d\n",
+ chan);
+ return -EINVAL;
+ }
+ }
+ }
+
+ /*
+ * Once the raw channel assignments have been derived, they must be
+ * packed according to the device's register map.
+ */
+ for (i = 0, cycle_start = 0; i < sizeof(dev_desc->cycle_limit); i++) {
+ int offs = 0;
+
+ for (j = cycle_start;
+ j < cycle_start + dev_desc->cycle_limit[i]; j++) {
+ iqs7211->cycle_alloc[i][offs++] = 0x05;
+ iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][0];
+ iqs7211->cycle_alloc[i][offs++] = cycle_alloc[j][1];
+ }
+
+ cycle_start += dev_desc->cycle_limit[i];
+ }
+
+ return 0;
+}
+
+static int iqs7211_parse_tp(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *tp_node)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ unsigned int pins[IQS7211_MAX_CTX];
+ int error, count, i, j;
+
+ count = fwnode_property_count_u32(tp_node, "azoteq,rx-enable");
+ if (count == -EINVAL) {
+ return 0;
+ } else if (count < 0) {
+ dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+ return count;
+ } else if (count > IQS7211_NUM_CRX) {
+ dev_err(&client->dev, "Invalid number of CRx pins\n");
+ return -EINVAL;
+ }
+
+ error = fwnode_property_read_u32_array(tp_node, "azoteq,rx-enable",
+ pins, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read CRx pins: %d\n", error);
+ return error;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (pins[i] >= IQS7211_NUM_CRX) {
+ dev_err(&client->dev, "Invalid CRx pin: %u\n", pins[i]);
+ return -EINVAL;
+ }
+
+ iqs7211->rx_tx_map[i] = pins[i];
+ }
+
+ iqs7211->tp_config.total_rx = count;
+
+ count = fwnode_property_count_u32(tp_node, "azoteq,tx-enable");
+ if (count < 0) {
+ dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+ return count;
+ } else if (count > dev_desc->num_ctx) {
+ dev_err(&client->dev, "Invalid number of CTx pins\n");
+ return -EINVAL;
+ }
+
+ error = fwnode_property_read_u32_array(tp_node, "azoteq,tx-enable",
+ pins, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read CTx pins: %d\n", error);
+ return error;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (pins[i] >= dev_desc->num_ctx) {
+ dev_err(&client->dev, "Invalid CTx pin: %u\n", pins[i]);
+ return -EINVAL;
+ }
+
+ for (j = 0; j < iqs7211->tp_config.total_rx; j++) {
+ if (iqs7211->rx_tx_map[j] != pins[i])
+ continue;
+
+ dev_err(&client->dev, "Conflicting CTx pin: %u\n",
+ pins[i]);
+ return -EINVAL;
+ }
+
+ iqs7211->rx_tx_map[iqs7211->tp_config.total_rx + i] = pins[i];
+ }
+
+ iqs7211->tp_config.total_tx = count;
+
+ return iqs7211_parse_cycles(iqs7211, tp_node);
+}
+
+static int iqs7211_parse_alp(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *alp_node)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ struct iqs7211_reg_field_desc reg_field;
+ int error, count, i;
+
+ count = fwnode_property_count_u32(alp_node, "azoteq,rx-enable");
+ if (count < 0 && count != -EINVAL) {
+ dev_err(&client->dev, "Failed to count CRx pins: %d\n", count);
+ return count;
+ } else if (count > IQS7211_NUM_CRX) {
+ dev_err(&client->dev, "Invalid number of CRx pins\n");
+ return -EINVAL;
+ } else if (count >= 0) {
+ unsigned int pins[IQS7211_NUM_CRX];
+
+ error = fwnode_property_read_u32_array(alp_node,
+ "azoteq,rx-enable",
+ pins, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read CRx pins: %d\n",
+ error);
+ return error;
+ }
+
+ reg_field.addr = dev_desc->alp_config;
+ reg_field.mask = GENMASK(IQS7211_NUM_CRX - 1, 0);
+ reg_field.val = 0;
+
+ for (i = 0; i < count; i++) {
+ if (pins[i] < dev_desc->min_crx_alp ||
+ pins[i] >= IQS7211_NUM_CRX) {
+ dev_err(&client->dev, "Invalid CRx pin: %u\n",
+ pins[i]);
+ return -EINVAL;
+ }
+
+ reg_field.val |= BIT(pins[i]);
+ }
+
+ error = iqs7211_add_field(iqs7211, reg_field);
+ if (error)
+ return error;
+ }
+
+ count = fwnode_property_count_u32(alp_node, "azoteq,tx-enable");
+ if (count < 0 && count != -EINVAL) {
+ dev_err(&client->dev, "Failed to count CTx pins: %d\n", count);
+ return count;
+ } else if (count > dev_desc->num_ctx) {
+ dev_err(&client->dev, "Invalid number of CTx pins\n");
+ return -EINVAL;
+ } else if (count >= 0) {
+ unsigned int pins[IQS7211_MAX_CTX];
+
+ error = fwnode_property_read_u32_array(alp_node,
+ "azoteq,tx-enable",
+ pins, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read CTx pins: %d\n",
+ error);
+ return error;
+ }
+
+ reg_field.addr = dev_desc->alp_config + 1;
+ reg_field.mask = GENMASK(dev_desc->num_ctx - 1, 0);
+ reg_field.val = 0;
+
+ for (i = 0; i < count; i++) {
+ if (pins[i] >= dev_desc->num_ctx) {
+ dev_err(&client->dev, "Invalid CTx pin: %u\n",
+ pins[i]);
+ return -EINVAL;
+ }
+
+ reg_field.val |= BIT(pins[i]);
+ }
+
+ error = iqs7211_add_field(iqs7211, reg_field);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+static int (*iqs7211_parse_extra[IQS7211_NUM_REG_GRPS])
+ (struct iqs7211_private *iqs7211,
+ struct fwnode_handle *reg_grp_node) = {
+ [IQS7211_REG_GRP_TP] = iqs7211_parse_tp,
+ [IQS7211_REG_GRP_ALP] = iqs7211_parse_alp,
+};
+
+static int iqs7211_parse_reg_grp(struct iqs7211_private *iqs7211,
+ struct fwnode_handle *reg_grp_node,
+ enum iqs7211_reg_grp_id reg_grp)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct iqs7211_reg_field_desc reg_field;
+ int error, i;
+
+ error = iqs7211_parse_props(iqs7211, reg_grp_node, reg_grp,
+ IQS7211_REG_KEY_NONE);
+ if (error)
+ return error;
+
+ if (iqs7211_parse_extra[reg_grp]) {
+ error = iqs7211_parse_extra[reg_grp](iqs7211, reg_grp_node);
+ if (error)
+ return error;
+ }
+
+ iqs7211->ati_start |= dev_desc->ati_start[reg_grp];
+
+ reg_field.addr = dev_desc->kp_enable[reg_grp];
+ reg_field.mask = 0;
+ reg_field.val = 0;
+
+ for (i = 0; i < dev_desc->num_kp_events; i++) {
+ const char *event_name = dev_desc->kp_events[i].name;
+ struct fwnode_handle *event_node;
+
+ if (dev_desc->kp_events[i].reg_grp != reg_grp)
+ continue;
+
+ reg_field.mask |= dev_desc->kp_events[i].enable;
+
+ if (event_name)
+ event_node = fwnode_get_named_child_node(reg_grp_node,
+ event_name);
+ else
+ event_node = fwnode_handle_get(reg_grp_node);
+
+ if (!event_node)
+ continue;
+
+ error = iqs7211_parse_event(iqs7211, event_node,
+ dev_desc->kp_events[i].reg_grp,
+ dev_desc->kp_events[i].reg_key,
+ &iqs7211->kp_code[i]);
+ fwnode_handle_put(event_node);
+ if (error)
+ return error;
+
+ reg_field.val |= dev_desc->kp_events[i].enable;
+
+ iqs7211->event_mask |= iqs7211_reg_grp_masks[reg_grp];
+ }
+
+ return iqs7211_add_field(iqs7211, reg_field);
+}
+
+static int iqs7211_register_kp(struct iqs7211_private *iqs7211)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct input_dev *kp_idev = iqs7211->kp_idev;
+ struct i2c_client *client = iqs7211->client;
+ int error, i;
+
+ for (i = 0; i < dev_desc->num_kp_events; i++)
+ if (iqs7211->kp_code[i])
+ break;
+
+ if (i == dev_desc->num_kp_events)
+ return 0;
+
+ kp_idev = devm_input_allocate_device(&client->dev);
+ if (!kp_idev)
+ return -ENOMEM;
+
+ iqs7211->kp_idev = kp_idev;
+
+ kp_idev->name = dev_desc->kp_name;
+ kp_idev->id.bustype = BUS_I2C;
+
+ for (i = 0; i < dev_desc->num_kp_events; i++)
+ if (iqs7211->kp_code[i])
+ input_set_capability(iqs7211->kp_idev, EV_KEY,
+ iqs7211->kp_code[i]);
+
+ error = input_register_device(kp_idev);
+ if (error)
+ dev_err(&client->dev, "Failed to register %s: %d\n",
+ kp_idev->name, error);
+
+ return error;
+}
+
+static int iqs7211_register_tp(struct iqs7211_private *iqs7211)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct touchscreen_properties *prop = &iqs7211->prop;
+ struct input_dev *tp_idev = iqs7211->tp_idev;
+ struct i2c_client *client = iqs7211->client;
+ int error;
+
+ error = device_property_read_u32(&client->dev, "azoteq,num-contacts",
+ &iqs7211->num_contacts);
+ if (error == -EINVAL) {
+ return 0;
+ } else if (error) {
+ dev_err(&client->dev, "Failed to read number of contacts: %d\n",
+ error);
+ return error;
+ } else if (iqs7211->num_contacts > IQS7211_MAX_CONTACTS) {
+ dev_err(&client->dev, "Invalid number of contacts: %u\n",
+ iqs7211->num_contacts);
+ return -EINVAL;
+ }
+
+ iqs7211->tp_config.num_contacts = iqs7211->num_contacts ? : 1;
+
+ if (!iqs7211->num_contacts)
+ return 0;
+
+ iqs7211->event_mask |= IQS7211_EVENT_MASK_MOVE;
+
+ tp_idev = devm_input_allocate_device(&client->dev);
+ if (!tp_idev)
+ return -ENOMEM;
+
+ iqs7211->tp_idev = tp_idev;
+
+ tp_idev->name = dev_desc->tp_name;
+ tp_idev->id.bustype = BUS_I2C;
+
+ input_set_abs_params(tp_idev, ABS_MT_POSITION_X,
+ 0, le16_to_cpu(iqs7211->tp_config.max_x), 0, 0);
+
+ input_set_abs_params(tp_idev, ABS_MT_POSITION_Y,
+ 0, le16_to_cpu(iqs7211->tp_config.max_y), 0, 0);
+
+ input_set_abs_params(tp_idev, ABS_MT_PRESSURE, 0, U16_MAX, 0, 0);
+
+ touchscreen_parse_properties(tp_idev, true, prop);
+
+ /*
+ * The device reserves 0xFFFF for coordinates that correspond to slots
+ * which are not in a state of touch.
+ */
+ if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+ dev_err(&client->dev, "Invalid trackpad size: %u*%u\n",
+ prop->max_x, prop->max_y);
+ return -EINVAL;
+ }
+
+ iqs7211->tp_config.max_x = cpu_to_le16(prop->max_x);
+ iqs7211->tp_config.max_y = cpu_to_le16(prop->max_y);
+
+ error = input_mt_init_slots(tp_idev, iqs7211->num_contacts,
+ INPUT_MT_DIRECT);
+ if (error) {
+ dev_err(&client->dev, "Failed to initialize slots: %d\n",
+ error);
+ return error;
+ }
+
+ error = input_register_device(tp_idev);
+ if (error)
+ dev_err(&client->dev, "Failed to register %s: %d\n",
+ tp_idev->name, error);
+
+ return error;
+}
+
+static int iqs7211_report(struct iqs7211_private *iqs7211)
+{
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ struct i2c_client *client = iqs7211->client;
+ struct iqs7211_touch_data *touch_data;
+ u16 info_flags, charge_mode, gesture_flags;
+ __le16 status[12];
+ int error, i;
+
+ error = iqs7211_read_burst(iqs7211, dev_desc->sys_stat, status,
+ dev_desc->contact_offs * sizeof(__le16) +
+ iqs7211->num_contacts * sizeof(*touch_data));
+ if (error)
+ return error;
+
+ info_flags = le16_to_cpu(status[dev_desc->info_offs]);
+
+ if (info_flags & dev_desc->show_reset) {
+ dev_err(&client->dev, "Unexpected device reset\n");
+
+ /*
+ * The device may or may not expect forced communication after
+ * it exits hardware reset, so the corresponding state machine
+ * must be reset as well.
+ */
+ iqs7211->comms_mode = iqs7211->comms_init;
+
+ return iqs7211_init_device(iqs7211);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dev_desc->ati_error); i++) {
+ if (!(info_flags & dev_desc->ati_error[i]))
+ continue;
+
+ dev_err(&client->dev, "Unexpected %s ATI error\n",
+ iqs7211_reg_grp_names[i]);
+ return 0;
+ }
+
+ for (i = 0; i < iqs7211->num_contacts; i++) {
+ u16 pressure;
+
+ touch_data = (struct iqs7211_touch_data *)
+ &status[dev_desc->contact_offs] + i;
+ pressure = le16_to_cpu(touch_data->pressure);
+
+ input_mt_slot(iqs7211->tp_idev, i);
+ if (input_mt_report_slot_state(iqs7211->tp_idev, MT_TOOL_FINGER,
+ pressure != 0)) {
+ touchscreen_report_pos(iqs7211->tp_idev, &iqs7211->prop,
+ le16_to_cpu(touch_data->abs_x),
+ le16_to_cpu(touch_data->abs_y),
+ true);
+ input_report_abs(iqs7211->tp_idev, ABS_MT_PRESSURE,
+ pressure);
+ }
+ }
+
+ if (iqs7211->num_contacts) {
+ input_mt_sync_frame(iqs7211->tp_idev);
+ input_sync(iqs7211->tp_idev);
+ }
+
+ if (!iqs7211->kp_idev)
+ return 0;
+
+ charge_mode = info_flags & GENMASK(dev_desc->charge_shift + 2,
+ dev_desc->charge_shift);
+ charge_mode >>= dev_desc->charge_shift;
+
+ /*
+ * A charging mode higher than 2 (idle mode) indicates the device last
+ * operated in low-power mode and intends to express an ALP event.
+ */
+ if (info_flags & dev_desc->kp_events->mask && charge_mode > 2) {
+ input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 1);
+ input_sync(iqs7211->kp_idev);
+
+ input_report_key(iqs7211->kp_idev, *iqs7211->kp_code, 0);
+ }
+
+ for (i = 0; i < dev_desc->num_kp_events; i++) {
+ if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_BTN)
+ continue;
+
+ input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+ info_flags & dev_desc->kp_events[i].mask);
+ }
+
+ gesture_flags = le16_to_cpu(status[dev_desc->gesture_offs]);
+
+ for (i = 0; i < dev_desc->num_kp_events; i++) {
+ enum iqs7211_reg_key_id reg_key = dev_desc->kp_events[i].reg_key;
+ u16 mask = dev_desc->kp_events[i].mask;
+
+ if (dev_desc->kp_events[i].reg_grp != IQS7211_REG_GRP_TP)
+ continue;
+
+ if ((gesture_flags ^ iqs7211->gesture_cache) & mask)
+ input_report_key(iqs7211->kp_idev, iqs7211->kp_code[i],
+ gesture_flags & mask);
+
+ iqs7211->gesture_cache &= ~mask;
+
+ /*
+ * Hold and palm gestures persist while the contact remains in
+ * place; all others are momentary and hence are followed by a
+ * complementary release event.
+ */
+ if (reg_key == IQS7211_REG_KEY_HOLD ||
+ reg_key == IQS7211_REG_KEY_PALM) {
+ iqs7211->gesture_cache |= gesture_flags & mask;
+ gesture_flags &= ~mask;
+ }
+ }
+
+ if (gesture_flags) {
+ input_sync(iqs7211->kp_idev);
+
+ for (i = 0; i < dev_desc->num_kp_events; i++)
+ if (dev_desc->kp_events[i].reg_grp == IQS7211_REG_GRP_TP &&
+ gesture_flags & dev_desc->kp_events[i].mask)
+ input_report_key(iqs7211->kp_idev,
+ iqs7211->kp_code[i], 0);
+ }
+
+ input_sync(iqs7211->kp_idev);
+
+ return 0;
+}
+
+static irqreturn_t iqs7211_irq(int irq, void *context)
+{
+ struct iqs7211_private *iqs7211 = context;
+
+ return iqs7211_report(iqs7211) ? IRQ_NONE : IRQ_HANDLED;
+}
+
+static int iqs7211_suspend(struct device *dev)
+{
+ struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ int error;
+
+ if (!dev_desc->suspend || device_may_wakeup(dev))
+ return 0;
+
+ /*
+ * I2C communication prompts the device to assert its RDY pin if it is
+ * not already asserted. As such, the interrupt must be disabled so as
+ * to prevent reentrant interrupts.
+ */
+ disable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+ error = iqs7211_write_word(iqs7211, dev_desc->sys_ctrl,
+ dev_desc->suspend);
+
+ enable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+ return error;
+}
+
+static int iqs7211_resume(struct device *dev)
+{
+ struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+ const struct iqs7211_dev_desc *dev_desc = iqs7211->dev_desc;
+ __le16 sys_ctrl[] = {
+ 0,
+ cpu_to_le16(iqs7211->event_mask),
+ };
+ int error;
+
+ if (!dev_desc->suspend || device_may_wakeup(dev))
+ return 0;
+
+ disable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+ /*
+ * Forced communication, if in use, must be explicitly enabled as part
+ * of the wake-up command.
+ */
+ error = iqs7211_write_burst(iqs7211, dev_desc->sys_ctrl, sys_ctrl,
+ sizeof(sys_ctrl));
+
+ enable_irq(gpiod_to_irq(iqs7211->irq_gpio));
+
+ return error;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(iqs7211_pm, iqs7211_suspend, iqs7211_resume);
+
+static ssize_t fw_info_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct iqs7211_private *iqs7211 = dev_get_drvdata(dev);
+
+ return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n",
+ le16_to_cpu(iqs7211->ver_info.prod_num),
+ le32_to_cpu(iqs7211->ver_info.patch),
+ le16_to_cpu(iqs7211->ver_info.major),
+ le16_to_cpu(iqs7211->ver_info.minor),
+ iqs7211->exp_file[1], iqs7211->exp_file[0]);
+}
+
+static DEVICE_ATTR_RO(fw_info);
+
+static struct attribute *iqs7211_attrs[] = {
+ &dev_attr_fw_info.attr,
+ NULL
+};
+ATTRIBUTE_GROUPS(iqs7211);
+
+static const struct of_device_id iqs7211_of_match[] = {
+ {
+ .compatible = "azoteq,iqs7210a",
+ .data = &iqs7211_devs[IQS7210A],
+ },
+ {
+ .compatible = "azoteq,iqs7211a",
+ .data = &iqs7211_devs[IQS7211A],
+ },
+ {
+ .compatible = "azoteq,iqs7211e",
+ .data = &iqs7211_devs[IQS7211E],
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(of, iqs7211_of_match);
+
+static int iqs7211_probe(struct i2c_client *client)
+{
+ struct iqs7211_private *iqs7211;
+ enum iqs7211_reg_grp_id reg_grp;
+ unsigned long irq_flags;
+ bool shared_irq;
+ int error, irq;
+
+ iqs7211 = devm_kzalloc(&client->dev, sizeof(*iqs7211), GFP_KERNEL);
+ if (!iqs7211)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, iqs7211);
+ iqs7211->client = client;
+
+ INIT_LIST_HEAD(&iqs7211->reg_field_head);
+
+ iqs7211->dev_desc = device_get_match_data(&client->dev);
+ if (!iqs7211->dev_desc)
+ return -ENODEV;
+
+ shared_irq = iqs7211->dev_desc->num_ctx == IQS7211_MAX_CTX;
+
+ /*
+ * The RDY pin behaves as an interrupt, but must also be polled ahead
+ * of unsolicited I2C communication. As such, it is first opened as a
+ * GPIO and then passed to gpiod_to_irq() to register the interrupt.
+ *
+ * If an extra CTx pin is present, the RDY and MCLR pins are combined
+ * into a single bidirectional pin. In that case, the platform's GPIO
+ * must be configured as an open-drain output.
+ */
+ iqs7211->irq_gpio = devm_gpiod_get(&client->dev, "irq",
+ shared_irq ? GPIOD_OUT_LOW
+ : GPIOD_IN);
+ if (IS_ERR(iqs7211->irq_gpio)) {
+ error = PTR_ERR(iqs7211->irq_gpio);
+ dev_err(&client->dev, "Failed to request IRQ GPIO: %d\n",
+ error);
+ return error;
+ }
+
+ if (shared_irq) {
+ iqs7211->reset_gpio = iqs7211->irq_gpio;
+ } else {
+ iqs7211->reset_gpio = devm_gpiod_get_optional(&client->dev,
+ "reset",
+ GPIOD_OUT_HIGH);
+ if (IS_ERR(iqs7211->reset_gpio)) {
+ error = PTR_ERR(iqs7211->reset_gpio);
+ dev_err(&client->dev,
+ "Failed to request reset GPIO: %d\n", error);
+ return error;
+ }
+ }
+
+ error = iqs7211_start_comms(iqs7211);
+ if (error)
+ return error;
+
+ for (reg_grp = 0; reg_grp < IQS7211_NUM_REG_GRPS; reg_grp++) {
+ const char *reg_grp_name = iqs7211_reg_grp_names[reg_grp];
+ struct fwnode_handle *reg_grp_node;
+
+ if (reg_grp_name)
+ reg_grp_node = device_get_named_child_node(&client->dev,
+ reg_grp_name);
+ else
+ reg_grp_node = fwnode_handle_get(dev_fwnode(&client->dev));
+
+ if (!reg_grp_node)
+ continue;
+
+ error = iqs7211_parse_reg_grp(iqs7211, reg_grp_node, reg_grp);
+ fwnode_handle_put(reg_grp_node);
+ if (error)
+ return error;
+ }
+
+ error = iqs7211_register_kp(iqs7211);
+ if (error)
+ return error;
+
+ error = iqs7211_register_tp(iqs7211);
+ if (error)
+ return error;
+
+ error = iqs7211_init_device(iqs7211);
+ if (error)
+ return error;
+
+ irq = gpiod_to_irq(iqs7211->irq_gpio);
+ if (irq < 0)
+ return irq;
+
+ irq_flags = gpiod_is_active_low(iqs7211->irq_gpio) ? IRQF_TRIGGER_LOW
+ : IRQF_TRIGGER_HIGH;
+ irq_flags |= IRQF_ONESHOT;
+
+ error = devm_request_threaded_irq(&client->dev, irq, NULL, iqs7211_irq,
+ irq_flags, client->name, iqs7211);
+ if (error)
+ dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+
+ return error;
+}
+
+static struct i2c_driver iqs7211_i2c_driver = {
+ .probe = iqs7211_probe,
+ .driver = {
+ .name = "iqs7211",
+ .of_match_table = iqs7211_of_match,
+ .dev_groups = iqs7211_groups,
+ .pm = pm_sleep_ptr(&iqs7211_pm),
+ },
+};
+module_i2c_driver(iqs7211_i2c_driver);
+
+MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
+MODULE_DESCRIPTION("Azoteq IQS7210A/7211A/E Trackpad/Touchscreen Controller");
+MODULE_LICENSE("GPL");
static int lpc32xx_ts_probe(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
struct lpc32xx_tsc *tsc;
struct input_dev *input;
- struct resource *res;
- resource_size_t size;
int irq;
int error;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res) {
- dev_err(&pdev->dev, "Can't get memory resource\n");
- return -ENOENT;
- }
-
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
- tsc = kzalloc(sizeof(*tsc), GFP_KERNEL);
- input = input_allocate_device();
- if (!tsc || !input) {
- dev_err(&pdev->dev, "failed allocating memory\n");
- error = -ENOMEM;
- goto err_free_mem;
- }
+ tsc = devm_kzalloc(dev, sizeof(*tsc), GFP_KERNEL);
+ if (!tsc)
+ return -ENOMEM;
- tsc->dev = input;
tsc->irq = irq;
- size = resource_size(res);
-
- if (!request_mem_region(res->start, size, pdev->name)) {
- dev_err(&pdev->dev, "TSC registers are not free\n");
- error = -EBUSY;
- goto err_free_mem;
- }
+ tsc->tsc_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(tsc->tsc_base))
+ return PTR_ERR(tsc->tsc_base);
- tsc->tsc_base = ioremap(res->start, size);
- if (!tsc->tsc_base) {
- dev_err(&pdev->dev, "Can't map memory\n");
- error = -ENOMEM;
- goto err_release_mem;
- }
-
- tsc->clk = clk_get(&pdev->dev, NULL);
+ tsc->clk = devm_clk_get(dev, NULL);
if (IS_ERR(tsc->clk)) {
dev_err(&pdev->dev, "failed getting clock\n");
- error = PTR_ERR(tsc->clk);
- goto err_unmap;
+ return PTR_ERR(tsc->clk);
+ }
+
+ input = devm_input_allocate_device(dev);
+ if (!input) {
+ dev_err(&pdev->dev, "failed allocating input device\n");
+ return -ENOMEM;
}
input->name = MOD_NAME;
input->id.vendor = 0x0001;
input->id.product = 0x0002;
input->id.version = 0x0100;
- input->dev.parent = &pdev->dev;
input->open = lpc32xx_ts_open;
input->close = lpc32xx_ts_close;
- input->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
- input->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+ input_set_capability(input, EV_KEY, BTN_TOUCH);
input_set_abs_params(input, ABS_X, LPC32XX_TSC_MIN_XY_VAL,
LPC32XX_TSC_MAX_XY_VAL, 0, 0);
input_set_abs_params(input, ABS_Y, LPC32XX_TSC_MIN_XY_VAL,
LPC32XX_TSC_MAX_XY_VAL, 0, 0);
input_set_drvdata(input, tsc);
+ tsc->dev = input;
- error = request_irq(tsc->irq, lpc32xx_ts_interrupt,
- 0, pdev->name, tsc);
+ error = devm_request_irq(dev, tsc->irq, lpc32xx_ts_interrupt,
+ 0, pdev->name, tsc);
if (error) {
dev_err(&pdev->dev, "failed requesting interrupt\n");
- goto err_put_clock;
+ return error;
}
error = input_register_device(input);
if (error) {
dev_err(&pdev->dev, "failed registering input device\n");
- goto err_free_irq;
+ return error;
}
platform_set_drvdata(pdev, tsc);
- device_init_wakeup(&pdev->dev, 1);
-
- return 0;
-
-err_free_irq:
- free_irq(tsc->irq, tsc);
-err_put_clock:
- clk_put(tsc->clk);
-err_unmap:
- iounmap(tsc->tsc_base);
-err_release_mem:
- release_mem_region(res->start, size);
-err_free_mem:
- input_free_device(input);
- kfree(tsc);
-
- return error;
-}
-
-static int lpc32xx_ts_remove(struct platform_device *pdev)
-{
- struct lpc32xx_tsc *tsc = platform_get_drvdata(pdev);
- struct resource *res;
-
- free_irq(tsc->irq, tsc);
-
- input_unregister_device(tsc->dev);
-
- clk_put(tsc->clk);
-
- iounmap(tsc->tsc_base);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- release_mem_region(res->start, resource_size(res));
-
- kfree(tsc);
+ device_init_wakeup(&pdev->dev, true);
return 0;
}
static struct platform_driver lpc32xx_ts_driver = {
.probe = lpc32xx_ts_probe,
- .remove = lpc32xx_ts_remove,
.driver = {
.name = MOD_NAME,
.pm = LPC32XX_TS_PM_OPS,
ts->gpio_ce = devm_gpiod_get_optional(&client->dev,
"ce", GPIOD_OUT_LOW);
- if (IS_ERR(ts->gpio_ce)) {
- error = PTR_ERR(ts->gpio_ce);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->gpio_ce))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->gpio_ce), "Failed to get gpio\n");
error = mip4_power_on(ts);
if (error)
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/i2c.h>
#include <linux/input/mt.h>
#include <linux/input/touchscreen.h>
/* Touchscreen absolute values */
#define MMS114_MAX_AREA 0xff
+#define MMS114_MAX_TOUCHKEYS 15
#define MMS114_MAX_TOUCH 10
#define MMS114_EVENT_SIZE 8
#define MMS136_EVENT_SIZE 6
unsigned int contact_threshold;
unsigned int moving_threshold;
+ u32 keycodes[MMS114_MAX_TOUCHKEYS];
+ int num_keycodes;
+
/* Use cache data for mode control register(write only) */
u8 cache_mode_control;
};
return;
}
- if (touch->type != MMS114_TYPE_TOUCHSCREEN) {
- dev_err(&client->dev, "Wrong touch type (%d)\n", touch->type);
- return;
- }
-
id = touch->id - 1;
x = touch->x_lo | touch->x_hi << 8;
y = touch->y_lo | touch->y_hi << 8;
}
}
+static void mms114_process_touchkey(struct mms114_data *data,
+ struct mms114_touch *touch)
+{
+ struct i2c_client *client = data->client;
+ struct input_dev *input_dev = data->input_dev;
+ unsigned int keycode_id;
+
+ if (touch->id == 0)
+ return;
+
+ if (touch->id > data->num_keycodes) {
+ dev_err(&client->dev, "Wrong touch id for touchkey (%d)\n",
+ touch->id);
+ return;
+ }
+
+ keycode_id = touch->id - 1;
+ dev_dbg(&client->dev, "keycode id: %d, pressed: %d\n", keycode_id,
+ touch->pressed);
+
+ input_report_key(input_dev, data->keycodes[keycode_id], touch->pressed);
+}
+
static irqreturn_t mms114_interrupt(int irq, void *dev_id)
{
struct mms114_data *data = dev_id;
+ struct i2c_client *client = data->client;
struct input_dev *input_dev = data->input_dev;
struct mms114_touch touch[MMS114_MAX_TOUCH];
int packet_size;
if (error < 0)
goto out;
- for (index = 0; index < touch_size; index++)
- mms114_process_mt(data, touch + index);
+ for (index = 0; index < touch_size; index++) {
+ switch (touch[index].type) {
+ case MMS114_TYPE_TOUCHSCREEN:
+ mms114_process_mt(data, touch + index);
+ break;
+
+ case MMS114_TYPE_TOUCHKEY:
+ mms114_process_touchkey(data, touch + index);
+ break;
+
+ default:
+ dev_err(&client->dev, "Wrong touch type (%d)\n",
+ touch[index].type);
+ break;
+ }
+ }
input_mt_report_pointer_emulation(data->input_dev, true);
input_sync(data->input_dev);
struct input_dev *input_dev;
const void *match_data;
int error;
+ int i;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
dev_err(&client->dev, "Not supported I2C adapter\n");
data->type = (enum mms_type)match_data;
+ data->num_keycodes = device_property_count_u32(&client->dev,
+ "linux,keycodes");
+ if (data->num_keycodes == -EINVAL) {
+ data->num_keycodes = 0;
+ } else if (data->num_keycodes < 0) {
+ dev_err(&client->dev,
+ "Unable to parse linux,keycodes property: %d\n",
+ data->num_keycodes);
+ return data->num_keycodes;
+ } else if (data->num_keycodes > MMS114_MAX_TOUCHKEYS) {
+ dev_warn(&client->dev,
+ "Found %d linux,keycodes but max is %d, ignoring the rest\n",
+ data->num_keycodes, MMS114_MAX_TOUCHKEYS);
+ data->num_keycodes = MMS114_MAX_TOUCHKEYS;
+ }
+
+ if (data->num_keycodes > 0) {
+ error = device_property_read_u32_array(&client->dev,
+ "linux,keycodes",
+ data->keycodes,
+ data->num_keycodes);
+ if (error) {
+ dev_err(&client->dev,
+ "Unable to read linux,keycodes values: %d\n",
+ error);
+ return error;
+ }
+
+ input_dev->keycode = data->keycodes;
+ input_dev->keycodemax = data->num_keycodes;
+ input_dev->keycodesize = sizeof(data->keycodes[0]);
+ for (i = 0; i < data->num_keycodes; i++)
+ input_set_capability(input_dev,
+ EV_KEY, data->keycodes[i]);
+ }
+
input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
input_set_abs_params(input_dev, ABS_MT_PRESSURE, 0, 255, 0, 0);
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * Driver for Novatek i2c touchscreen controller as found on
- * the Acer Iconia One 7 B1-750 tablet. The Touchscreen controller
- * model-number is unknown. Android calls this a "NVT-ts" touchscreen,
- * but that may apply to other Novatek controller models too.
+ * Driver for Novatek NT11205 i2c touchscreen controller as found
+ * on the Acer Iconia One 7 B1-750 tablet.
*
* Copyright (c) 2023 Hans de Goede <hdegoede@redhat.com>
*/
error = input_register_device(input);
if (error) {
- dev_err(dev, "failed to request irq: %d\n", error);
+ dev_err(dev, "failed to register input device: %d\n", error);
return error;
}
module_i2c_driver(nvt_ts_driver);
-MODULE_DESCRIPTION("Novatek NVT-ts touchscreen driver");
+MODULE_DESCRIPTION("Novatek NT11205 touchscreen driver");
MODULE_AUTHOR("Hans de Goede <hdegoede@redhat.com>");
MODULE_LICENSE("GPL");
#include <linux/input/mt.h>
#include <linux/input/touchscreen.h>
#include <linux/interrupt.h>
-#include <linux/of_device.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/slab.h>
#define PIXCIR_MAX_SLOTS 5 /* Max fingers supported by driver */
input_set_drvdata(input, tsdata);
tsdata->gpio_attb = devm_gpiod_get(dev, "attb", GPIOD_IN);
- if (IS_ERR(tsdata->gpio_attb)) {
- error = PTR_ERR(tsdata->gpio_attb);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to request ATTB gpio: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(tsdata->gpio_attb))
+ return dev_err_probe(dev, PTR_ERR(tsdata->gpio_attb),
+ "Failed to request ATTB gpio\n");
tsdata->gpio_reset = devm_gpiod_get_optional(dev, "reset",
GPIOD_OUT_LOW);
- if (IS_ERR(tsdata->gpio_reset)) {
- error = PTR_ERR(tsdata->gpio_reset);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to request RESET gpio: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(tsdata->gpio_reset))
+ return dev_err_probe(dev, PTR_ERR(tsdata->gpio_reset),
+ "Failed to request RESET gpio\n");
tsdata->gpio_wake = devm_gpiod_get_optional(dev, "wake",
GPIOD_OUT_HIGH);
- if (IS_ERR(tsdata->gpio_wake)) {
- error = PTR_ERR(tsdata->gpio_wake);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get wake gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(tsdata->gpio_wake))
+ return dev_err_probe(dev, PTR_ERR(tsdata->gpio_wake),
+ "Failed to get wake gpio\n");
tsdata->gpio_enable = devm_gpiod_get_optional(dev, "enable",
GPIOD_OUT_HIGH);
- if (IS_ERR(tsdata->gpio_enable)) {
- error = PTR_ERR(tsdata->gpio_enable);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "Failed to get enable gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(tsdata->gpio_enable))
+ return dev_err_probe(dev, PTR_ERR(tsdata->gpio_enable),
+ "Failed to get enable gpio\n");
if (tsdata->gpio_enable)
msleep(100);
i2c_set_clientdata(client, ts);
ts->avdd = devm_regulator_get(&client->dev, "avdd");
- if (IS_ERR(ts->avdd)) {
- error = PTR_ERR(ts->avdd);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get 'avdd' regulator: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->avdd))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->avdd),
+ "Failed to get 'avdd' regulator\n");
ts->vccio = devm_regulator_get(&client->dev, "vccio");
- if (IS_ERR(ts->vccio)) {
- error = PTR_ERR(ts->vccio);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get 'vccio' regulator: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->vccio))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->vccio),
+ "Failed to get 'vccio' regulator\n");
ts->reset_gpio = devm_gpiod_get_optional(&client->dev, "reset",
GPIOD_OUT_LOW);
- if (IS_ERR(ts->reset_gpio)) {
- error = PTR_ERR(ts->reset_gpio);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "failed to get reset gpio: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->reset_gpio))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+ "Failed to get reset gpio\n");
error = raydium_i2c_power_on(ts);
if (error)
/* get the channels from IIO device */
st->iio_chans = devm_iio_channel_get_all(dev);
- if (IS_ERR(st->iio_chans)) {
- error = PTR_ERR(st->iio_chans);
- if (error != -EPROBE_DEFER)
- dev_err(dev, "can't get iio channels.\n");
- return error;
- }
+ if (IS_ERR(st->iio_chans))
+ return dev_err_probe(dev, PTR_ERR(st->iio_chans), "can't get iio channels\n");
if (!device_property_present(dev, "io-channel-names"))
return -ENODEV;
/* Power GPIO pin */
data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW);
- if (IS_ERR(data->gpio_power)) {
- if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER)
- dev_err(dev, "Shutdown GPIO request failed\n");
- return PTR_ERR(data->gpio_power);
- }
+ if (IS_ERR(data->gpio_power))
+ return dev_err_probe(dev, PTR_ERR(data->gpio_power),
+ "Shutdown GPIO request failed\n");
error = silead_ts_setup(client);
if (error)
ts->attn_gpio = devm_gpiod_get_optional(&client->dev,
"attn", GPIOD_IN);
- if (IS_ERR(ts->attn_gpio)) {
- error = PTR_ERR(ts->attn_gpio);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get attention GPIO: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->attn_gpio))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->attn_gpio),
+ "Failed to get attention GPIO\n");
ts->reset_gpio = devm_gpiod_get_optional(&client->dev,
"reset", GPIOD_OUT_LOW);
- if (IS_ERR(ts->reset_gpio)) {
- error = PTR_ERR(ts->reset_gpio);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev,
- "Failed to get reset GPIO: %d\n", error);
- return error;
- }
+ if (IS_ERR(ts->reset_gpio))
+ return dev_err_probe(&client->dev, PTR_ERR(ts->reset_gpio),
+ "Failed to get reset GPIO\n");
sis_ts_reset(ts);
*/
static int surface3_spi_get_gpio_config(struct surface3_ts_data *data)
{
- int error;
struct device *dev;
struct gpio_desc *gpiod;
int i;
/* Get the reset lines GPIO pin number */
for (i = 0; i < 2; i++) {
gpiod = devm_gpiod_get_index(dev, NULL, i, GPIOD_OUT_LOW);
- if (IS_ERR(gpiod)) {
- error = PTR_ERR(gpiod);
- if (error != -EPROBE_DEFER)
- dev_err(dev,
- "Failed to get power GPIO %d: %d\n",
- i,
- error);
- return error;
- }
+ if (IS_ERR(gpiod))
+ return dev_err_probe(dev, PTR_ERR(gpiod),
+ "Failed to get power GPIO %d\n", i);
data->gpiod_rst[i] = gpiod;
}
sx8654->gpio_reset = devm_gpiod_get_optional(&client->dev, "reset",
GPIOD_OUT_HIGH);
- if (IS_ERR(sx8654->gpio_reset)) {
- error = PTR_ERR(sx8654->gpio_reset);
- if (error != -EPROBE_DEFER)
- dev_err(&client->dev, "unable to get reset-gpio: %d\n",
- error);
- return error;
- }
+ if (IS_ERR(sx8654->gpio_reset))
+ return dev_err_probe(&client->dev, PTR_ERR(sx8654->gpio_reset),
+ "unable to get reset-gpio\n");
dev_dbg(&client->dev, "got GPIO reset pin\n");
sx8654->data = device_get_match_data(&client->dev);
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/sort.h>
#include <linux/pm_wakeirq.h>
#include <linux/io.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
+#include <linux/of.h>
#define INTR_STAT_OFS 0x0
#define INTR_SET_OFS 0x8
#include <linux/mailbox_controller.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#define INTR_STAT_OFS 0x0
#define INTR_SET_OFS 0x8
mbox->dev = dev;
platform_set_drvdata(pdev, mbox);
- /* Get resource for registers */
- iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ /* Get resource for registers and map registers of all rings */
+ mbox->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &iomem);
if (!iomem || (resource_size(iomem) < RING_REGS_SIZE)) {
ret = -ENODEV;
goto fail;
- }
-
- /* Map registers of all rings */
- mbox->regs = devm_ioremap_resource(&pdev->dev, iomem);
- if (IS_ERR(mbox->regs)) {
+ } else if (IS_ERR(mbox->regs)) {
ret = PTR_ERR(mbox->regs);
goto fail;
}
* pdc_tx_list_sg_add() - Add the buffers in a scatterlist to the transmit
* descriptors for a given SPU. The scatterlist buffers contain the data for a
* SPU request message.
- * @spu_idx: The index of the SPU to submit the request to, [0, max_spu)
+ * @pdcs: PDC state for the SPU that will process this request
* @sg: Scatterlist whose buffers contain part of the SPU request
*
* If a scatterlist buffer is larger than PDC_DMA_BUF_MAX, multiple descriptors
* pdc_rx_list_sg_add() - Add the buffers in a scatterlist to the receive
* descriptors for a given SPU. The caller must have already DMA mapped the
* scatterlist.
- * @spu_idx: Indicates which SPU the buffers are for
+ * @pdcs: PDC state for the SPU that will process this request
* @sg: Scatterlist whose buffers are added to the receive ring
*
* If a receive buffer in the scatterlist is larger than PDC_DMA_BUF_MAX,
/**
* pdc_tasklet_cb() - Tasklet callback that runs the deferred processing after
* a DMA receive interrupt. Reenables the receive interrupt.
- * @data: PDC state structure
+ * @t: Pointer to the Altera sSGDMA channel structure
*/
static void pdc_tasklet_cb(struct tasklet_struct *t)
{
if (err)
goto cleanup_ring_pool;
- pdc_regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!pdc_regs) {
- err = -ENODEV;
- goto cleanup_ring_pool;
- }
- dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
- &pdc_regs->start, &pdc_regs->end);
-
- pdcs->pdc_reg_vbase = devm_ioremap_resource(&pdev->dev, pdc_regs);
+ pdcs->pdc_reg_vbase = devm_platform_get_and_ioremap_resource(pdev, 0, &pdc_regs);
if (IS_ERR(pdcs->pdc_reg_vbase)) {
err = PTR_ERR(pdcs->pdc_reg_vbase);
goto cleanup_ring_pool;
}
+ dev_dbg(dev, "PDC register region res.start = %pa, res.end = %pa",
+ &pdc_regs->start, &pdc_regs->end);
/* create rx buffer pool after dt read to know how big buffers are */
err = pdc_rx_buf_pool_create(pdcs);
#include <linux/iopoll.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/kfifo.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/suspend.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/mailbox_controller.h>
#include <soc/microchip/mpfs.h>
return -ENOMEM;
/* It's okay for MMIO to be NULL */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- tdev->tx_mmio = devm_ioremap_resource(&pdev->dev, res);
+ tdev->tx_mmio = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (PTR_ERR(tdev->tx_mmio) == -EBUSY) {
/* if reserved area in SRAM, try just ioremap */
size = resource_size(res);
}
/* If specified, second reg entry is Rx MMIO */
- res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
- tdev->rx_mmio = devm_ioremap_resource(&pdev->dev, res);
+ tdev->rx_mmio = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
if (PTR_ERR(tdev->rx_mmio) == -EBUSY) {
size = resource_size(res);
tdev->rx_mmio = devm_ioremap(&pdev->dev, res->start, size);
tdev->tx_channel = mbox_test_request_channel(pdev, "tx");
tdev->rx_channel = mbox_test_request_channel(pdev, "rx");
- if (!tdev->tx_channel && !tdev->rx_channel)
+ if (IS_ERR_OR_NULL(tdev->tx_channel) && IS_ERR_OR_NULL(tdev->rx_channel))
return -EPROBE_DEFER;
/* If Rx is not specified but has Rx MMIO, then Rx = Tx */
#include <linux/bitops.h>
#include <linux/mailbox_client.h>
#include <linux/mailbox_controller.h>
+#include <linux/of.h>
#include "mailbox.h"
#include <linux/kernel.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/slab.h>
struct mtk_adsp_mbox_priv {
#include <linux/platform_device.h>
#include <linux/mailbox_controller.h>
#include <linux/mailbox/mtk-cmdq-mailbox.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#define CMDQ_OP_CODE_MASK (0xff << CMDQ_OP_CODE_SHIFT)
#define CMDQ_NUM_CMD(t) (t->cmd_buf_size / CMDQ_INST_SIZE)
#include <linux/kfifo.h>
#include <linux/err.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/omap-mailbox.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/mailbox_controller.h>
for (i = 0; i < MHU_CHANS; i++) {
mhu->chan[i].con_priv = &mhu->mlink[i];
mhu->mlink[i].irq = platform_get_irq(pdev, i);
- if (mhu->mlink[i].irq < 0) {
- dev_err(dev, "failed to get irq%d\n", i);
+ if (mhu->mlink[i].irq < 0)
return mhu->mlink[i].irq;
- }
mhu->mlink[i].rx_reg = mhu->base + platform_mhu_reg[i];
mhu->mlink[i].tx_reg = mhu->mlink[i].rx_reg + TX_REG_OFFSET;
}
ret = of_parse_phandle_with_args(client_dn, "mboxes",
"#mbox-cells", j, &curr_ph);
of_node_put(curr_ph.np);
- if (!ret && curr_ph.np == controller_dn) {
+ if (!ret && curr_ph.np == controller_dn)
ipcc->num_chans++;
- break;
- }
}
}
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mailbox_controller.h>
+#include <linux/of.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#define MAILBOX_A2B_INTEN 0x00
mb->mbox.ops = &rockchip_mbox_chan_ops;
mb->mbox.txdone_irq = true;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENODEV;
-
- mb->mbox_base = devm_ioremap_resource(&pdev->dev, res);
+ mb->mbox_base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
if (IS_ERR(mb->mbox_base))
return PTR_ERR(mb->mbox_base);
#include <linux/io.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/mailbox_controller.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_wakeirq.h>
#include <linux/io.h>
#include <linux/mailbox_controller.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/slab.h>
static int tegra_hsp_probe(struct platform_device *pdev)
{
struct tegra_hsp *hsp;
- struct resource *res;
unsigned int i;
u32 value;
int err;
INIT_LIST_HEAD(&hsp->doorbells);
spin_lock_init(&hsp->lock);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- hsp->regs = devm_ioremap_resource(&pdev->dev, res);
+ hsp->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hsp->regs))
return PTR_ERR(hsp->regs);
struct device *dev = &pdev->dev;
const struct of_device_id *of_id;
struct device_node *np;
- struct resource *res;
const struct ti_msgmgr_desc *desc;
struct ti_msgmgr_inst *inst;
struct ti_queue_inst *qinst;
inst->dev = dev;
inst->desc = desc;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- desc->data_region_name);
- inst->queue_proxy_region = devm_ioremap_resource(dev, res);
+ inst->queue_proxy_region =
+ devm_platform_ioremap_resource_byname(pdev, desc->data_region_name);
if (IS_ERR(inst->queue_proxy_region))
return PTR_ERR(inst->queue_proxy_region);
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- desc->status_region_name);
- inst->queue_state_debug_region = devm_ioremap_resource(dev, res);
+ inst->queue_state_debug_region =
+ devm_platform_ioremap_resource_byname(pdev, desc->status_region_name);
if (IS_ERR(inst->queue_state_debug_region))
return PTR_ERR(inst->queue_state_debug_region);
if (desc->is_sproxy) {
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- desc->ctrl_region_name);
- inst->queue_ctrl_region = devm_ioremap_resource(dev, res);
+ inst->queue_ctrl_region =
+ devm_platform_ioremap_resource_byname(pdev, desc->ctrl_region_name);
if (IS_ERR(inst->queue_ctrl_region))
return PTR_ERR(inst->queue_ctrl_region);
}
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
#include <linux/platform_device.h>
/* IPI agent ID any */
{
struct ksz_device *dev = ds->priv;
- if (dev->chip_id == KSZ8830_CHIP_ID) {
+ switch (dev->chip_id) {
+ case KSZ8830_CHIP_ID:
/* Silicon Errata Sheet (DS80000830A):
* Port 1 does not work with LinkMD Cable-Testing.
* Port 1 does not respond to received PAUSE control frames.
*/
if (!port)
return MICREL_KSZ8_P1_ERRATA;
+ break;
+ case KSZ9477_CHIP_ID:
+ /* KSZ9477 Errata DS80000754C
+ *
+ * Module 4: Energy Efficient Ethernet (EEE) feature select must
+ * be manually disabled
+ * The EEE feature is enabled by default, but it is not fully
+ * operational. It must be manually disabled through register
+ * controls. If not disabled, the PHY ports can auto-negotiate
+ * to enable EEE, and this feature can cause link drops when
+ * linked to another device supporting EEE.
+ */
+ return MICREL_NO_EEE;
}
return 0;
int max_frame_mem;
int num_ports;
bool multiple_cascade_ports;
+ /* Every {port, TXQ} has its own CBS shaper */
+ bool fixed_cbs_mapping;
enum dsa_tag_protocol tag_proto;
const struct sja1105_dynamic_table_ops *dyn_ops;
const struct sja1105_table_ops *static_ops;
}
#define BYTES_PER_KBIT (1000LL / 8)
+/* Port 0 (the uC port) does not have CBS shapers */
+#define SJA1110_FIXED_CBS(port, prio) ((((port) - 1) * SJA1105_NUM_TC) + (prio))
+
+static int sja1105_find_cbs_shaper(struct sja1105_private *priv,
+ int port, int prio)
+{
+ int i;
+
+ if (priv->info->fixed_cbs_mapping) {
+ i = SJA1110_FIXED_CBS(port, prio);
+ if (i >= 0 && i < priv->info->num_cbs_shapers)
+ return i;
+
+ return -1;
+ }
+
+ for (i = 0; i < priv->info->num_cbs_shapers; i++)
+ if (priv->cbs[i].port == port && priv->cbs[i].prio == prio)
+ return i;
+
+ return -1;
+}
static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
{
int i;
+ if (priv->info->fixed_cbs_mapping)
+ return -1;
+
for (i = 0; i < priv->info->num_cbs_shapers; i++)
if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
return i;
{
struct sja1105_private *priv = ds->priv;
struct sja1105_cbs_entry *cbs;
+ s64 port_transmit_rate_kbps;
int index;
if (!offload->enable)
return sja1105_delete_cbs_shaper(priv, port, offload->queue);
- index = sja1105_find_unused_cbs_shaper(priv);
- if (index < 0)
- return -ENOSPC;
+ /* The user may be replacing an existing shaper */
+ index = sja1105_find_cbs_shaper(priv, port, offload->queue);
+ if (index < 0) {
+ /* That isn't the case - see if we can allocate a new one */
+ index = sja1105_find_unused_cbs_shaper(priv);
+ if (index < 0)
+ return -ENOSPC;
+ }
cbs = &priv->cbs[index];
cbs->port = port;
*/
cbs->credit_hi = offload->hicredit;
cbs->credit_lo = abs(offload->locredit);
- /* User space is in kbits/sec, hardware in bytes/sec */
- cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
- cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
+ /* User space is in kbits/sec, while the hardware in bytes/sec times
+ * link speed. Since the given offload->sendslope is good only for the
+ * current link speed anyway, and user space is likely to reprogram it
+ * when that changes, don't even bother to track the port's link speed,
+ * but deduce the port transmit rate from idleslope - sendslope.
+ */
+ port_transmit_rate_kbps = offload->idleslope - offload->sendslope;
+ cbs->idle_slope = div_s64(offload->idleslope * BYTES_PER_KBIT,
+ port_transmit_rate_kbps);
+ cbs->send_slope = div_s64(abs(offload->sendslope * BYTES_PER_KBIT),
+ port_transmit_rate_kbps);
/* Convert the negative values from 64-bit 2's complement
* to 32-bit 2's complement (for the case of 0x80000000 whose
* negative is still negative).
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
+ .fixed_cbs_mapping = true,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
+ .fixed_cbs_mapping = true,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
+ .fixed_cbs_mapping = true,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
.tag_proto = DSA_TAG_PROTO_SJA1110,
.can_limit_mcast_flood = true,
.multiple_cascade_ports = true,
+ .fixed_cbs_mapping = true,
.ptp_ts_bits = 32,
.ptpegr_ts_bytes = 8,
.max_frame_mem = SJA1110_MAX_FRAME_MEMORY,
return;
si = enetc_psi_create(pdev);
- if (si)
+ if (!IS_ERR(si))
enetc_psi_destroy(pdev);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
if (!skb)
return -1;
- skb_shinfo(rx->ctx.skb_tail)->frag_list = skb;
+ if (rx->ctx.skb_tail == rx->ctx.skb_head)
+ skb_shinfo(rx->ctx.skb_head)->frag_list = skb;
+ else
+ rx->ctx.skb_tail->next = skb;
rx->ctx.skb_tail = skb;
num_frags = 0;
}
u8 max_tc; /* Total number of TCs */
u8 num_tc; /* Total number of enabled TCs */
bool mqprio_active;
+ bool dcb_ets_active;
};
#define HNAE3_MAX_DSCP 64
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(h->pdev);
struct hnae3_dev_specs *dev_specs = &ae_dev->dev_specs;
struct hnae3_knic_private_info *kinfo = &h->kinfo;
+ struct net_device *dev = kinfo->netdev;
*pos += scnprintf(buf + *pos, len - *pos, "dev_spec:\n");
*pos += scnprintf(buf + *pos, len - *pos, "MAC entry num: %u\n",
dev_specs->mc_mac_size);
*pos += scnprintf(buf + *pos, len - *pos, "MAC statistics number: %u\n",
dev_specs->mac_stats_num);
+ *pos += scnprintf(buf + *pos, len - *pos,
+ "TX timeout threshold: %d seconds\n",
+ dev->watchdog_timeo / HZ);
}
static int hns3_dbg_dev_info(struct hnae3_handle *h, char *buf, int len)
return 0;
out:
- mutex_destroy(&handle->dbgfs_lock);
debugfs_remove_recursive(handle->hnae3_dbgfs);
handle->hnae3_dbgfs = NULL;
+ mutex_destroy(&handle->dbgfs_lock);
return ret;
}
{
u32 i;
+ debugfs_remove_recursive(handle->hnae3_dbgfs);
+ handle->hnae3_dbgfs = NULL;
+
for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
if (handle->dbgfs_buf[i]) {
kvfree(handle->dbgfs_buf[i]);
}
mutex_destroy(&handle->dbgfs_lock);
- debugfs_remove_recursive(handle->hnae3_dbgfs);
- handle->hnae3_dbgfs = NULL;
}
void hns3_dbg_register_debugfs(const char *debugfs_dir_name)
*/
if (test_bit(HNS3_NIC_STATE_TX_PUSH_ENABLE, &priv->state) && num &&
!ring->pending_buf && num <= HNS3_MAX_PUSH_BD_NUM && doorbell) {
+ /* This smp_store_release() pairs with smp_load_aquire() in
+ * hns3_nic_reclaim_desc(). Ensure that the BD valid bit
+ * is updated.
+ */
+ smp_store_release(&ring->last_to_use, ring->next_to_use);
hns3_tx_push_bd(ring, num);
- WRITE_ONCE(ring->last_to_use, ring->next_to_use);
return;
}
return;
}
+ /* This smp_store_release() pairs with smp_load_aquire() in
+ * hns3_nic_reclaim_desc(). Ensure that the BD valid bit is updated.
+ */
+ smp_store_release(&ring->last_to_use, ring->next_to_use);
+
if (ring->tqp->mem_base)
hns3_tx_mem_doorbell(ring);
else
ring->tqp->io_base + HNS3_RING_TX_RING_TAIL_REG);
ring->pending_buf = 0;
- WRITE_ONCE(ring->last_to_use, ring->next_to_use);
}
static void hns3_tsyn(struct net_device *netdev, struct sk_buff *skb,
netdev->priv_flags |= IFF_UNICAST_FLT;
- netdev->gso_partial_features |= NETIF_F_GSO_GRE_CSUM;
-
netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
static bool hns3_nic_reclaim_desc(struct hns3_enet_ring *ring,
int *bytes, int *pkts, int budget)
{
- /* pair with ring->last_to_use update in hns3_tx_doorbell(),
- * smp_store_release() is not used in hns3_tx_doorbell() because
- * the doorbell operation already have the needed barrier operation.
+ /* This smp_load_acquire() pairs with smp_store_release() in
+ * hns3_tx_doorbell().
*/
int ltu = smp_load_acquire(&ring->last_to_use);
int ntc = ring->next_to_clean;
hns3_get_ksettings(h, cmd);
break;
case HNAE3_MEDIA_TYPE_FIBER:
- if (module_type == HNAE3_MODULE_TYPE_CR)
+ if (module_type == HNAE3_MODULE_TYPE_UNKNOWN)
+ cmd->base.port = PORT_OTHER;
+ else if (module_type == HNAE3_MODULE_TYPE_CR)
cmd->base.port = PORT_DA;
else
cmd->base.port = PORT_FIBRE;
int ret;
if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
- hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+ h->kinfo.tc_info.mqprio_active)
return -EINVAL;
ret = hclge_ets_validate(hdev, ets, &num_tc, &map_changed);
}
hclge_tm_schd_info_update(hdev, num_tc);
- if (num_tc > 1)
- hdev->flag |= HCLGE_FLAG_DCB_ENABLE;
- else
- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
+ h->kinfo.tc_info.dcb_ets_active = num_tc > 1;
ret = hclge_ieee_ets_to_tm_info(hdev, ets);
if (ret)
struct hclge_vport *vport = hclge_get_vport(h);
struct hclge_dev *hdev = vport->back;
- if (hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE)
+ if (h->kinfo.tc_info.mqprio_active)
return 0;
return hdev->dcbx_cap;
if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
return -EBUSY;
- if (hdev->flag & HCLGE_FLAG_DCB_ENABLE)
+ kinfo = &vport->nic.kinfo;
+ if (kinfo->tc_info.dcb_ets_active)
return -EINVAL;
ret = hclge_mqprio_qopt_check(hdev, mqprio_qopt);
if (ret)
return ret;
- kinfo = &vport->nic.kinfo;
memcpy(&old_tc_info, &kinfo->tc_info, sizeof(old_tc_info));
hclge_sync_mqprio_qopt(&kinfo->tc_info, mqprio_qopt);
kinfo->tc_info.mqprio_active = tc > 0;
if (ret)
goto err_out;
- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE;
-
- if (tc > 1)
- hdev->flag |= HCLGE_FLAG_MQPRIO_ENABLE;
- else
- hdev->flag &= ~HCLGE_FLAG_MQPRIO_ENABLE;
-
return hclge_notify_init_up(hdev);
err_out:
struct hclge_desc desc[3];
int pos = 0;
int ret, i;
- u32 *req;
+ __le32 *req;
hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_FD_TCAM_OP, true);
desc[0].flag |= cpu_to_le16(HCLGE_COMM_CMD_FLAG_NEXT);
tcam_msg.loc);
/* tcam_data0 ~ tcam_data1 */
- req = (u32 *)req1->tcam_data;
+ req = (__le32 *)req1->tcam_data;
for (i = 0; i < 2; i++)
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
- "%08x\n", *req++);
+ "%08x\n", le32_to_cpu(*req++));
/* tcam_data2 ~ tcam_data7 */
- req = (u32 *)req2->tcam_data;
+ req = (__le32 *)req2->tcam_data;
for (i = 0; i < 6; i++)
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
- "%08x\n", *req++);
+ "%08x\n", le32_to_cpu(*req++));
/* tcam_data8 ~ tcam_data12 */
- req = (u32 *)req3->tcam_data;
+ req = (__le32 *)req3->tcam_data;
for (i = 0; i < 5; i++)
pos += scnprintf(tcam_buf + pos, HCLGE_DBG_TCAM_BUF_SIZE - pos,
- "%08x\n", *req++);
+ "%08x\n", le32_to_cpu(*req++));
return ret;
}
static void hclge_info_show(struct hclge_dev *hdev)
{
+ struct hnae3_handle *handle = &hdev->vport->nic;
struct device *dev = &hdev->pdev->dev;
dev_info(dev, "PF info begin:\n");
dev_info(dev, "This is %s PF\n",
hdev->flag & HCLGE_FLAG_MAIN ? "main" : "not main");
dev_info(dev, "DCB %s\n",
- hdev->flag & HCLGE_FLAG_DCB_ENABLE ? "enable" : "disable");
+ handle->kinfo.tc_info.dcb_ets_active ? "enable" : "disable");
dev_info(dev, "MQPRIO %s\n",
- hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE ? "enable" : "disable");
+ handle->kinfo.tc_info.mqprio_active ? "enable" : "disable");
dev_info(dev, "Default tx spare buffer size: %u\n",
hdev->tx_spare_buf_size);
#define HCLGE_FLAG_MAIN BIT(0)
#define HCLGE_FLAG_DCB_CAPABLE BIT(1)
-#define HCLGE_FLAG_DCB_ENABLE BIT(2)
-#define HCLGE_FLAG_MQPRIO_ENABLE BIT(3)
u32 flag;
u32 pkt_buf_size; /* Total pf buf size for tx/rx */
/* TX/RX descriptor defines */
#define IGB_DEFAULT_TXD 256
#define IGB_DEFAULT_TX_WORK 128
-#define IGB_MIN_TXD 80
+#define IGB_MIN_TXD 64
#define IGB_MAX_TXD 4096
#define IGB_DEFAULT_RXD 256
-#define IGB_MIN_RXD 80
+#define IGB_MIN_RXD 64
#define IGB_MAX_RXD 4096
#define IGB_DEFAULT_ITR 3 /* dynamic */
struct pci_dev *pdev = adapter->pdev;
struct e1000_hw *hw = &adapter->hw;
- /* Virtualization features not supported on i210 family. */
- if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211))
+ /* Virtualization features not supported on i210 and 82580 family. */
+ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211) ||
+ (hw->mac.type == e1000_82580))
return;
/* Of the below we really only want the effect of getting
/* Tx/Rx descriptor defines */
#define IGBVF_DEFAULT_TXD 256
#define IGBVF_MAX_TXD 4096
-#define IGBVF_MIN_TXD 80
+#define IGBVF_MIN_TXD 64
#define IGBVF_DEFAULT_RXD 256
#define IGBVF_MAX_RXD 4096
-#define IGBVF_MIN_RXD 80
+#define IGBVF_MIN_RXD 64
#define IGBVF_MIN_ITR_USECS 10 /* 100000 irq/sec */
#define IGBVF_MAX_ITR_USECS 10000 /* 100 irq/sec */
/* TX/RX descriptor defines */
#define IGC_DEFAULT_TXD 256
#define IGC_DEFAULT_TX_WORK 128
-#define IGC_MIN_TXD 80
+#define IGC_MIN_TXD 64
#define IGC_MAX_TXD 4096
#define IGC_DEFAULT_RXD 256
-#define IGC_MIN_RXD 80
+#define IGC_MIN_RXD 64
#define IGC_MAX_RXD 4096
/* Supported Rx Buffer Sizes */
return 0;
}
+static void nix_get_aq_req_smq(struct rvu *rvu, struct nix_aq_enq_req *req,
+ u16 *smq, u16 *smq_mask)
+{
+ struct nix_cn10k_aq_enq_req *aq_req;
+
+ if (!is_rvu_otx2(rvu)) {
+ aq_req = (struct nix_cn10k_aq_enq_req *)req;
+ *smq = aq_req->sq.smq;
+ *smq_mask = aq_req->sq_mask.smq;
+ } else {
+ *smq = req->sq.smq;
+ *smq_mask = req->sq_mask.smq;
+ }
+}
+
static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
struct nix_aq_enq_req *req,
struct nix_aq_enq_rsp *rsp)
struct rvu_block *block;
struct admin_queue *aq;
struct rvu_pfvf *pfvf;
+ u16 smq, smq_mask;
void *ctx, *mask;
bool ena;
u64 cfg;
if (rc)
return rc;
+ nix_get_aq_req_smq(rvu, req, &smq, &smq_mask);
/* Check if SQ pointed SMQ belongs to this PF/VF or not */
if (req->ctype == NIX_AQ_CTYPE_SQ &&
((req->op == NIX_AQ_INSTOP_INIT && req->sq.ena) ||
(req->op == NIX_AQ_INSTOP_WRITE &&
- req->sq_mask.ena && req->sq_mask.smq && req->sq.ena))) {
+ req->sq_mask.ena && req->sq.ena && smq_mask))) {
if (!is_valid_txschq(rvu, blkaddr, NIX_TXSCH_LVL_SMQ,
- pcifunc, req->sq.smq))
+ pcifunc, smq))
return NIX_AF_ERR_AQ_ENQUEUE;
}
if (err)
return err;
- if (mlx5e_is_eswitch_flow(parse_state->flow))
+ if (mlx5e_is_eswitch_flow(parse_state->flow)) {
attr->esw_attr->split_count = attr->esw_attr->out_count;
+ parse_state->if_count = 0;
+ }
attr->flags |= MLX5_ATTR_FLAG_CT;
if (err)
return err;
+ parse_state->if_count = 0;
esw_attr->out_count++;
return 0;
}
attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
- if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
esw_attr->split_count = esw_attr->out_count;
+ parse_state->if_count = 0;
+ }
return 0;
}
if (err)
return err;
+ parse_state->if_count = 0;
esw_attr->out_count++;
return 0;
return err;
esw_attr->split_count = esw_attr->out_count;
+ parse_state->if_count = 0;
return 0;
}
if (err)
return err;
- if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
+ if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
attr->esw_attr->split_count = attr->esw_attr->out_count;
+ parse_state->if_count = 0;
+ }
return 0;
}
}
i_split = i + 1;
+ parse_state->if_count = 0;
list_add(&attr->list, &flow->attrs);
}
mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw,
enum mlx5_eswitch_vport_event enabled_events)
{
+ bool pf_needed;
int ret;
+ pf_needed = mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ esw->mode == MLX5_ESWITCH_LEGACY;
+
/* Enable PF vport */
- ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events);
- if (ret)
- return ret;
+ if (pf_needed) {
+ ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF,
+ enabled_events);
+ if (ret)
+ return ret;
+ }
/* Enable external host PF HCA */
ret = host_pf_enable_hca(esw->dev);
ecpf_err:
host_pf_disable_hca(esw->dev);
pf_hca_err:
- mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+ if (pf_needed)
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
return ret;
}
}
host_pf_disable_hca(esw->dev);
- mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
+
+ if (mlx5_core_is_ecpf_esw_manager(esw->dev) ||
+ esw->mode == MLX5_ESWITCH_LEGACY)
+ mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF);
}
static void mlx5_eswitch_get_devlink_param(struct mlx5_eswitch *esw)
esw_acl_ingress_ofld_cleanup(esw, vport);
}
-static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
{
- struct mlx5_vport *vport;
+ struct mlx5_vport *uplink, *manager;
+ int ret;
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- if (IS_ERR(vport))
- return PTR_ERR(vport);
+ uplink = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ if (IS_ERR(uplink))
+ return PTR_ERR(uplink);
+
+ ret = esw_vport_create_offloads_acl_tables(esw, uplink);
+ if (ret)
+ return ret;
+
+ manager = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (IS_ERR(manager)) {
+ ret = PTR_ERR(manager);
+ goto err_manager;
+ }
- return esw_vport_create_offloads_acl_tables(esw, vport);
+ ret = esw_vport_create_offloads_acl_tables(esw, manager);
+ if (ret)
+ goto err_manager;
+
+ return 0;
+
+err_manager:
+ esw_vport_destroy_offloads_acl_tables(esw, uplink);
+ return ret;
}
-static void esw_destroy_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
+static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
{
struct mlx5_vport *vport;
- vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
- if (IS_ERR(vport))
- return;
+ vport = mlx5_eswitch_get_vport(esw, esw->manager_vport);
+ if (!IS_ERR(vport))
+ esw_vport_destroy_offloads_acl_tables(esw, vport);
- esw_vport_destroy_offloads_acl_tables(esw, vport);
+ vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
+ if (!IS_ERR(vport))
+ esw_vport_destroy_offloads_acl_tables(esw, vport);
}
int mlx5_eswitch_reload_reps(struct mlx5_eswitch *esw)
}
esw->fdb_table.offloads.indir = indir;
- err = esw_create_uplink_offloads_acl_tables(esw);
+ err = esw_create_offloads_acl_tables(esw);
if (err)
goto create_acl_err;
create_restore_err:
esw_destroy_offloads_table(esw);
create_offloads_err:
- esw_destroy_uplink_offloads_acl_tables(esw);
+ esw_destroy_offloads_acl_tables(esw);
create_acl_err:
mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
create_indir_err:
esw_destroy_offloads_fdb_tables(esw);
esw_destroy_restore_table(esw);
esw_destroy_offloads_table(esw);
- esw_destroy_uplink_offloads_acl_tables(esw);
+ esw_destroy_offloads_acl_tables(esw);
mlx5_esw_indir_table_destroy(esw->fdb_table.offloads.indir);
mutex_destroy(&esw->fdb_table.offloads.vports.lock);
}
/* Handle a received packet. Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel)
{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
struct efx_nic *efx = channel->efx;
struct efx_rx_buffer *rx_buf =
- efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+ efx_rx_buffer(rx_queue, channel->rx_pkt_index);
u8 *eh = efx_rx_buf_va(rx_buf);
/* Read length from the prefix if necessary. This already
* excludes the length of the prefix itself.
*/
- if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN)
+ if (rx_buf->flags & EFX_RX_PKT_PREFIX_LEN) {
rx_buf->len = le16_to_cpup((__le16 *)
(eh + efx->rx_packet_len_offset));
+ /* A known issue may prevent this being filled in;
+ * if that happens, just drop the packet.
+ * Must do that in the driver since passing a zero-length
+ * packet up to the stack may cause a crash.
+ */
+ if (unlikely(!rx_buf->len)) {
+ efx_free_rx_buffers(rx_queue, rx_buf,
+ channel->rx_pkt_n_frags);
+ channel->n_rx_frm_trunc++;
+ goto out;
+ }
+ }
/* If we're in loopback test, then pass the packet directly to the
* loopback layer, and free the rx_buf here
*/
if (unlikely(efx->loopback_selftest)) {
- struct efx_rx_queue *rx_queue;
-
efx_loopback_rx_packet(efx, eh, rx_buf->len);
- rx_queue = efx_channel_get_rx_queue(channel);
efx_free_rx_buffers(rx_queue, rx_buf,
channel->rx_pkt_n_frags);
goto out;
return ERR_PTR(phy_mode);
plat->phy_interface = phy_mode;
- plat->mac_interface = stmmac_of_get_mac_mode(np);
- if (plat->mac_interface < 0)
- plat->mac_interface = plat->phy_interface;
+ rc = stmmac_of_get_mac_mode(np);
+ plat->mac_interface = rc < 0 ? plat->phy_interface : rc;
/* Some wrapper drivers still rely on phy_node. Let's save it while
* they are not converted to phylink. */
struct crypto_aead *tfm;
int ret;
- /* Pick a sync gcm(aes) cipher to ensure order is preserved. */
- tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC);
+ tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
if (IS_ERR(tfm))
return tfm;
/* Transmit waveform amplitude can be improved (1000BASE-T, 100BASE-TX, 10BASE-Te) */
{0x1c, 0x04, 0x00d0},
- /* Energy Efficient Ethernet (EEE) feature select must be manually disabled */
- {0x07, 0x3c, 0x0000},
-
/* Register settings are required to meet data sheet supply current specifications */
{0x1c, 0x13, 0x6eff},
{0x1c, 0x14, 0xe6ff},
return err;
}
+ /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes
+ * in this switch shall be regarded as broken.
+ */
+ if (phydev->dev_flags & MICREL_NO_EEE)
+ phydev->eee_broken_modes = -1;
+
err = genphy_restart_aneg(phydev);
if (err)
return err;
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct veth_rq *rq = NULL;
+ int ret = NETDEV_TX_OK;
struct net_device *rcv;
int length = skb->len;
bool use_napi = false;
} else {
drop:
atomic64_inc(&priv->dropped);
+ ret = NET_XMIT_DROP;
}
rcu_read_unlock();
- return NETDEV_TX_OK;
+ return ret;
}
static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
#ifdef CONFIG_ACPI
static const struct acpi_device_id acpi_id[] = {
{ "NXP1001" },
+ { "NXP1002" },
{ "NXP7471" },
{ }
};
/* Enable all counters */
armv8pmu_pmcr_write(armv8pmu_pmcr_read() | ARMV8_PMU_PMCR_E);
+
+ kvm_vcpu_pmu_resync_el0();
}
static void armv8pmu_stop(struct arm_pmu *cpu_pmu)
#include "../cxl/pmu.h"
#define CXL_PMU_CAP_REG 0x0
-#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(4, 0)
+#define CXL_PMU_CAP_NUM_COUNTERS_MSK GENMASK_ULL(5, 0)
#define CXL_PMU_CAP_COUNTER_WIDTH_MSK GENMASK_ULL(15, 8)
#define CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK GENMASK_ULL(24, 20)
#define CXL_PMU_CAP_FILTERS_SUP_MSK GENMASK_ULL(39, 32)
#include <linux/acpi.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/pwm.h>
-#include <linux/radix-tree.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/err.h>
}
struct pwm_device *
-of_pwm_xlate_with_flags(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_xlate_with_flags(struct pwm_chip *chip, const struct of_phandle_args *args)
{
struct pwm_device *pwm;
- if (pc->of_pwm_n_cells < 2)
+ if (chip->of_pwm_n_cells < 2)
return ERR_PTR(-EINVAL);
/* flags in the third cell are optional */
if (args->args_count < 2)
return ERR_PTR(-EINVAL);
- if (args->args[0] >= pc->npwm)
+ if (args->args[0] >= chip->npwm)
return ERR_PTR(-EINVAL);
- pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+ pwm = pwm_request_from_chip(chip, args->args[0], NULL);
if (IS_ERR(pwm))
return pwm;
pwm->args.period = args->args[1];
pwm->args.polarity = PWM_POLARITY_NORMAL;
- if (pc->of_pwm_n_cells >= 3) {
+ if (chip->of_pwm_n_cells >= 3) {
if (args->args_count > 2 && args->args[2] & PWM_POLARITY_INVERTED)
pwm->args.polarity = PWM_POLARITY_INVERSED;
}
EXPORT_SYMBOL_GPL(of_pwm_xlate_with_flags);
struct pwm_device *
-of_pwm_single_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+of_pwm_single_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
{
struct pwm_device *pwm;
- if (pc->of_pwm_n_cells < 1)
+ if (chip->of_pwm_n_cells < 1)
return ERR_PTR(-EINVAL);
/* validate that one cell is specified, optionally with flags */
if (args->args_count != 1 && args->args_count != 2)
return ERR_PTR(-EINVAL);
- pwm = pwm_request_from_chip(pc, 0, NULL);
+ pwm = pwm_request_from_chip(chip, 0, NULL);
if (IS_ERR(pwm))
return pwm;
* pwmchip_remove() - remove a PWM chip
* @chip: the PWM chip to remove
*
- * Removes a PWM chip. This function may return busy if the PWM chip provides
- * a PWM device that is still requested.
- *
- * Returns: 0 on success or a negative error code on failure.
+ * Removes a PWM chip.
*/
void pwmchip_remove(struct pwm_chip *chip)
{
pwmchip_sysfs_unexport(chip);
+ if (IS_ENABLED(CONFIG_OF))
+ of_pwmchip_remove(chip);
+
mutex_lock(&pwm_lock);
list_del_init(&chip->list);
- if (IS_ENABLED(CONFIG_OF))
- of_pwmchip_remove(chip);
-
free_pwms(chip);
mutex_unlock(&pwm_lock);
struct pwm_device *pwm = NULL;
struct of_phandle_args args;
struct device_link *dl;
- struct pwm_chip *pc;
+ struct pwm_chip *chip;
int index = 0;
int err;
return ERR_PTR(err);
}
- pc = fwnode_to_pwmchip(of_fwnode_handle(args.np));
- if (IS_ERR(pc)) {
- if (PTR_ERR(pc) != -EPROBE_DEFER)
+ chip = fwnode_to_pwmchip(of_fwnode_handle(args.np));
+ if (IS_ERR(chip)) {
+ if (PTR_ERR(chip) != -EPROBE_DEFER)
pr_err("%s(): PWM chip not found\n", __func__);
- pwm = ERR_CAST(pc);
+ pwm = ERR_CAST(chip);
goto put;
}
- pwm = pc->of_xlate(pc, &args);
+ pwm = chip->of_xlate(chip, &args);
if (IS_ERR(pwm))
goto put;
* - When APPLE_PWM_CTRL is set to 0, the output is constant low
*/
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/delay.h>
#include <linux/mfd/atmel-hlcdc.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/regmap.h>
return container_of(chip, struct atmel_hlcdc_pwm, chip);
}
-static int atmel_hlcdc_pwm_apply(struct pwm_chip *c, struct pwm_device *pwm,
+static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
const struct pwm_state *state)
{
- struct atmel_hlcdc_pwm *chip = to_atmel_hlcdc_pwm(c);
- struct atmel_hlcdc *hlcdc = chip->hlcdc;
+ struct atmel_hlcdc_pwm *atmel = to_atmel_hlcdc_pwm(chip);
+ struct atmel_hlcdc *hlcdc = atmel->hlcdc;
unsigned int status;
int ret;
u32 pwmcfg;
int pres;
- if (!chip->errata || !chip->errata->slow_clk_erratum) {
+ if (!atmel->errata || !atmel->errata->slow_clk_erratum) {
clk_freq = clk_get_rate(new_clk);
if (!clk_freq)
return -EINVAL;
}
/* Errata: cannot use slow clk on some IP revisions */
- if ((chip->errata && chip->errata->slow_clk_erratum) ||
+ if ((atmel->errata && atmel->errata->slow_clk_erratum) ||
clk_period_ns > state->period) {
new_clk = hlcdc->sys_clk;
clk_freq = clk_get_rate(new_clk);
for (pres = 0; pres <= ATMEL_HLCDC_PWMPS_MAX; pres++) {
/* Errata: cannot divide by 1 on some IP revisions */
- if (!pres && chip->errata &&
- chip->errata->div1_clk_erratum)
+ if (!pres && atmel->errata &&
+ atmel->errata->div1_clk_erratum)
continue;
if ((clk_period_ns << pres) >= state->period)
pwmcfg = ATMEL_HLCDC_PWMPS(pres);
- if (new_clk != chip->cur_clk) {
+ if (new_clk != atmel->cur_clk) {
u32 gencfg = 0;
int ret;
if (ret)
return ret;
- clk_disable_unprepare(chip->cur_clk);
- chip->cur_clk = new_clk;
+ clk_disable_unprepare(atmel->cur_clk);
+ atmel->cur_clk = new_clk;
if (new_clk == hlcdc->sys_clk)
gencfg = ATMEL_HLCDC_CLKPWMSEL;
if (ret)
return ret;
- clk_disable_unprepare(chip->cur_clk);
- chip->cur_clk = NULL;
+ clk_disable_unprepare(atmel->cur_clk);
+ atmel->cur_clk = NULL;
}
return 0;
#ifdef CONFIG_PM_SLEEP
static int atmel_hlcdc_pwm_suspend(struct device *dev)
{
- struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+ struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
/* Keep the periph clock enabled if the PWM is still running. */
- if (pwm_is_enabled(&chip->chip.pwms[0]))
- clk_disable_unprepare(chip->hlcdc->periph_clk);
+ if (pwm_is_enabled(&atmel->chip.pwms[0]))
+ clk_disable_unprepare(atmel->hlcdc->periph_clk);
return 0;
}
static int atmel_hlcdc_pwm_resume(struct device *dev)
{
- struct atmel_hlcdc_pwm *chip = dev_get_drvdata(dev);
+ struct atmel_hlcdc_pwm *atmel = dev_get_drvdata(dev);
struct pwm_state state;
int ret;
- pwm_get_state(&chip->chip.pwms[0], &state);
+ pwm_get_state(&atmel->chip.pwms[0], &state);
/* Re-enable the periph clock it was stopped during suspend. */
if (!state.enabled) {
- ret = clk_prepare_enable(chip->hlcdc->periph_clk);
+ ret = clk_prepare_enable(atmel->hlcdc->periph_clk);
if (ret)
return ret;
}
- return atmel_hlcdc_pwm_apply(&chip->chip, &chip->chip.pwms[0], &state);
+ return atmel_hlcdc_pwm_apply(&atmel->chip, &atmel->chip.pwms[0],
+ &state);
}
#endif
{
const struct of_device_id *match;
struct device *dev = &pdev->dev;
- struct atmel_hlcdc_pwm *chip;
+ struct atmel_hlcdc_pwm *atmel;
struct atmel_hlcdc *hlcdc;
int ret;
hlcdc = dev_get_drvdata(dev->parent);
- chip = devm_kzalloc(dev, sizeof(*chip), GFP_KERNEL);
- if (!chip)
+ atmel = devm_kzalloc(dev, sizeof(*atmel), GFP_KERNEL);
+ if (!atmel)
return -ENOMEM;
ret = clk_prepare_enable(hlcdc->periph_clk);
match = of_match_node(atmel_hlcdc_dt_ids, dev->parent->of_node);
if (match)
- chip->errata = match->data;
+ atmel->errata = match->data;
- chip->hlcdc = hlcdc;
- chip->chip.ops = &atmel_hlcdc_pwm_ops;
- chip->chip.dev = dev;
- chip->chip.npwm = 1;
+ atmel->hlcdc = hlcdc;
+ atmel->chip.ops = &atmel_hlcdc_pwm_ops;
+ atmel->chip.dev = dev;
+ atmel->chip.npwm = 1;
- ret = pwmchip_add(&chip->chip);
+ ret = pwmchip_add(&atmel->chip);
if (ret) {
clk_disable_unprepare(hlcdc->periph_clk);
return ret;
}
- platform_set_drvdata(pdev, chip);
+ platform_set_drvdata(pdev, atmel);
return 0;
}
static void atmel_hlcdc_pwm_remove(struct platform_device *pdev)
{
- struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev);
+ struct atmel_hlcdc_pwm *atmel = platform_get_drvdata(pdev);
- pwmchip_remove(&chip->chip);
+ pwmchip_remove(&atmel->chip);
- clk_disable_unprepare(chip->hlcdc->periph_clk);
+ clk_disable_unprepare(atmel->hlcdc->periph_clk);
}
static const struct of_device_id atmel_hlcdc_pwm_dt_ids[] = {
#include <linux/mfd/syscon.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
-#include <linux/of_device.h>
-#include <linux/of_irq.h>
+#include <linux/of.h>
#include <linux/regmap.h>
#include <linux/slab.h>
#include <soc/at91/atmel_tcb.h>
ATMEL_TC_BEEVT | ATMEL_TC_BSWTRG)
struct atmel_tcb_pwm_device {
- enum pwm_polarity polarity; /* PWM polarity */
unsigned div; /* PWM clock divider */
unsigned duty; /* PWM duty expressed in clk cycles */
unsigned period; /* PWM period expressed in clk cycles */
struct clk *clk;
struct clk *gclk;
struct clk *slow_clk;
- struct atmel_tcb_pwm_device *pwms[NPWM];
+ struct atmel_tcb_pwm_device pwms[NPWM];
struct atmel_tcb_channel bkup;
};
return container_of(chip, struct atmel_tcb_pwm_chip, chip);
}
-static int atmel_tcb_pwm_set_polarity(struct pwm_chip *chip,
- struct pwm_device *pwm,
- enum pwm_polarity polarity)
-{
- struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
-
- tcbpwm->polarity = polarity;
-
- return 0;
-}
-
static int atmel_tcb_pwm_request(struct pwm_chip *chip,
struct pwm_device *pwm)
{
struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm;
+ struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
unsigned cmr;
int ret;
- tcbpwm = devm_kzalloc(chip->dev, sizeof(*tcbpwm), GFP_KERNEL);
- if (!tcbpwm)
- return -ENOMEM;
-
ret = clk_prepare_enable(tcbpwmc->clk);
- if (ret) {
- devm_kfree(chip->dev, tcbpwm);
+ if (ret)
return ret;
- }
- tcbpwm->polarity = PWM_POLARITY_NORMAL;
tcbpwm->duty = 0;
tcbpwm->period = 0;
tcbpwm->div = 0;
regmap_write(tcbpwmc->regmap, ATMEL_TC_REG(tcbpwmc->channel, CMR), cmr);
spin_unlock(&tcbpwmc->lock);
- tcbpwmc->pwms[pwm->hwpwm] = tcbpwm;
-
return 0;
}
static void atmel_tcb_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
clk_disable_unprepare(tcbpwmc->clk);
- tcbpwmc->pwms[pwm->hwpwm] = NULL;
- devm_kfree(chip->dev, tcbpwm);
}
-static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void atmel_tcb_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm,
+ enum pwm_polarity polarity)
{
struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+ struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
unsigned cmr;
- enum pwm_polarity polarity = tcbpwm->polarity;
/*
* If duty is 0 the timer will be stopped and we have to
spin_unlock(&tcbpwmc->lock);
}
-static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static int atmel_tcb_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm,
+ enum pwm_polarity polarity)
{
struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+ struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
u32 cmr;
- enum pwm_polarity polarity = tcbpwm->polarity;
/*
* If duty is 0 the timer will be stopped and we have to
int duty_ns, int period_ns)
{
struct atmel_tcb_pwm_chip *tcbpwmc = to_tcb_chip(chip);
- struct atmel_tcb_pwm_device *tcbpwm = tcbpwmc->pwms[pwm->hwpwm];
+ struct atmel_tcb_pwm_device *tcbpwm = &tcbpwmc->pwms[pwm->hwpwm];
struct atmel_tcb_pwm_device *atcbpwm = NULL;
int i = 0;
int slowclk = 0;
period = div_u64(period_ns, min);
if (pwm->hwpwm == 0)
- atcbpwm = tcbpwmc->pwms[1];
+ atcbpwm = &tcbpwmc->pwms[1];
else
- atcbpwm = tcbpwmc->pwms[0];
+ atcbpwm = &tcbpwmc->pwms[0];
/*
* PWM devices provided by the TCB driver are grouped by 2.
int duty_cycle, period;
int ret;
- /* This function only sets a flag in driver data */
- atmel_tcb_pwm_set_polarity(chip, pwm, state->polarity);
-
if (!state->enabled) {
- atmel_tcb_pwm_disable(chip, pwm);
+ atmel_tcb_pwm_disable(chip, pwm, state->polarity);
return 0;
}
if (ret)
return ret;
- return atmel_tcb_pwm_enable(chip, pwm);
+ return atmel_tcb_pwm_enable(chip, pwm, state->polarity);
}
static const struct pwm_ops atmel_tcb_pwm_ops = {
struct atmel_tcb_pwm_chip *tcbpwm;
const struct atmel_tcb_config *config;
struct device_node *np = pdev->dev.of_node;
- struct regmap *regmap;
- struct clk *clk, *gclk = NULL;
- struct clk *slow_clk;
char clk_name[] = "t0_clk";
int err;
int channel;
+ tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
+ if (tcbpwm == NULL)
+ return -ENOMEM;
+
err = of_property_read_u32(np, "reg", &channel);
if (err < 0) {
dev_err(&pdev->dev,
return err;
}
- regmap = syscon_node_to_regmap(np->parent);
- if (IS_ERR(regmap))
- return PTR_ERR(regmap);
+ tcbpwm->regmap = syscon_node_to_regmap(np->parent);
+ if (IS_ERR(tcbpwm->regmap))
+ return PTR_ERR(tcbpwm->regmap);
- slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
- if (IS_ERR(slow_clk))
- return PTR_ERR(slow_clk);
+ tcbpwm->slow_clk = of_clk_get_by_name(np->parent, "slow_clk");
+ if (IS_ERR(tcbpwm->slow_clk))
+ return PTR_ERR(tcbpwm->slow_clk);
clk_name[1] += channel;
- clk = of_clk_get_by_name(np->parent, clk_name);
- if (IS_ERR(clk))
- clk = of_clk_get_by_name(np->parent, "t0_clk");
- if (IS_ERR(clk))
- return PTR_ERR(clk);
+ tcbpwm->clk = of_clk_get_by_name(np->parent, clk_name);
+ if (IS_ERR(tcbpwm->clk))
+ tcbpwm->clk = of_clk_get_by_name(np->parent, "t0_clk");
+ if (IS_ERR(tcbpwm->clk)) {
+ err = PTR_ERR(tcbpwm->clk);
+ goto err_slow_clk;
+ }
match = of_match_node(atmel_tcb_of_match, np->parent);
config = match->data;
if (config->has_gclk) {
- gclk = of_clk_get_by_name(np->parent, "gclk");
- if (IS_ERR(gclk))
- return PTR_ERR(gclk);
- }
-
- tcbpwm = devm_kzalloc(&pdev->dev, sizeof(*tcbpwm), GFP_KERNEL);
- if (tcbpwm == NULL) {
- err = -ENOMEM;
- goto err_slow_clk;
+ tcbpwm->gclk = of_clk_get_by_name(np->parent, "gclk");
+ if (IS_ERR(tcbpwm->gclk)) {
+ err = PTR_ERR(tcbpwm->gclk);
+ goto err_clk;
+ }
}
tcbpwm->chip.dev = &pdev->dev;
tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
tcbpwm->chip.npwm = NPWM;
tcbpwm->channel = channel;
- tcbpwm->regmap = regmap;
- tcbpwm->clk = clk;
- tcbpwm->gclk = gclk;
- tcbpwm->slow_clk = slow_clk;
tcbpwm->width = config->counter_width;
- err = clk_prepare_enable(slow_clk);
+ err = clk_prepare_enable(tcbpwm->slow_clk);
if (err)
- goto err_slow_clk;
+ goto err_gclk;
spin_lock_init(&tcbpwm->lock);
err_disable_clk:
clk_disable_unprepare(tcbpwm->slow_clk);
+err_gclk:
+ clk_put(tcbpwm->gclk);
+
+err_clk:
+ clk_put(tcbpwm->clk);
+
err_slow_clk:
- clk_put(slow_clk);
+ clk_put(tcbpwm->slow_clk);
return err;
}
pwmchip_remove(&tcbpwm->chip);
clk_disable_unprepare(tcbpwm->slow_clk);
- clk_put(tcbpwm->slow_clk);
+ clk_put(tcbpwm->gclk);
clk_put(tcbpwm->clk);
+ clk_put(tcbpwm->slow_clk);
}
static const struct of_device_id atmel_tcb_pwm_dt_ids[] = {
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
#define PWM_SR 0x0C
#define PWM_ISR 0x1C
/* Bit field in SR */
-#define PWM_SR_ALL_CH_ON 0x0F
+#define PWM_SR_ALL_CH_MASK 0x0F
/* The following register is PWM channel related registers */
#define PWM_CH_REG_OFFSET 0x200
};
MODULE_DEVICE_TABLE(of, atmel_pwm_dt_ids);
+static int atmel_pwm_enable_clk_if_on(struct atmel_pwm_chip *atmel_pwm, bool on)
+{
+ unsigned int i, cnt = 0;
+ unsigned long sr;
+ int ret = 0;
+
+ sr = atmel_pwm_readl(atmel_pwm, PWM_SR) & PWM_SR_ALL_CH_MASK;
+ if (!sr)
+ return 0;
+
+ cnt = bitmap_weight(&sr, atmel_pwm->chip.npwm);
+
+ if (!on)
+ goto disable_clk;
+
+ for (i = 0; i < cnt; i++) {
+ ret = clk_enable(atmel_pwm->clk);
+ if (ret) {
+ dev_err(atmel_pwm->chip.dev,
+ "failed to enable clock for pwm %pe\n",
+ ERR_PTR(ret));
+
+ cnt = i;
+ goto disable_clk;
+ }
+ }
+
+ return 0;
+
+disable_clk:
+ while (cnt--)
+ clk_disable(atmel_pwm->clk);
+
+ return ret;
+}
+
static int atmel_pwm_probe(struct platform_device *pdev)
{
struct atmel_pwm_chip *atmel_pwm;
if (IS_ERR(atmel_pwm->base))
return PTR_ERR(atmel_pwm->base);
- atmel_pwm->clk = devm_clk_get(&pdev->dev, NULL);
+ atmel_pwm->clk = devm_clk_get_prepared(&pdev->dev, NULL);
if (IS_ERR(atmel_pwm->clk))
- return PTR_ERR(atmel_pwm->clk);
-
- ret = clk_prepare(atmel_pwm->clk);
- if (ret) {
- dev_err(&pdev->dev, "failed to prepare PWM clock\n");
- return ret;
- }
+ return dev_err_probe(&pdev->dev, PTR_ERR(atmel_pwm->clk),
+ "failed to get prepared PWM clock\n");
atmel_pwm->chip.dev = &pdev->dev;
atmel_pwm->chip.ops = &atmel_pwm_ops;
atmel_pwm->chip.npwm = 4;
- ret = pwmchip_add(&atmel_pwm->chip);
+ ret = atmel_pwm_enable_clk_if_on(atmel_pwm, true);
+ if (ret < 0)
+ return ret;
+
+ ret = devm_pwmchip_add(&pdev->dev, &atmel_pwm->chip);
if (ret < 0) {
- dev_err(&pdev->dev, "failed to add PWM chip %d\n", ret);
- goto unprepare_clk;
+ dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
+ goto disable_clk;
}
- platform_set_drvdata(pdev, atmel_pwm);
+ return 0;
- return ret;
+disable_clk:
+ atmel_pwm_enable_clk_if_on(atmel_pwm, false);
-unprepare_clk:
- clk_unprepare(atmel_pwm->clk);
return ret;
}
-static void atmel_pwm_remove(struct platform_device *pdev)
-{
- struct atmel_pwm_chip *atmel_pwm = platform_get_drvdata(pdev);
-
- pwmchip_remove(&atmel_pwm->chip);
-
- clk_unprepare(atmel_pwm->clk);
-}
-
static struct platform_driver atmel_pwm_driver = {
.driver = {
.name = "atmel-pwm",
.of_match_table = of_match_ptr(atmel_pwm_dt_ids),
},
.probe = atmel_pwm_probe,
- .remove_new = atmel_pwm_remove,
};
module_platform_driver(atmel_pwm_driver);
struct clk *clk;
};
-static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *_chip)
+static inline struct kona_pwmc *to_kona_pwmc(struct pwm_chip *chip)
{
- return container_of(_chip, struct kona_pwmc, chip);
+ return container_of(chip, struct kona_pwmc, chip);
}
/*
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
struct regmap *regmap;
};
-static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *pc)
+static inline struct crystalcove_pwm *to_crc_pwm(struct pwm_chip *chip)
{
- return container_of(pc, struct crystalcove_pwm, chip);
+ return container_of(chip, struct crystalcove_pwm, chip);
}
static int crc_pwm_calc_clk_div(int period_ns)
*/
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_data/cros_ec_commands.h>
#include <linux/platform_data/cros_ec_proto.h>
#include <linux/platform_device.h>
u16 duty_cycle;
};
-static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *c)
+static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chip)
{
- return container_of(c, struct cros_ec_pwm_device, chip);
+ return container_of(chip, struct cros_ec_pwm_device, chip);
}
static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
}
static struct pwm_device *
-cros_ec_pwm_xlate(struct pwm_chip *pc, const struct of_phandle_args *args)
+cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
{
struct pwm_device *pwm;
- if (args->args[0] >= pc->npwm)
+ if (args->args[0] >= chip->npwm)
return ERR_PTR(-EINVAL);
- pwm = pwm_request_from_chip(pc, args->args[0], NULL);
+ pwm = pwm_request_from_chip(chip, args->args[0], NULL);
if (IS_ERR(pwm))
return pwm;
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/pwm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/reset.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
#include <linux/mfd/ingenic-tcu.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/regmap.h>
#include <linux/err.h>
#include <linux/mfd/lp3943.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
struct lp3943_platform_data *pdata;
};
-static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *_chip)
+static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip)
{
- return container_of(_chip, struct lp3943_pwm, chip);
+ return container_of(chip, struct lp3943_pwm, chip);
}
static struct lp3943_pwm_map *
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
if (IS_ERR(lpc18xx_pwm->base))
return PTR_ERR(lpc18xx_pwm->base);
- lpc18xx_pwm->pwm_clk = devm_clk_get(&pdev->dev, "pwm");
+ lpc18xx_pwm->pwm_clk = devm_clk_get_enabled(&pdev->dev, "pwm");
if (IS_ERR(lpc18xx_pwm->pwm_clk))
return dev_err_probe(&pdev->dev, PTR_ERR(lpc18xx_pwm->pwm_clk),
"failed to get pwm clock\n");
- ret = clk_prepare_enable(lpc18xx_pwm->pwm_clk);
- if (ret < 0)
- return dev_err_probe(&pdev->dev, ret,
- "could not prepare or enable pwm clock\n");
-
lpc18xx_pwm->clk_rate = clk_get_rate(lpc18xx_pwm->pwm_clk);
- if (!lpc18xx_pwm->clk_rate) {
- ret = dev_err_probe(&pdev->dev,
- -EINVAL, "pwm clock has no frequency\n");
- goto disable_pwmclk;
- }
+ if (!lpc18xx_pwm->clk_rate)
+ return dev_err_probe(&pdev->dev,
+ -EINVAL, "pwm clock has no frequency\n");
/*
* If clkrate is too fast, the calculations in .apply() might overflow.
*/
- if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) {
- ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
- goto disable_pwmclk;
- }
+ if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC)
+ return dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n");
mutex_init(&lpc18xx_pwm->res_lock);
mutex_init(&lpc18xx_pwm->period_lock);
lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL, val);
ret = pwmchip_add(&lpc18xx_pwm->chip);
- if (ret < 0) {
- dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
- goto disable_pwmclk;
- }
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "pwmchip_add failed\n");
platform_set_drvdata(pdev, lpc18xx_pwm);
return 0;
-
-disable_pwmclk:
- clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
- return ret;
}
static void lpc18xx_pwm_remove(struct platform_device *pdev)
val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL,
val | LPC18XX_PWM_CTRL_HALT);
-
- clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
}
static struct platform_driver lpc18xx_pwm_driver = {
if (duty_cycles > 255)
duty_cycles = 255;
- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+ val = readl(lpc32xx->base);
val &= ~0xFFFF;
val |= (period_cycles << 8) | duty_cycles;
- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+ writel(val, lpc32xx->base);
return 0;
}
if (ret)
return ret;
- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+ val = readl(lpc32xx->base);
val |= PWM_ENABLE;
- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+ writel(val, lpc32xx->base);
return 0;
}
struct lpc32xx_pwm_chip *lpc32xx = to_lpc32xx_pwm_chip(chip);
u32 val;
- val = readl(lpc32xx->base + (pwm->hwpwm << 2));
+ val = readl(lpc32xx->base);
val &= ~PWM_ENABLE;
- writel(val, lpc32xx->base + (pwm->hwpwm << 2));
+ writel(val, lpc32xx->base);
clk_disable_unprepare(lpc32xx->clk);
}
lpc32xx->chip.npwm = 1;
/* If PWM is disabled, configure the output to the default value */
- val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+ val = readl(lpc32xx->base);
val &= ~PWM_PIN_LEVEL;
- writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+ writel(val, lpc32xx->base);
ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
if (ret < 0) {
#include <linux/module.h>
#include <linux/clk.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
#include <linux/math64.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
#include <linux/math.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/slab.h>
#include <linux/types.h>
struct ntxec_pwm {
- struct device *dev;
struct ntxec *ec;
struct pwm_chip chip;
};
struct ntxec_pwm *priv;
struct pwm_chip *chip;
- pdev->dev.of_node = pdev->dev.parent->of_node;
+ device_set_of_node_from_dev(&pdev->dev, pdev->dev.parent);
priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
priv->ec = ec;
- priv->dev = &pdev->dev;
chip = &priv->chip;
chip->dev = &pdev->dev;
* input clock (PWMCR_SD is set) and the output is driven to inactive.
*/
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#define pwm_of_match NULL
#endif
-static const struct platform_device_id *pxa_pwm_get_id_dt(struct device *dev)
-{
- const struct of_device_id *id = of_match_device(pwm_of_match, dev);
-
- return id ? id->data : NULL;
-}
-
static int pwm_probe(struct platform_device *pdev)
{
const struct platform_device_id *id = platform_get_device_id(pdev);
int ret = 0;
if (IS_ENABLED(CONFIG_OF) && id == NULL)
- id = pxa_pwm_get_id_dt(&pdev->dev);
+ id = of_device_get_match_data(&pdev->dev);
if (id == NULL)
return -EINVAL;
u32 enable_conf;
};
-static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *c)
+static inline struct rockchip_pwm_chip *to_rockchip_pwm_chip(struct pwm_chip *chip)
{
- return container_of(c, struct rockchip_pwm_chip, chip);
+ return container_of(chip, struct rockchip_pwm_chip, chip);
}
static int rockchip_pwm_get_state(struct pwm_chip *chip,
* struct rz_mtu3_channel_io_map - MTU3 pwm channel map
*
* @base_pwm_number: First PWM of a channel
- * @num: number of IOs on the HW channel.
+ * @num_channel_ios: number of IOs on the HW channel.
*/
struct rz_mtu3_channel_io_map {
u8 base_pwm_number;
*/
#include <linux/clk.h>
#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
};
static inline
-struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *c)
+struct pwm_sifive_ddata *pwm_sifive_chip_to_ddata(struct pwm_chip *chip)
{
- return container_of(c, struct pwm_sifive_ddata, chip);
+ return container_of(chip, struct pwm_sifive_ddata, chip);
}
static int pwm_sifive_request(struct pwm_chip *chip, struct pwm_device *pwm)
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/pwm.h>
#include <linux/regmap.h>
regmap_write((priv)->regmap, (priv)->offset + (reg), (val))
struct sl28cpld_pwm {
- struct pwm_chip pwm_chip;
+ struct pwm_chip chip;
struct regmap *regmap;
u32 offset;
};
-#define sl28cpld_pwm_from_chip(_chip) \
- container_of(_chip, struct sl28cpld_pwm, pwm_chip)
+
+static inline struct sl28cpld_pwm *sl28cpld_pwm_from_chip(struct pwm_chip *chip)
+{
+ return container_of(chip, struct sl28cpld_pwm, chip);
+}
static int sl28cpld_pwm_get_state(struct pwm_chip *chip,
struct pwm_device *pwm,
}
/* Initialize the pwm_chip structure */
- chip = &priv->pwm_chip;
+ chip = &priv->chip;
chip->dev = &pdev->dev;
chip->ops = &sl28cpld_pwm_ops;
chip->npwm = 1;
- ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip);
+ ret = devm_pwmchip_add(&pdev->dev, chip);
if (ret) {
dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
ERR_PTR(ret));
#include <linux/err.h>
#include <linux/io.h>
#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
priv->chip.ops = &stm32pwm_ops;
priv->chip.npwm = stm32_pwm_detect_channels(priv);
- ret = pwmchip_add(&priv->chip);
+ ret = devm_pwmchip_add(dev, &priv->chip);
if (ret < 0)
return ret;
return 0;
}
-static void stm32_pwm_remove(struct platform_device *pdev)
-{
- struct stm32_pwm *priv = platform_get_drvdata(pdev);
- unsigned int i;
-
- for (i = 0; i < priv->chip.npwm; i++)
- pwm_disable(&priv->chip.pwms[i]);
-
- pwmchip_remove(&priv->chip);
-}
-
static int __maybe_unused stm32_pwm_suspend(struct device *dev)
{
struct stm32_pwm *priv = dev_get_drvdata(dev);
static struct platform_driver stm32_pwm_driver = {
.probe = stm32_pwm_probe,
- .remove_new = stm32_pwm_remove,
.driver = {
.name = "stm32-pwm",
.of_match_table = stm32_pwm_of_match,
return 0;
}
-static void stmpe_24xx_pwm_disable(struct pwm_chip *chip,
- struct pwm_device *pwm)
+static int stmpe_24xx_pwm_disable(struct pwm_chip *chip,
+ struct pwm_device *pwm)
{
struct stmpe_pwm *stmpe_pwm = to_stmpe_pwm(chip);
u8 value;
if (ret < 0) {
dev_err(chip->dev, "error reading PWM#%u control\n",
pwm->hwpwm);
- return;
+ return ret;
}
value = ret & ~BIT(pwm->hwpwm);
ret = stmpe_reg_write(stmpe_pwm->stmpe, STMPE24XX_PWMCS, value);
- if (ret) {
+ if (ret)
dev_err(chip->dev, "error writing PWM#%u control\n",
pwm->hwpwm);
- return;
- }
+ return ret;
}
/* STMPE 24xx PWM instructions */
/* Make sure we are disabled */
if (pwm_is_enabled(pwm)) {
- stmpe_24xx_pwm_disable(chip, pwm);
+ ret = stmpe_24xx_pwm_disable(chip, pwm);
+ if (ret)
+ return ret;
} else {
/* Connect the PWM to the pin */
pin = pwm->hwpwm;
if (!state->enabled) {
if (pwm->state.enabled)
- stmpe_24xx_pwm_disable(chip, pwm);
+ return stmpe_24xx_pwm_disable(chip, pwm);
return 0;
}
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/reset.h>
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/pm_opp.h>
#include <linux/pwm.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/pm_runtime.h>
#include <linux/pwm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
/* ECAP registers and bits definitions */
#define CAP1 0x08
#include <linux/err.h>
#include <linux/clk.h>
#include <linux/pm_runtime.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
/* EHRPWM registers and bits definitions */
#include <linux/err.h>
#include <linux/io.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pwm.h>
* Copyright (C) 2010 Alexey Charkov <alchark@gmail.com>
*/
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <asm/div64.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
-
/*
* SoC architecture allocates register space for 4 PWMs but only
* 2 are currently implemented.
.ramp_mask = TPS6287X_CTRL1_VRAMP,
.ramp_delay_table = tps6287x_ramp_table,
.n_ramp_values = ARRAY_SIZE(tps6287x_ramp_table),
- .n_voltages = 256,
+ .n_voltages = 256 * ARRAY_SIZE(tps6287x_voltage_ranges),
.linear_ranges = tps6287x_voltage_ranges,
.n_linear_ranges = ARRAY_SIZE(tps6287x_voltage_ranges),
.linear_range_selectors_bitfield = tps6287x_voltage_range_sel,
if (irq < 0)
return -EINVAL;
- irq_data[*irq_idx + j].dev = tps->dev;
- irq_data[*irq_idx + j].type = irq_type;
- irq_data[*irq_idx + j].rdev = rdev;
+ irq_data[*irq_idx].dev = tps->dev;
+ irq_data[*irq_idx].type = irq_type;
+ irq_data[*irq_idx].rdev = rdev;
error = devm_request_threaded_irq(tps->dev, irq, NULL,
- tps6594_regulator_irq_handler,
- IRQF_ONESHOT,
- irq_type->irq_name,
- &irq_data[*irq_idx]);
- (*irq_idx)++;
+ tps6594_regulator_irq_handler, IRQF_ONESHOT,
+ irq_type->irq_name, &irq_data[*irq_idx]);
if (error) {
dev_err(tps->dev, "tps6594 failed to request %s IRQ %d: %d\n",
irq_type->irq_name, irq, error);
return error;
}
+ (*irq_idx)++;
}
return 0;
}
int error, i, irq, multi, delta;
int irq_idx = 0;
int buck_idx = 0;
- int ext_reg_irq_nb = 2;
-
+ size_t ext_reg_irq_nb = 2;
+ size_t reg_irq_nb;
enum {
MULTI_BUCK12,
MULTI_BUCK123,
}
}
- if (tps->chip_id == LP8764)
+ if (tps->chip_id == LP8764) {
/* There is only 4 buck on LP8764 */
buck_configured[4] = 1;
+ reg_irq_nb = size_mul(REGS_INT_NB, (BUCK_NB - 1));
+ } else {
+ reg_irq_nb = size_mul(REGS_INT_NB, (size_add(BUCK_NB, LDO_NB)));
+ }
- irq_data = devm_kmalloc_array(tps->dev,
- REGS_INT_NB * sizeof(struct tps6594_regulator_irq_data),
- ARRAY_SIZE(tps6594_bucks_irq_types) +
- ARRAY_SIZE(tps6594_ldos_irq_types),
- GFP_KERNEL);
+ irq_data = devm_kmalloc_array(tps->dev, reg_irq_nb,
+ sizeof(struct tps6594_regulator_irq_data), GFP_KERNEL);
if (!irq_data)
return -ENOMEM;
select REGMAP_SPI if SPI_MASTER
select WATCHDOG_CORE if WATCHDOG
help
- If you say yes here you get support for the NXP PCF2127/29 RTC
+ If you say yes here you get support for the NXP PCF2127/29/31 RTC
chips with integrated quartz crystal for industrial applications.
- Both chips also have watchdog timer and tamper switch detection
+ These chips also have watchdog timer and tamper switch detection
features.
PCF2127 has an additional feature of 512 bytes battery backed
config RTC_DRV_BQ4802
tristate "TI BQ4802"
depends on HAS_IOMEM && HAS_IOPORT
+ depends on SPARC || COMPILE_TEST
help
If you say Y here you will get support for the TI
BQ4802 RTC chip.
err = rtc_valid_tm(&alarm->time);
done:
- if (err)
+ if (err && alarm->enabled)
dev_warn(&rtc->dev, "invalid alarm value: %ptR\n",
&alarm->time);
#include <linux/i2c.h>
#include <linux/kstrtox.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/rtc.h>
#include <linux/watchdog.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
.alarm = ALARM2,
};
-#ifdef CONFIG_OF
static const struct of_device_id armada38x_rtc_of_match_table[] = {
{
.compatible = "marvell,armada-380-rtc",
{}
};
MODULE_DEVICE_TABLE(of, armada38x_rtc_of_match_table);
-#endif
static __init int armada38x_rtc_probe(struct platform_device *pdev)
{
.driver = {
.name = "armada38x-rtc",
.pm = &armada38x_rtc_pm_ops,
- .of_match_table = of_match_ptr(armada38x_rtc_of_match_table),
+ .of_match_table = armada38x_rtc_of_match_table,
},
};
static struct platform_driver aspeed_rtc_driver = {
.driver = {
.name = "aspeed-rtc",
- .of_match_table = of_match_ptr(aspeed_rtc_match),
+ .of_match_table = aspeed_rtc_match,
},
};
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
.driver = {
.name = "at91_rtc",
.pm = &at91_rtc_pm_ops,
- .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+ .of_match_table = at91_rtc_dt_ids,
},
};
.driver = {
.name = "rtc-at91sam9",
.pm = &at91_rtc_pm_ops,
- .of_match_table = of_match_ptr(at91_rtc_dt_ids),
+ .of_match_table = at91_rtc_dt_ids,
},
};
#define INITSECTION __init
#endif
+#define SECS_PER_DAY (24 * 60 * 60)
+#define SECS_PER_MONTH (28 * SECS_PER_DAY)
+#define SECS_PER_YEAR (365 * SECS_PER_DAY)
+
static int INITSECTION
cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
{
goto cleanup0;
}
+ if (cmos_rtc.mon_alrm)
+ cmos_rtc.rtc->alarm_offset_max = SECS_PER_YEAR - 1;
+ else if (cmos_rtc.day_alrm)
+ cmos_rtc.rtc->alarm_offset_max = SECS_PER_MONTH - 1;
+ else
+ cmos_rtc.rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
rename_region(ports, dev_name(&cmos_rtc.rtc->dev));
if (!mc146818_does_rtc_work()) {
ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
if (ret < 0) {
- if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) {
- /*
- * RTC chips on some older Chromebooks can only handle
- * alarms up to 24h in the future. Try to set an alarm
- * below that limit to avoid suspend failures.
- */
- ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
- SECS_PER_DAY - 1);
- }
-
- if (ret < 0) {
- dev_err(dev, "error setting alarm in %u seconds: %d\n",
- alarm_offset, ret);
- return ret;
- }
+ dev_err(dev, "error setting alarm in %u seconds: %d\n",
+ alarm_offset, ret);
+ /*
+ * The EC code returns -EINVAL if the alarm time is too
+ * far in the future. Convert it to the expected error code.
+ */
+ if (ret == -EINVAL)
+ ret = -ERANGE;
+ return ret;
}
return 0;
cros_ec_rtc->rtc->ops = &cros_ec_rtc_ops;
cros_ec_rtc->rtc->range_max = U32_MAX;
+ /*
+ * The RTC on some older Chromebooks can only handle alarms less than
+ * 24 hours in the future. The only way to find out is to try to set an
+ * alarm further in the future. If that fails, assume that the RTC
+ * connected to the EC can only handle less than 24 hours of alarm
+ * window.
+ */
+ ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, SECS_PER_DAY * 2);
+ if (ret == -EINVAL)
+ cros_ec_rtc->rtc->alarm_offset_max = SECS_PER_DAY - 1;
+
+ (void)cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
+ EC_RTC_ALARM_CLEAR);
+
ret = devm_rtc_register_device(cros_ec_rtc->rtc);
if (ret)
return ret;
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/slab.h>
dev_err(&pdev->dev, "Failed to request ALARM IRQ %d: %d\n",
irq_alarm, ret);
+ ret = dev_pm_set_wake_irq(&pdev->dev, irq_alarm);
+ if (ret)
+ dev_warn(&pdev->dev,
+ "Failed to set IRQ %d as a wake IRQ: %d\n",
+ irq_alarm, ret);
+
device_init_wakeup(&pdev->dev, true);
return devm_rtc_register_device(rtc->rtc_dev);
/* make sure alarm fires within the next 24 hours */
if (later <= now)
return -EINVAL;
- if ((later - now) > 24 * 60 * 60)
- return -EDOM;
+ if ((later - now) > ds1305->rtc->alarm_offset_max)
+ return -ERANGE;
/* disable alarm if needed */
if (ds1305->ctrl[0] & DS1305_AEI0) {
ds1305->rtc->ops = &ds1305_ops;
ds1305->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
ds1305->rtc->range_max = RTC_TIMESTAMP_END_2099;
+ ds1305->rtc->alarm_offset_max = 24 * 60 * 60;
ds1305_nvmem_cfg.priv = ds1305;
status = devm_rtc_register_device(ds1305->rtc);
match = device_get_match_data(&client->dev);
if (match) {
- ds1307->type = (enum ds_type)match;
+ ds1307->type = (uintptr_t)match;
chip = &chips[ds1307->type];
} else if (id) {
chip = &chips[id->driver_data];
#include <linux/jiffies.h>
#include <linux/rtc.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/types.h>
#include <linux/bcd.h>
-#include <linux/platform_data/rtc-ds2404.h>
#include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
#include <linux/slab.h>
#include <linux/io.h>
#define DS2404_CLK 1
#define DS2404_DQ 2
-struct ds2404_gpio {
- const char *name;
- unsigned int gpio;
-};
-
struct ds2404 {
- struct ds2404_gpio *gpio;
+ struct device *dev;
+ struct gpio_desc *rst_gpiod;
+ struct gpio_desc *clk_gpiod;
+ struct gpio_desc *dq_gpiod;
struct rtc_device *rtc;
};
-static struct ds2404_gpio ds2404_gpio[] = {
- { "RTC RST", 0 },
- { "RTC CLK", 0 },
- { "RTC DQ", 0 },
-};
-
-static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev,
- struct ds2404_platform_data *pdata)
+static int ds2404_gpio_map(struct ds2404 *chip, struct platform_device *pdev)
{
- int i, err;
-
- ds2404_gpio[DS2404_RST].gpio = pdata->gpio_rst;
- ds2404_gpio[DS2404_CLK].gpio = pdata->gpio_clk;
- ds2404_gpio[DS2404_DQ].gpio = pdata->gpio_dq;
-
- for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++) {
- err = gpio_request(ds2404_gpio[i].gpio, ds2404_gpio[i].name);
- if (err) {
- dev_err(&pdev->dev, "error mapping gpio %s: %d\n",
- ds2404_gpio[i].name, err);
- goto err_request;
- }
- if (i != DS2404_DQ)
- gpio_direction_output(ds2404_gpio[i].gpio, 1);
- }
+ struct device *dev = &pdev->dev;
- chip->gpio = ds2404_gpio;
- return 0;
+ /* This will de-assert RESET, declare this GPIO as GPIOD_ACTIVE_LOW */
+ chip->rst_gpiod = devm_gpiod_get(dev, "rst", GPIOD_OUT_LOW);
+ if (IS_ERR(chip->rst_gpiod))
+ return PTR_ERR(chip->rst_gpiod);
-err_request:
- while (--i >= 0)
- gpio_free(ds2404_gpio[i].gpio);
- return err;
-}
+ chip->clk_gpiod = devm_gpiod_get(dev, "clk", GPIOD_OUT_HIGH);
+ if (IS_ERR(chip->clk_gpiod))
+ return PTR_ERR(chip->clk_gpiod);
-static void ds2404_gpio_unmap(void *data)
-{
- int i;
+ chip->dq_gpiod = devm_gpiod_get(dev, "dq", GPIOD_ASIS);
+ if (IS_ERR(chip->dq_gpiod))
+ return PTR_ERR(chip->dq_gpiod);
- for (i = 0; i < ARRAY_SIZE(ds2404_gpio); i++)
- gpio_free(ds2404_gpio[i].gpio);
+ return 0;
}
-static void ds2404_reset(struct device *dev)
+static void ds2404_reset(struct ds2404 *chip)
{
- gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 0);
+ gpiod_set_value(chip->rst_gpiod, 1);
udelay(1000);
- gpio_set_value(ds2404_gpio[DS2404_RST].gpio, 1);
- gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
- gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 0);
+ gpiod_set_value(chip->rst_gpiod, 0);
+ gpiod_set_value(chip->clk_gpiod, 0);
+ gpiod_direction_output(chip->dq_gpiod, 0);
udelay(10);
}
-static void ds2404_write_byte(struct device *dev, u8 byte)
+static void ds2404_write_byte(struct ds2404 *chip, u8 byte)
{
int i;
- gpio_direction_output(ds2404_gpio[DS2404_DQ].gpio, 1);
+ gpiod_direction_output(chip->dq_gpiod, 1);
for (i = 0; i < 8; i++) {
- gpio_set_value(ds2404_gpio[DS2404_DQ].gpio, byte & (1 << i));
+ gpiod_set_value(chip->dq_gpiod, byte & (1 << i));
udelay(10);
- gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+ gpiod_set_value(chip->clk_gpiod, 1);
udelay(10);
- gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+ gpiod_set_value(chip->clk_gpiod, 0);
udelay(10);
}
}
-static u8 ds2404_read_byte(struct device *dev)
+static u8 ds2404_read_byte(struct ds2404 *chip)
{
int i;
u8 ret = 0;
- gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
+ gpiod_direction_input(chip->dq_gpiod);
for (i = 0; i < 8; i++) {
- gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 0);
+ gpiod_set_value(chip->clk_gpiod, 0);
udelay(10);
- if (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+ if (gpiod_get_value(chip->dq_gpiod))
ret |= 1 << i;
- gpio_set_value(ds2404_gpio[DS2404_CLK].gpio, 1);
+ gpiod_set_value(chip->clk_gpiod, 1);
udelay(10);
}
return ret;
}
-static void ds2404_read_memory(struct device *dev, u16 offset,
+static void ds2404_read_memory(struct ds2404 *chip, u16 offset,
int length, u8 *out)
{
- ds2404_reset(dev);
- ds2404_write_byte(dev, DS2404_READ_MEMORY_CMD);
- ds2404_write_byte(dev, offset & 0xff);
- ds2404_write_byte(dev, (offset >> 8) & 0xff);
+ ds2404_reset(chip);
+ ds2404_write_byte(chip, DS2404_READ_MEMORY_CMD);
+ ds2404_write_byte(chip, offset & 0xff);
+ ds2404_write_byte(chip, (offset >> 8) & 0xff);
while (length--)
- *out++ = ds2404_read_byte(dev);
+ *out++ = ds2404_read_byte(chip);
}
-static void ds2404_write_memory(struct device *dev, u16 offset,
+static void ds2404_write_memory(struct ds2404 *chip, u16 offset,
int length, u8 *out)
{
int i;
u8 ta01, ta02, es;
- ds2404_reset(dev);
- ds2404_write_byte(dev, DS2404_WRITE_SCRATCHPAD_CMD);
- ds2404_write_byte(dev, offset & 0xff);
- ds2404_write_byte(dev, (offset >> 8) & 0xff);
+ ds2404_reset(chip);
+ ds2404_write_byte(chip, DS2404_WRITE_SCRATCHPAD_CMD);
+ ds2404_write_byte(chip, offset & 0xff);
+ ds2404_write_byte(chip, (offset >> 8) & 0xff);
for (i = 0; i < length; i++)
- ds2404_write_byte(dev, out[i]);
+ ds2404_write_byte(chip, out[i]);
- ds2404_reset(dev);
- ds2404_write_byte(dev, DS2404_READ_SCRATCHPAD_CMD);
+ ds2404_reset(chip);
+ ds2404_write_byte(chip, DS2404_READ_SCRATCHPAD_CMD);
- ta01 = ds2404_read_byte(dev);
- ta02 = ds2404_read_byte(dev);
- es = ds2404_read_byte(dev);
+ ta01 = ds2404_read_byte(chip);
+ ta02 = ds2404_read_byte(chip);
+ es = ds2404_read_byte(chip);
for (i = 0; i < length; i++) {
- if (out[i] != ds2404_read_byte(dev)) {
- dev_err(dev, "read invalid data\n");
+ if (out[i] != ds2404_read_byte(chip)) {
+ dev_err(chip->dev, "read invalid data\n");
return;
}
}
- ds2404_reset(dev);
- ds2404_write_byte(dev, DS2404_COPY_SCRATCHPAD_CMD);
- ds2404_write_byte(dev, ta01);
- ds2404_write_byte(dev, ta02);
- ds2404_write_byte(dev, es);
+ ds2404_reset(chip);
+ ds2404_write_byte(chip, DS2404_COPY_SCRATCHPAD_CMD);
+ ds2404_write_byte(chip, ta01);
+ ds2404_write_byte(chip, ta02);
+ ds2404_write_byte(chip, es);
- gpio_direction_input(ds2404_gpio[DS2404_DQ].gpio);
- while (gpio_get_value(ds2404_gpio[DS2404_DQ].gpio))
+ while (gpiod_get_value(chip->dq_gpiod))
;
}
-static void ds2404_enable_osc(struct device *dev)
+static void ds2404_enable_osc(struct ds2404 *chip)
{
u8 in[1] = { 0x10 }; /* enable oscillator */
- ds2404_write_memory(dev, 0x201, 1, in);
+
+ ds2404_write_memory(chip, 0x201, 1, in);
}
static int ds2404_read_time(struct device *dev, struct rtc_time *dt)
{
+ struct ds2404 *chip = dev_get_drvdata(dev);
unsigned long time = 0;
__le32 hw_time = 0;
- ds2404_read_memory(dev, 0x203, 4, (u8 *)&hw_time);
+ ds2404_read_memory(chip, 0x203, 4, (u8 *)&hw_time);
time = le32_to_cpu(hw_time);
rtc_time64_to_tm(time, dt);
static int ds2404_set_time(struct device *dev, struct rtc_time *dt)
{
+ struct ds2404 *chip = dev_get_drvdata(dev);
u32 time = cpu_to_le32(rtc_tm_to_time64(dt));
- ds2404_write_memory(dev, 0x203, 4, (u8 *)&time);
+ ds2404_write_memory(chip, 0x203, 4, (u8 *)&time);
return 0;
}
static int rtc_probe(struct platform_device *pdev)
{
- struct ds2404_platform_data *pdata = dev_get_platdata(&pdev->dev);
struct ds2404 *chip;
int retval = -EBUSY;
if (!chip)
return -ENOMEM;
+ chip->dev = &pdev->dev;
+
chip->rtc = devm_rtc_allocate_device(&pdev->dev);
if (IS_ERR(chip->rtc))
return PTR_ERR(chip->rtc);
- retval = ds2404_gpio_map(chip, pdev, pdata);
+ retval = ds2404_gpio_map(chip, pdev);
if (retval)
return retval;
- retval = devm_add_action_or_reset(&pdev->dev, ds2404_gpio_unmap, chip);
- if (retval)
- return retval;
-
- dev_info(&pdev->dev, "using GPIOs RST:%d, CLK:%d, DQ:%d\n",
- chip->gpio[DS2404_RST].gpio, chip->gpio[DS2404_CLK].gpio,
- chip->gpio[DS2404_DQ].gpio);
-
platform_set_drvdata(pdev, chip);
chip->rtc->ops = &ds2404_rtc_ops;
if (retval)
return retval;
- ds2404_enable_osc(&pdev->dev);
+ ds2404_enable_osc(chip);
return 0;
}
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/fsl/ftm.h>
#include <linux/rtc.h>
*/
#include <linux/bcd.h>
+#include <linux/bitfield.h>
+#include <linux/clk-provider.h>
#include <linux/err.h>
#include <linux/hwmon.h>
#include <linux/i2c.h>
#define ISL12022_REG_SR 0x07
#define ISL12022_REG_INT 0x08
+#define ISL12022_REG_PWR_VBAT 0x0a
+
#define ISL12022_REG_BETA 0x0d
#define ISL12022_REG_TEMP_L 0x28
#define ISL12022_SR_LBAT75 (1 << 1)
#define ISL12022_INT_WRTC (1 << 6)
+#define ISL12022_INT_FO_MASK GENMASK(3, 0)
+#define ISL12022_INT_FO_OFF 0x0
+#define ISL12022_INT_FO_32K 0x1
+
+#define ISL12022_REG_VB85_MASK GENMASK(5, 3)
+#define ISL12022_REG_VB75_MASK GENMASK(2, 0)
#define ISL12022_BETA_TSE (1 << 7)
if (ret)
return ret;
- if (buf[ISL12022_REG_SR] & (ISL12022_SR_LBAT85 | ISL12022_SR_LBAT75)) {
- dev_warn(dev,
- "voltage dropped below %u%%, date and time is not reliable.\n",
- buf[ISL12022_REG_SR] & ISL12022_SR_LBAT85 ? 85 : 75);
- }
-
dev_dbg(dev,
"raw data is sec=%02x, min=%02x, hr=%02x, mday=%02x, mon=%02x, year=%02x, wday=%02x, sr=%02x, int=%02x",
buf[ISL12022_REG_SC],
return regmap_bulk_write(regmap, ISL12022_REG_SC, buf, sizeof(buf));
}
+static int isl12022_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
+{
+ struct regmap *regmap = dev_get_drvdata(dev);
+ u32 user, val;
+ int ret;
+
+ switch (cmd) {
+ case RTC_VL_READ:
+ ret = regmap_read(regmap, ISL12022_REG_SR, &val);
+ if (ret)
+ return ret;
+
+ user = 0;
+ if (val & ISL12022_SR_LBAT85)
+ user |= RTC_VL_BACKUP_LOW;
+
+ if (val & ISL12022_SR_LBAT75)
+ user |= RTC_VL_BACKUP_EMPTY;
+
+ return put_user(user, (u32 __user *)arg);
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+}
+
static const struct rtc_class_ops isl12022_rtc_ops = {
+ .ioctl = isl12022_rtc_ioctl,
.read_time = isl12022_rtc_read_time,
.set_time = isl12022_rtc_set_time,
};
.use_single_write = true,
};
+static int isl12022_register_clock(struct device *dev)
+{
+ struct regmap *regmap = dev_get_drvdata(dev);
+ struct clk_hw *hw;
+ int ret;
+
+ if (!device_property_present(dev, "#clock-cells")) {
+ /*
+ * Disabling the F_OUT pin reduces the power
+ * consumption in battery mode by ~25%.
+ */
+ regmap_update_bits(regmap, ISL12022_REG_INT, ISL12022_INT_FO_MASK,
+ ISL12022_INT_FO_OFF);
+
+ return 0;
+ }
+
+ if (!IS_ENABLED(CONFIG_COMMON_CLK))
+ return 0;
+
+ /*
+ * For now, only support a fixed clock of 32768Hz (the reset default).
+ */
+ ret = regmap_update_bits(regmap, ISL12022_REG_INT,
+ ISL12022_INT_FO_MASK, ISL12022_INT_FO_32K);
+ if (ret)
+ return ret;
+
+ hw = devm_clk_hw_register_fixed_rate(dev, "isl12022", NULL, 0, 32768);
+ if (IS_ERR(hw))
+ return PTR_ERR(hw);
+
+ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get, hw);
+}
+
+static const u32 trip_levels[2][7] = {
+ { 2125000, 2295000, 2550000, 2805000, 3060000, 4250000, 4675000 },
+ { 1875000, 2025000, 2250000, 2475000, 2700000, 3750000, 4125000 },
+};
+
+static void isl12022_set_trip_levels(struct device *dev)
+{
+ struct regmap *regmap = dev_get_drvdata(dev);
+ u32 levels[2] = {0, 0};
+ int ret, i, j, x[2];
+ u8 val, mask;
+
+ device_property_read_u32_array(dev, "isil,battery-trip-levels-microvolt",
+ levels, 2);
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < ARRAY_SIZE(trip_levels[i]) - 1; j++) {
+ if (levels[i] <= trip_levels[i][j])
+ break;
+ }
+ x[i] = j;
+ }
+
+ val = FIELD_PREP(ISL12022_REG_VB85_MASK, x[0]) |
+ FIELD_PREP(ISL12022_REG_VB75_MASK, x[1]);
+ mask = ISL12022_REG_VB85_MASK | ISL12022_REG_VB75_MASK;
+
+ ret = regmap_update_bits(regmap, ISL12022_REG_PWR_VBAT, mask, val);
+ if (ret)
+ dev_warn(dev, "unable to set battery alarm levels: %d\n", ret);
+
+ /*
+ * Force a write of the TSE bit in the BETA register, in order
+ * to trigger an update of the LBAT75 and LBAT85 bits in the
+ * status register. In battery backup mode, those bits have
+ * another meaning, so without this, they may contain stale
+ * values for up to a minute after power-on.
+ */
+ regmap_write_bits(regmap, ISL12022_REG_BETA,
+ ISL12022_BETA_TSE, ISL12022_BETA_TSE);
+}
+
static int isl12022_probe(struct i2c_client *client)
{
struct rtc_device *rtc;
struct regmap *regmap;
+ int ret;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
return -ENODEV;
dev_set_drvdata(&client->dev, regmap);
+ ret = isl12022_register_clock(&client->dev);
+ if (ret)
+ return ret;
+
+ isl12022_set_trip_levels(&client->dev);
isl12022_hwmon_register(&client->dev);
rtc = devm_rtc_allocate_device(&client->dev);
#include <linux/mutex.h>
#include <linux/nvmem-provider.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/rtc.h>
#include <linux/slab.h>
}
}
-static int isl12026_probe_new(struct i2c_client *client)
+static int isl12026_probe(struct i2c_client *client)
{
struct isl12026 *priv;
int ret;
.name = "rtc-isl12026",
.of_match_table = isl12026_dt_match,
},
- .probe = isl12026_probe_new,
+ .probe = isl12026_probe,
.remove = isl12026_remove,
};
#include <linux/clk.h>
#include <linux/i2c.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/of_irq.h>
#include <linux/rtc.h>
static int isl1208_set_xtoscb(struct i2c_client *client, int sr, int xtosb_val)
{
/* Do nothing if bit is already set to desired value */
- if ((sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
+ if (!!(sr & ISL1208_REG_SR_XTOSCB) == xtosb_val)
return 0;
if (xtosb_val)
i2c_set_clientdata(client, isl1208);
/* Determine which chip we have */
- if (client->dev.of_node) {
- isl1208->config = of_device_get_match_data(&client->dev);
- if (!isl1208->config)
- return -ENODEV;
- } else {
- const struct i2c_device_id *id = i2c_match_id(isl1208_id, client);
-
- if (!id)
- return -ENODEV;
- isl1208->config = (struct isl1208_config *)id->driver_data;
- }
+ isl1208->config = i2c_get_match_data(client);
+ if (!isl1208->config)
+ return -ENODEV;
rc = isl1208_clk_present(client, "xin");
if (rc < 0)
rc = isl1208_setup_irq(client, client->irq);
if (rc)
return rc;
-
} else {
clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, isl1208->rtc->features);
}
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_wakeirq.h>
#include <linux/property.h>
if (!rtc)
return -ENOMEM;
- rtc->type = (enum jz4740_rtc_type)device_get_match_data(dev);
+ rtc->type = (uintptr_t)device_get_match_data(dev);
irq = platform_get_irq(pdev, 0);
if (irq < 0)
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/rtc.h>
#include <linux/slab.h>
#include <linux/mutex.h>
*/
#include <linux/module.h>
+#include <linux/mod_devicetable.h>
#include <linux/rtc.h>
#include <linux/platform_device.h>
#include <linux/bcd.h>
return 0;
}
+static const struct of_device_id m48t86_rtc_of_ids[] = {
+ { .compatible = "st,m48t86" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, m48t86_rtc_of_ids);
+
static struct platform_driver m48t86_rtc_platform_driver = {
.driver = {
.name = "rtc-m48t86",
+ .of_match_table = m48t86_rtc_of_ids,
},
.probe = m48t86_rtc_probe,
};
#include <linux/module.h>
#include <linux/rtc.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/mfd/mt6397/core.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/clk.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
#include <linux/pm_wakeirq.h>
#include <linux/clk.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#define RTC_INPUT_CLK_32768HZ (0x00 << 5)
#define RTC_INPUT_CLK_32000HZ (0x01 << 5)
static struct i2c_driver nct3018y_driver = {
.driver = {
.name = "rtc-nct3018y",
- .of_match_table = of_match_ptr(nct3018y_of_match),
+ .of_match_table = nct3018y_of_match,
},
.probe = nct3018y_probe,
.id_table = nct3018y_id,
}
rtc->irq_timer = platform_get_irq(pdev, 0);
- if (rtc->irq_timer <= 0)
- return -ENOENT;
+ if (rtc->irq_timer < 0)
+ return rtc->irq_timer;
rtc->irq_alarm = platform_get_irq(pdev, 1);
- if (rtc->irq_alarm <= 0)
- return -ENOENT;
+ if (rtc->irq_alarm < 0)
+ return rtc->irq_alarm;
rtc->clk = devm_clk_get(&pdev->dev, "ext-clk");
if (!IS_ERR(rtc->clk))
// SPDX-License-Identifier: GPL-2.0-only
/*
- * An I2C and SPI driver for the NXP PCF2127/29 RTC
+ * An I2C and SPI driver for the NXP PCF2127/29/31 RTC
* Copyright 2013 Til-Technologies
*
* Author: Renaud Cerrato <r.cerrato@til-technologies.fr>
* Watchdog and tamper functions
* Author: Bruno Thomsen <bruno.thomsen@gmail.com>
*
+ * PCF2131 support
+ * Author: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+ *
* based on the other drivers in this same directory.
*
- * Datasheet: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ * Datasheets: https://www.nxp.com/docs/en/data-sheet/PCF2127.pdf
+ * https://www.nxp.com/docs/en/data-sheet/PCF2131DS.pdf
*/
#include <linux/i2c.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_irq.h>
+#include <linux/of_device.h>
#include <linux/regmap.h>
#include <linux/watchdog.h>
#define PCF2127_REG_CTRL1 0x00
#define PCF2127_BIT_CTRL1_POR_OVRD BIT(3)
#define PCF2127_BIT_CTRL1_TSF1 BIT(4)
+#define PCF2127_BIT_CTRL1_STOP BIT(5)
/* Control register 2 */
#define PCF2127_REG_CTRL2 0x01
#define PCF2127_BIT_CTRL2_AIE BIT(1)
#define PCF2127_BIT_CTRL3_BF BIT(3)
#define PCF2127_BIT_CTRL3_BTSE BIT(4)
/* Time and date registers */
-#define PCF2127_REG_SC 0x03
+#define PCF2127_REG_TIME_BASE 0x03
#define PCF2127_BIT_SC_OSF BIT(7)
-#define PCF2127_REG_MN 0x04
-#define PCF2127_REG_HR 0x05
-#define PCF2127_REG_DM 0x06
-#define PCF2127_REG_DW 0x07
-#define PCF2127_REG_MO 0x08
-#define PCF2127_REG_YR 0x09
/* Alarm registers */
-#define PCF2127_REG_ALARM_SC 0x0A
-#define PCF2127_REG_ALARM_MN 0x0B
-#define PCF2127_REG_ALARM_HR 0x0C
-#define PCF2127_REG_ALARM_DM 0x0D
-#define PCF2127_REG_ALARM_DW 0x0E
+#define PCF2127_REG_ALARM_BASE 0x0A
#define PCF2127_BIT_ALARM_AE BIT(7)
/* CLKOUT control register */
#define PCF2127_REG_CLKOUT 0x0f
#define PCF2127_BIT_WD_CTL_CD0 BIT(6)
#define PCF2127_BIT_WD_CTL_CD1 BIT(7)
#define PCF2127_REG_WD_VAL 0x11
-/* Tamper timestamp registers */
-#define PCF2127_REG_TS_CTRL 0x12
+/* Tamper timestamp1 registers */
+#define PCF2127_REG_TS1_BASE 0x12
#define PCF2127_BIT_TS_CTRL_TSOFF BIT(6)
#define PCF2127_BIT_TS_CTRL_TSM BIT(7)
-#define PCF2127_REG_TS_SC 0x13
-#define PCF2127_REG_TS_MN 0x14
-#define PCF2127_REG_TS_HR 0x15
-#define PCF2127_REG_TS_DM 0x16
-#define PCF2127_REG_TS_MO 0x17
-#define PCF2127_REG_TS_YR 0x18
/*
* RAM registers
* PCF2127 has 512 bytes general-purpose static RAM (SRAM) that is
* battery backed and can survive a power outage.
- * PCF2129 doesn't have this feature.
+ * PCF2129/31 doesn't have this feature.
*/
#define PCF2127_REG_RAM_ADDR_MSB 0x1A
#define PCF2127_REG_RAM_WRT_CMD 0x1C
/* Watchdog timer value constants */
#define PCF2127_WD_VAL_STOP 0
-#define PCF2127_WD_VAL_MIN 2
-#define PCF2127_WD_VAL_MAX 255
-#define PCF2127_WD_VAL_DEFAULT 60
+/* PCF2127/29 watchdog timer value constants */
+#define PCF2127_WD_CLOCK_HZ_X1000 1000 /* 1Hz */
+#define PCF2127_WD_MIN_HW_HEARTBEAT_MS 500
+/* PCF2131 watchdog timer value constants */
+#define PCF2131_WD_CLOCK_HZ_X1000 250 /* 1/4Hz */
+#define PCF2131_WD_MIN_HW_HEARTBEAT_MS 4000
+
+#define PCF2127_WD_DEFAULT_TIMEOUT_S 60
/* Mask for currently enabled interrupts */
#define PCF2127_CTRL1_IRQ_MASK (PCF2127_BIT_CTRL1_TSF1)
PCF2127_BIT_CTRL2_WDTF | \
PCF2127_BIT_CTRL2_TSF2)
+#define PCF2127_MAX_TS_SUPPORTED 4
+
+/* Control register 4 */
+#define PCF2131_REG_CTRL4 0x03
+#define PCF2131_BIT_CTRL4_TSF4 BIT(4)
+#define PCF2131_BIT_CTRL4_TSF3 BIT(5)
+#define PCF2131_BIT_CTRL4_TSF2 BIT(6)
+#define PCF2131_BIT_CTRL4_TSF1 BIT(7)
+/* Control register 5 */
+#define PCF2131_REG_CTRL5 0x04
+#define PCF2131_BIT_CTRL5_TSIE4 BIT(4)
+#define PCF2131_BIT_CTRL5_TSIE3 BIT(5)
+#define PCF2131_BIT_CTRL5_TSIE2 BIT(6)
+#define PCF2131_BIT_CTRL5_TSIE1 BIT(7)
+/* Software reset register */
+#define PCF2131_REG_SR_RESET 0x05
+#define PCF2131_SR_RESET_READ_PATTERN (BIT(2) | BIT(5))
+#define PCF2131_SR_RESET_CPR_CMD (PCF2131_SR_RESET_READ_PATTERN | BIT(7))
+/* Time and date registers */
+#define PCF2131_REG_TIME_BASE 0x07
+/* Alarm registers */
+#define PCF2131_REG_ALARM_BASE 0x0E
+/* CLKOUT control register */
+#define PCF2131_REG_CLKOUT 0x13
+/* Watchdog registers */
+#define PCF2131_REG_WD_CTL 0x35
+#define PCF2131_REG_WD_VAL 0x36
+/* Tamper timestamp1 registers */
+#define PCF2131_REG_TS1_BASE 0x14
+/* Tamper timestamp2 registers */
+#define PCF2131_REG_TS2_BASE 0x1B
+/* Tamper timestamp3 registers */
+#define PCF2131_REG_TS3_BASE 0x22
+/* Tamper timestamp4 registers */
+#define PCF2131_REG_TS4_BASE 0x29
+/* Interrupt mask registers */
+#define PCF2131_REG_INT_A_MASK1 0x31
+#define PCF2131_REG_INT_A_MASK2 0x32
+#define PCF2131_REG_INT_B_MASK1 0x33
+#define PCF2131_REG_INT_B_MASK2 0x34
+#define PCF2131_BIT_INT_BLIE BIT(0)
+#define PCF2131_BIT_INT_BIE BIT(1)
+#define PCF2131_BIT_INT_AIE BIT(2)
+#define PCF2131_BIT_INT_WD_CD BIT(3)
+#define PCF2131_BIT_INT_SI BIT(4)
+#define PCF2131_BIT_INT_MI BIT(5)
+#define PCF2131_CTRL2_IRQ_MASK ( \
+ PCF2127_BIT_CTRL2_AF | \
+ PCF2127_BIT_CTRL2_WDTF)
+#define PCF2131_CTRL4_IRQ_MASK ( \
+ PCF2131_BIT_CTRL4_TSF4 | \
+ PCF2131_BIT_CTRL4_TSF3 | \
+ PCF2131_BIT_CTRL4_TSF2 | \
+ PCF2131_BIT_CTRL4_TSF1)
+
+enum pcf21xx_type {
+ PCF2127,
+ PCF2129,
+ PCF2131,
+ PCF21XX_LAST_ID
+};
+
+struct pcf21xx_ts_config {
+ u8 reg_base; /* Base register to read timestamp values. */
+
+ /*
+ * If the TS input pin is driven to GND, an interrupt can be generated
+ * (supported by all variants).
+ */
+ u8 gnd_detect_reg; /* Interrupt control register address. */
+ u8 gnd_detect_bit; /* Interrupt bit. */
+
+ /*
+ * If the TS input pin is driven to an intermediate level between GND
+ * and supply, an interrupt can be generated (optional feature depending
+ * on variant).
+ */
+ u8 inter_detect_reg; /* Interrupt control register address. */
+ u8 inter_detect_bit; /* Interrupt bit. */
+
+ u8 ie_reg; /* Interrupt enable control register. */
+ u8 ie_bit; /* Interrupt enable bit. */
+};
+
+struct pcf21xx_config {
+ int type; /* IC variant */
+ int max_register;
+ unsigned int has_nvmem:1;
+ unsigned int has_bit_wd_ctl_cd0:1;
+ unsigned int wd_val_reg_readable:1; /* If watchdog value register can be read. */
+ unsigned int has_int_a_b:1; /* PCF2131 supports two interrupt outputs. */
+ u8 reg_time_base; /* Time/date base register. */
+ u8 regs_alarm_base; /* Alarm function base registers. */
+ u8 reg_wd_ctl; /* Watchdog control register. */
+ u8 reg_wd_val; /* Watchdog value register. */
+ u8 reg_clkout; /* Clkout register. */
+ int wdd_clock_hz_x1000; /* Watchdog clock in Hz multiplicated by 1000 */
+ int wdd_min_hw_heartbeat_ms;
+ unsigned int ts_count;
+ struct pcf21xx_ts_config ts[PCF2127_MAX_TS_SUPPORTED];
+ struct attribute_group attribute_group;
+};
+
struct pcf2127 {
struct rtc_device *rtc;
struct watchdog_device wdd;
struct regmap *regmap;
- time64_t ts;
- bool ts_valid;
+ const struct pcf21xx_config *cfg;
bool irq_enabled;
+ time64_t ts[PCF2127_MAX_TS_SUPPORTED]; /* Timestamp values. */
+ bool ts_valid[PCF2127_MAX_TS_SUPPORTED]; /* Timestamp valid indication. */
};
/*
static int pcf2127_rtc_read_time(struct device *dev, struct rtc_time *tm)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
- unsigned char buf[10];
+ unsigned char buf[7];
int ret;
/*
* Avoid reading CTRL2 register as it causes WD_VAL register
* value to reset to 0 which means watchdog is stopped.
*/
- ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL3,
- (buf + PCF2127_REG_CTRL3),
- ARRAY_SIZE(buf) - PCF2127_REG_CTRL3);
+ ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->reg_time_base,
+ buf, sizeof(buf));
if (ret) {
dev_err(dev, "%s: read error\n", __func__);
return ret;
}
- if (buf[PCF2127_REG_CTRL3] & PCF2127_BIT_CTRL3_BLF)
- dev_info(dev,
- "low voltage detected, check/replace RTC battery.\n");
-
/* Clock integrity is not guaranteed when OSF flag is set. */
- if (buf[PCF2127_REG_SC] & PCF2127_BIT_SC_OSF) {
+ if (buf[0] & PCF2127_BIT_SC_OSF) {
/*
* no need clear the flag here,
* it will be cleared once the new date is saved
}
dev_dbg(dev,
- "%s: raw data is cr3=%02x, sec=%02x, min=%02x, hr=%02x, "
+ "%s: raw data is sec=%02x, min=%02x, hr=%02x, "
"mday=%02x, wday=%02x, mon=%02x, year=%02x\n",
- __func__, buf[PCF2127_REG_CTRL3], buf[PCF2127_REG_SC],
- buf[PCF2127_REG_MN], buf[PCF2127_REG_HR],
- buf[PCF2127_REG_DM], buf[PCF2127_REG_DW],
- buf[PCF2127_REG_MO], buf[PCF2127_REG_YR]);
-
- tm->tm_sec = bcd2bin(buf[PCF2127_REG_SC] & 0x7F);
- tm->tm_min = bcd2bin(buf[PCF2127_REG_MN] & 0x7F);
- tm->tm_hour = bcd2bin(buf[PCF2127_REG_HR] & 0x3F); /* rtc hr 0-23 */
- tm->tm_mday = bcd2bin(buf[PCF2127_REG_DM] & 0x3F);
- tm->tm_wday = buf[PCF2127_REG_DW] & 0x07;
- tm->tm_mon = bcd2bin(buf[PCF2127_REG_MO] & 0x1F) - 1; /* rtc mn 1-12 */
- tm->tm_year = bcd2bin(buf[PCF2127_REG_YR]);
+ __func__, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6]);
+
+ tm->tm_sec = bcd2bin(buf[0] & 0x7F);
+ tm->tm_min = bcd2bin(buf[1] & 0x7F);
+ tm->tm_hour = bcd2bin(buf[2] & 0x3F);
+ tm->tm_mday = bcd2bin(buf[3] & 0x3F);
+ tm->tm_wday = buf[4] & 0x07;
+ tm->tm_mon = bcd2bin(buf[5] & 0x1F) - 1;
+ tm->tm_year = bcd2bin(buf[6]);
tm->tm_year += 100;
dev_dbg(dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
/* year */
buf[i++] = bin2bcd(tm->tm_year - 100);
- /* write register's data */
- err = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_SC, buf, i);
+ /* Write access to time registers:
+ * PCF2127/29: no special action required.
+ * PCF2131: requires setting the STOP and CPR bits. STOP bit needs to
+ * be cleared after time registers are updated.
+ */
+ if (pcf2127->cfg->type == PCF2131) {
+ err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+ PCF2127_BIT_CTRL1_STOP,
+ PCF2127_BIT_CTRL1_STOP);
+ if (err) {
+ dev_dbg(dev, "setting STOP bit failed\n");
+ return err;
+ }
+
+ err = regmap_write(pcf2127->regmap, PCF2131_REG_SR_RESET,
+ PCF2131_SR_RESET_CPR_CMD);
+ if (err) {
+ dev_dbg(dev, "sending CPR cmd failed\n");
+ return err;
+ }
+ }
+
+ /* write time register's data */
+ err = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->reg_time_base, buf, i);
if (err) {
- dev_err(dev,
- "%s: err=%d", __func__, err);
+ dev_dbg(dev, "%s: err=%d", __func__, err);
return err;
}
+ if (pcf2127->cfg->type == PCF2131) {
+ /* Clear STOP bit (PCF2131 only) after write is completed. */
+ err = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+ PCF2127_BIT_CTRL1_STOP, 0);
+ if (err) {
+ dev_dbg(dev, "clearing STOP bit failed\n");
+ return err;
+ }
+ }
+
return 0;
}
static int pcf2127_wdt_ping(struct watchdog_device *wdd)
{
+ int wd_val;
struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
- return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL, wdd->timeout);
+ /*
+ * Compute counter value of WATCHDG_TIM_VAL to obtain desired period
+ * in seconds, depending on the source clock frequency.
+ */
+ wd_val = ((wdd->timeout * pcf2127->cfg->wdd_clock_hz_x1000) / 1000) + 1;
+
+ return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val, wd_val);
}
/*
{
struct pcf2127 *pcf2127 = watchdog_get_drvdata(wdd);
- return regmap_write(pcf2127->regmap, PCF2127_REG_WD_VAL,
+ return regmap_write(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
PCF2127_WD_VAL_STOP);
}
.set_timeout = pcf2127_wdt_set_timeout,
};
+/*
+ * Compute watchdog period, t, in seconds, from the WATCHDG_TIM_VAL register
+ * value, n, and the clock frequency, f1000, in Hz x 1000.
+ *
+ * The PCF2127/29 datasheet gives t as:
+ * t = n / f
+ * The PCF2131 datasheet gives t as:
+ * t = (n - 1) / f
+ * For both variants, the watchdog is triggered when the WATCHDG_TIM_VAL reaches
+ * the value 1, and not zero. Consequently, the equation from the PCF2131
+ * datasheet seems to be the correct one for both variants.
+ */
+static int pcf2127_watchdog_get_period(int n, int f1000)
+{
+ return (1000 * (n - 1)) / f1000;
+}
+
static int pcf2127_watchdog_init(struct device *dev, struct pcf2127 *pcf2127)
{
- u32 wdd_timeout;
int ret;
if (!IS_ENABLED(CONFIG_WATCHDOG) ||
pcf2127->wdd.parent = dev;
pcf2127->wdd.info = &pcf2127_wdt_info;
pcf2127->wdd.ops = &pcf2127_watchdog_ops;
- pcf2127->wdd.min_timeout = PCF2127_WD_VAL_MIN;
- pcf2127->wdd.max_timeout = PCF2127_WD_VAL_MAX;
- pcf2127->wdd.timeout = PCF2127_WD_VAL_DEFAULT;
- pcf2127->wdd.min_hw_heartbeat_ms = 500;
+
+ pcf2127->wdd.min_timeout =
+ pcf2127_watchdog_get_period(
+ 2, pcf2127->cfg->wdd_clock_hz_x1000);
+ pcf2127->wdd.max_timeout =
+ pcf2127_watchdog_get_period(
+ 255, pcf2127->cfg->wdd_clock_hz_x1000);
+ pcf2127->wdd.timeout = PCF2127_WD_DEFAULT_TIMEOUT_S;
+
+ dev_dbg(dev, "%s clock = %d Hz / 1000\n", __func__,
+ pcf2127->cfg->wdd_clock_hz_x1000);
+
+ pcf2127->wdd.min_hw_heartbeat_ms = pcf2127->cfg->wdd_min_hw_heartbeat_ms;
pcf2127->wdd.status = WATCHDOG_NOWAYOUT_INIT_STATUS;
watchdog_set_drvdata(&pcf2127->wdd, pcf2127);
/* Test if watchdog timer is started by bootloader */
- ret = regmap_read(pcf2127->regmap, PCF2127_REG_WD_VAL, &wdd_timeout);
- if (ret)
- return ret;
+ if (pcf2127->cfg->wd_val_reg_readable) {
+ u32 wdd_timeout;
- if (wdd_timeout)
- set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+ ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_wd_val,
+ &wdd_timeout);
+ if (ret)
+ return ret;
+
+ if (wdd_timeout)
+ set_bit(WDOG_HW_RUNNING, &pcf2127->wdd.status);
+ }
return devm_watchdog_register_device(dev, &pcf2127->wdd);
}
if (ret)
return ret;
- ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
- sizeof(buf));
+ ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+ buf, sizeof(buf));
if (ret)
return ret;
buf[3] = bin2bcd(alrm->time.tm_mday);
buf[4] = PCF2127_BIT_ALARM_AE; /* Do not match on week day */
- ret = regmap_bulk_write(pcf2127->regmap, PCF2127_REG_ALARM_SC, buf,
- sizeof(buf));
+ ret = regmap_bulk_write(pcf2127->regmap, pcf2127->cfg->regs_alarm_base,
+ buf, sizeof(buf));
if (ret)
return ret;
}
/*
- * This function reads ctrl2 register, caller is responsible for calling
- * pcf2127_wdt_active_ping()
+ * This function reads one timestamp function data, caller is responsible for
+ * calling pcf2127_wdt_active_ping()
*/
-static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts)
+static int pcf2127_rtc_ts_read(struct device *dev, time64_t *ts,
+ int ts_id)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
struct rtc_time tm;
int ret;
- unsigned char data[25];
+ unsigned char data[7];
- ret = regmap_bulk_read(pcf2127->regmap, PCF2127_REG_CTRL1, data,
- sizeof(data));
+ ret = regmap_bulk_read(pcf2127->regmap, pcf2127->cfg->ts[ts_id].reg_base,
+ data, sizeof(data));
if (ret) {
dev_err(dev, "%s: read error ret=%d\n", __func__, ret);
return ret;
}
dev_dbg(dev,
- "%s: raw data is cr1=%02x, cr2=%02x, cr3=%02x, ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
- __func__, data[PCF2127_REG_CTRL1], data[PCF2127_REG_CTRL2],
- data[PCF2127_REG_CTRL3], data[PCF2127_REG_TS_SC],
- data[PCF2127_REG_TS_MN], data[PCF2127_REG_TS_HR],
- data[PCF2127_REG_TS_DM], data[PCF2127_REG_TS_MO],
- data[PCF2127_REG_TS_YR]);
-
- tm.tm_sec = bcd2bin(data[PCF2127_REG_TS_SC] & 0x7F);
- tm.tm_min = bcd2bin(data[PCF2127_REG_TS_MN] & 0x7F);
- tm.tm_hour = bcd2bin(data[PCF2127_REG_TS_HR] & 0x3F);
- tm.tm_mday = bcd2bin(data[PCF2127_REG_TS_DM] & 0x3F);
+ "%s: raw data is ts_sc=%02x, ts_mn=%02x, ts_hr=%02x, ts_dm=%02x, ts_mo=%02x, ts_yr=%02x\n",
+ __func__, data[1], data[2], data[3], data[4], data[5], data[6]);
+
+ tm.tm_sec = bcd2bin(data[1] & 0x7F);
+ tm.tm_min = bcd2bin(data[2] & 0x7F);
+ tm.tm_hour = bcd2bin(data[3] & 0x3F);
+ tm.tm_mday = bcd2bin(data[4] & 0x3F);
/* TS_MO register (month) value range: 1-12 */
- tm.tm_mon = bcd2bin(data[PCF2127_REG_TS_MO] & 0x1F) - 1;
- tm.tm_year = bcd2bin(data[PCF2127_REG_TS_YR]);
+ tm.tm_mon = bcd2bin(data[5] & 0x1F) - 1;
+ tm.tm_year = bcd2bin(data[6]);
if (tm.tm_year < 70)
tm.tm_year += 100; /* assume we are in 1970...2069 */
return 0;
};
-static void pcf2127_rtc_ts_snapshot(struct device *dev)
+static void pcf2127_rtc_ts_snapshot(struct device *dev, int ts_id)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
int ret;
+ if (ts_id >= pcf2127->cfg->ts_count)
+ return;
+
/* Let userspace read the first timestamp */
- if (pcf2127->ts_valid)
+ if (pcf2127->ts_valid[ts_id])
return;
- ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts);
+ ret = pcf2127_rtc_ts_read(dev, &pcf2127->ts[ts_id], ts_id);
if (!ret)
- pcf2127->ts_valid = true;
+ pcf2127->ts_valid[ts_id] = true;
}
static irqreturn_t pcf2127_rtc_irq(int irq, void *dev)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
- unsigned int ctrl1, ctrl2;
+ unsigned int ctrl2;
int ret = 0;
- ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
- if (ret)
- return IRQ_NONE;
-
ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
if (ret)
return IRQ_NONE;
- if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
- return IRQ_NONE;
+ if (pcf2127->cfg->ts_count == 1) {
+ /* PCF2127/29 */
+ unsigned int ctrl1;
+
+ ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
+ if (ret)
+ return IRQ_NONE;
+
+ if (!(ctrl1 & PCF2127_CTRL1_IRQ_MASK || ctrl2 & PCF2127_CTRL2_IRQ_MASK))
+ return IRQ_NONE;
+
+ if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
+ pcf2127_rtc_ts_snapshot(dev, 0);
+
+ if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
+ regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
+ ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+
+ if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
+ regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+ ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+ } else {
+ /* PCF2131. */
+ unsigned int ctrl4;
+
+ ret = regmap_read(pcf2127->regmap, PCF2131_REG_CTRL4, &ctrl4);
+ if (ret)
+ return IRQ_NONE;
+
+ if (!(ctrl4 & PCF2131_CTRL4_IRQ_MASK || ctrl2 & PCF2131_CTRL2_IRQ_MASK))
+ return IRQ_NONE;
- if (ctrl1 & PCF2127_BIT_CTRL1_TSF1 || ctrl2 & PCF2127_BIT_CTRL2_TSF2)
- pcf2127_rtc_ts_snapshot(dev);
+ if (ctrl4 & PCF2131_CTRL4_IRQ_MASK) {
+ int i;
+ int tsf_bit = PCF2131_BIT_CTRL4_TSF1; /* Start at bit 7. */
- if (ctrl1 & PCF2127_CTRL1_IRQ_MASK)
- regmap_write(pcf2127->regmap, PCF2127_REG_CTRL1,
- ctrl1 & ~PCF2127_CTRL1_IRQ_MASK);
+ for (i = 0; i < pcf2127->cfg->ts_count; i++) {
+ if (ctrl4 & tsf_bit)
+ pcf2127_rtc_ts_snapshot(dev, i);
- if (ctrl2 & PCF2127_CTRL2_IRQ_MASK)
- regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
- ctrl2 & ~PCF2127_CTRL2_IRQ_MASK);
+ tsf_bit = tsf_bit >> 1;
+ }
+
+ regmap_write(pcf2127->regmap, PCF2131_REG_CTRL4,
+ ctrl4 & ~PCF2131_CTRL4_IRQ_MASK);
+ }
+
+ if (ctrl2 & PCF2131_CTRL2_IRQ_MASK)
+ regmap_write(pcf2127->regmap, PCF2127_REG_CTRL2,
+ ctrl2 & ~PCF2131_CTRL2_IRQ_MASK);
+ }
if (ctrl2 & PCF2127_BIT_CTRL2_AF)
rtc_update_irq(pcf2127->rtc, 1, RTC_IRQF | RTC_AF);
/* sysfs interface */
-static ssize_t timestamp0_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
+static ssize_t timestamp_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count, int ts_id)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
int ret;
+ if (ts_id >= pcf2127->cfg->ts_count)
+ return 0;
+
if (pcf2127->irq_enabled) {
- pcf2127->ts_valid = false;
+ pcf2127->ts_valid[ts_id] = false;
} else {
- ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
- PCF2127_BIT_CTRL1_TSF1, 0);
+ /* Always clear GND interrupt bit. */
+ ret = regmap_update_bits(pcf2127->regmap,
+ pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+ pcf2127->cfg->ts[ts_id].gnd_detect_bit,
+ 0);
+
if (ret) {
- dev_err(dev, "%s: update ctrl1 ret=%d\n", __func__, ret);
+ dev_err(dev, "%s: update TS gnd detect ret=%d\n", __func__, ret);
return ret;
}
- ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
- PCF2127_BIT_CTRL2_TSF2, 0);
- if (ret) {
- dev_err(dev, "%s: update ctrl2 ret=%d\n", __func__, ret);
- return ret;
+ if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+ /* Clear intermediate level interrupt bit if supported. */
+ ret = regmap_update_bits(pcf2127->regmap,
+ pcf2127->cfg->ts[ts_id].inter_detect_reg,
+ pcf2127->cfg->ts[ts_id].inter_detect_bit,
+ 0);
+ if (ret) {
+ dev_err(dev, "%s: update TS intermediate level detect ret=%d\n",
+ __func__, ret);
+ return ret;
+ }
}
ret = pcf2127_wdt_active_ping(&pcf2127->wdd);
}
return count;
+}
+
+static ssize_t timestamp0_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return timestamp_store(dev, attr, buf, count, 0);
};
-static ssize_t timestamp0_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t timestamp1_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return timestamp_store(dev, attr, buf, count, 1);
+};
+
+static ssize_t timestamp2_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return timestamp_store(dev, attr, buf, count, 2);
+};
+
+static ssize_t timestamp3_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return timestamp_store(dev, attr, buf, count, 3);
+};
+
+static ssize_t timestamp_show(struct device *dev,
+ struct device_attribute *attr, char *buf,
+ int ts_id)
{
struct pcf2127 *pcf2127 = dev_get_drvdata(dev->parent);
- unsigned int ctrl1, ctrl2;
int ret;
time64_t ts;
+ if (ts_id >= pcf2127->cfg->ts_count)
+ return 0;
+
if (pcf2127->irq_enabled) {
- if (!pcf2127->ts_valid)
+ if (!pcf2127->ts_valid[ts_id])
return 0;
- ts = pcf2127->ts;
+ ts = pcf2127->ts[ts_id];
} else {
- ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL1, &ctrl1);
- if (ret)
- return 0;
+ u8 valid_low = 0;
+ u8 valid_inter = 0;
+ unsigned int ctrl;
- ret = regmap_read(pcf2127->regmap, PCF2127_REG_CTRL2, &ctrl2);
+ /* Check if TS input pin is driven to GND, supported by all
+ * variants.
+ */
+ ret = regmap_read(pcf2127->regmap,
+ pcf2127->cfg->ts[ts_id].gnd_detect_reg,
+ &ctrl);
if (ret)
return 0;
- if (!(ctrl1 & PCF2127_BIT_CTRL1_TSF1) &&
- !(ctrl2 & PCF2127_BIT_CTRL2_TSF2))
+ valid_low = ctrl & pcf2127->cfg->ts[ts_id].gnd_detect_bit;
+
+ if (pcf2127->cfg->ts[ts_id].inter_detect_bit) {
+ /* Check if TS input pin is driven to intermediate level
+ * between GND and supply, if supported by variant.
+ */
+ ret = regmap_read(pcf2127->regmap,
+ pcf2127->cfg->ts[ts_id].inter_detect_reg,
+ &ctrl);
+ if (ret)
+ return 0;
+
+ valid_inter = ctrl & pcf2127->cfg->ts[ts_id].inter_detect_bit;
+ }
+
+ if (!valid_low && !valid_inter)
return 0;
- ret = pcf2127_rtc_ts_read(dev->parent, &ts);
+ ret = pcf2127_rtc_ts_read(dev->parent, &ts, ts_id);
if (ret)
return 0;
return ret;
}
return sprintf(buf, "%llu\n", (unsigned long long)ts);
+}
+
+static ssize_t timestamp0_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return timestamp_show(dev, attr, buf, 0);
+};
+
+static ssize_t timestamp1_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return timestamp_show(dev, attr, buf, 1);
+};
+
+static ssize_t timestamp2_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return timestamp_show(dev, attr, buf, 2);
+};
+
+static ssize_t timestamp3_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return timestamp_show(dev, attr, buf, 3);
};
static DEVICE_ATTR_RW(timestamp0);
+static DEVICE_ATTR_RW(timestamp1);
+static DEVICE_ATTR_RW(timestamp2);
+static DEVICE_ATTR_RW(timestamp3);
static struct attribute *pcf2127_attrs[] = {
&dev_attr_timestamp0.attr,
NULL
};
-static const struct attribute_group pcf2127_attr_group = {
- .attrs = pcf2127_attrs,
+static struct attribute *pcf2131_attrs[] = {
+ &dev_attr_timestamp0.attr,
+ &dev_attr_timestamp1.attr,
+ &dev_attr_timestamp2.attr,
+ &dev_attr_timestamp3.attr,
+ NULL
};
+static struct pcf21xx_config pcf21xx_cfg[] = {
+ [PCF2127] = {
+ .type = PCF2127,
+ .max_register = 0x1d,
+ .has_nvmem = 1,
+ .has_bit_wd_ctl_cd0 = 1,
+ .wd_val_reg_readable = 1,
+ .has_int_a_b = 0,
+ .reg_time_base = PCF2127_REG_TIME_BASE,
+ .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+ .reg_wd_ctl = PCF2127_REG_WD_CTL,
+ .reg_wd_val = PCF2127_REG_WD_VAL,
+ .reg_clkout = PCF2127_REG_CLKOUT,
+ .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+ .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+ .ts_count = 1,
+ .ts[0] = {
+ .reg_base = PCF2127_REG_TS1_BASE,
+ .gnd_detect_reg = PCF2127_REG_CTRL1,
+ .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+ .inter_detect_reg = PCF2127_REG_CTRL2,
+ .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+ .ie_reg = PCF2127_REG_CTRL2,
+ .ie_bit = PCF2127_BIT_CTRL2_TSIE,
+ },
+ .attribute_group = {
+ .attrs = pcf2127_attrs,
+ },
+ },
+ [PCF2129] = {
+ .type = PCF2129,
+ .max_register = 0x19,
+ .has_nvmem = 0,
+ .has_bit_wd_ctl_cd0 = 0,
+ .wd_val_reg_readable = 1,
+ .has_int_a_b = 0,
+ .reg_time_base = PCF2127_REG_TIME_BASE,
+ .regs_alarm_base = PCF2127_REG_ALARM_BASE,
+ .reg_wd_ctl = PCF2127_REG_WD_CTL,
+ .reg_wd_val = PCF2127_REG_WD_VAL,
+ .reg_clkout = PCF2127_REG_CLKOUT,
+ .wdd_clock_hz_x1000 = PCF2127_WD_CLOCK_HZ_X1000,
+ .wdd_min_hw_heartbeat_ms = PCF2127_WD_MIN_HW_HEARTBEAT_MS,
+ .ts_count = 1,
+ .ts[0] = {
+ .reg_base = PCF2127_REG_TS1_BASE,
+ .gnd_detect_reg = PCF2127_REG_CTRL1,
+ .gnd_detect_bit = PCF2127_BIT_CTRL1_TSF1,
+ .inter_detect_reg = PCF2127_REG_CTRL2,
+ .inter_detect_bit = PCF2127_BIT_CTRL2_TSF2,
+ .ie_reg = PCF2127_REG_CTRL2,
+ .ie_bit = PCF2127_BIT_CTRL2_TSIE,
+ },
+ .attribute_group = {
+ .attrs = pcf2127_attrs,
+ },
+ },
+ [PCF2131] = {
+ .type = PCF2131,
+ .max_register = 0x36,
+ .has_nvmem = 0,
+ .has_bit_wd_ctl_cd0 = 0,
+ .wd_val_reg_readable = 0,
+ .has_int_a_b = 1,
+ .reg_time_base = PCF2131_REG_TIME_BASE,
+ .regs_alarm_base = PCF2131_REG_ALARM_BASE,
+ .reg_wd_ctl = PCF2131_REG_WD_CTL,
+ .reg_wd_val = PCF2131_REG_WD_VAL,
+ .reg_clkout = PCF2131_REG_CLKOUT,
+ .wdd_clock_hz_x1000 = PCF2131_WD_CLOCK_HZ_X1000,
+ .wdd_min_hw_heartbeat_ms = PCF2131_WD_MIN_HW_HEARTBEAT_MS,
+ .ts_count = 4,
+ .ts[0] = {
+ .reg_base = PCF2131_REG_TS1_BASE,
+ .gnd_detect_reg = PCF2131_REG_CTRL4,
+ .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF1,
+ .inter_detect_bit = 0,
+ .ie_reg = PCF2131_REG_CTRL5,
+ .ie_bit = PCF2131_BIT_CTRL5_TSIE1,
+ },
+ .ts[1] = {
+ .reg_base = PCF2131_REG_TS2_BASE,
+ .gnd_detect_reg = PCF2131_REG_CTRL4,
+ .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF2,
+ .inter_detect_bit = 0,
+ .ie_reg = PCF2131_REG_CTRL5,
+ .ie_bit = PCF2131_BIT_CTRL5_TSIE2,
+ },
+ .ts[2] = {
+ .reg_base = PCF2131_REG_TS3_BASE,
+ .gnd_detect_reg = PCF2131_REG_CTRL4,
+ .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF3,
+ .inter_detect_bit = 0,
+ .ie_reg = PCF2131_REG_CTRL5,
+ .ie_bit = PCF2131_BIT_CTRL5_TSIE3,
+ },
+ .ts[3] = {
+ .reg_base = PCF2131_REG_TS4_BASE,
+ .gnd_detect_reg = PCF2131_REG_CTRL4,
+ .gnd_detect_bit = PCF2131_BIT_CTRL4_TSF4,
+ .inter_detect_bit = 0,
+ .ie_reg = PCF2131_REG_CTRL5,
+ .ie_bit = PCF2131_BIT_CTRL5_TSIE4,
+ },
+ .attribute_group = {
+ .attrs = pcf2131_attrs,
+ },
+ },
+};
+
+/*
+ * Enable timestamp function and corresponding interrupt(s).
+ */
+static int pcf2127_enable_ts(struct device *dev, int ts_id)
+{
+ struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+ int ret;
+
+ if (ts_id >= pcf2127->cfg->ts_count) {
+ dev_err(dev, "%s: invalid tamper detection ID (%d)\n",
+ __func__, ts_id);
+ return -EINVAL;
+ }
+
+ /* Enable timestamp function. */
+ ret = regmap_update_bits(pcf2127->regmap,
+ pcf2127->cfg->ts[ts_id].reg_base,
+ PCF2127_BIT_TS_CTRL_TSOFF |
+ PCF2127_BIT_TS_CTRL_TSM,
+ PCF2127_BIT_TS_CTRL_TSM);
+ if (ret) {
+ dev_err(dev, "%s: tamper detection config (ts%d_ctrl) failed\n",
+ __func__, ts_id);
+ return ret;
+ }
+
+ /*
+ * Enable interrupt generation when TSF timestamp flag is set.
+ * Interrupt signals are open-drain outputs and can be left floating if
+ * unused.
+ */
+ ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->ts[ts_id].ie_reg,
+ pcf2127->cfg->ts[ts_id].ie_bit,
+ pcf2127->cfg->ts[ts_id].ie_bit);
+ if (ret) {
+ dev_err(dev, "%s: tamper detection TSIE%d config failed\n",
+ __func__, ts_id);
+ return ret;
+ }
+
+ return ret;
+}
+
+/* Route all interrupt sources to INT A pin. */
+static int pcf2127_configure_interrupt_pins(struct device *dev)
+{
+ struct pcf2127 *pcf2127 = dev_get_drvdata(dev);
+ int ret;
+
+ /* Mask bits need to be cleared to enable corresponding
+ * interrupt source.
+ */
+ ret = regmap_write(pcf2127->regmap,
+ PCF2131_REG_INT_A_MASK1, 0);
+ if (ret)
+ return ret;
+
+ ret = regmap_write(pcf2127->regmap,
+ PCF2131_REG_INT_A_MASK2, 0);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
static int pcf2127_probe(struct device *dev, struct regmap *regmap,
- int alarm_irq, const char *name, bool is_pcf2127)
+ int alarm_irq, const struct pcf21xx_config *config)
{
struct pcf2127 *pcf2127;
int ret = 0;
return -ENOMEM;
pcf2127->regmap = regmap;
+ pcf2127->cfg = config;
dev_set_drvdata(dev, pcf2127);
pcf2127->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
pcf2127->rtc->range_max = RTC_TIMESTAMP_END_2099;
pcf2127->rtc->set_start_time = true; /* Sets actual start to 1970 */
- set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
- clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+
+ /*
+ * PCF2127/29 do not work correctly when setting alarms at 1s intervals.
+ * PCF2131 is ok.
+ */
+ if (pcf2127->cfg->type == PCF2127 || pcf2127->cfg->type == PCF2129) {
+ set_bit(RTC_FEATURE_ALARM_RES_2S, pcf2127->rtc->features);
+ clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, pcf2127->rtc->features);
+ }
+
clear_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
if (alarm_irq > 0) {
set_bit(RTC_FEATURE_ALARM, pcf2127->rtc->features);
}
- if (is_pcf2127) {
+ if (pcf2127->cfg->has_int_a_b) {
+ /* Configure int A/B pins, independently of alarm_irq. */
+ ret = pcf2127_configure_interrupt_pins(dev);
+ if (ret) {
+ dev_err(dev, "failed to configure interrupt pins\n");
+ return ret;
+ }
+ }
+
+ if (pcf2127->cfg->has_nvmem) {
struct nvmem_config nvmem_cfg = {
.priv = pcf2127,
.reg_read = pcf2127_nvmem_read,
* The "Power-On Reset Override" facility prevents the RTC to do a reset
* after power on. For normal operation the PORO must be disabled.
*/
- regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
+ ret = regmap_clear_bits(pcf2127->regmap, PCF2127_REG_CTRL1,
PCF2127_BIT_CTRL1_POR_OVRD);
+ if (ret < 0)
+ return ret;
- ret = regmap_read(pcf2127->regmap, PCF2127_REG_CLKOUT, &val);
+ ret = regmap_read(pcf2127->regmap, pcf2127->cfg->reg_clkout, &val);
if (ret < 0)
return ret;
if (!(val & PCF2127_BIT_CLKOUT_OTPR)) {
- ret = regmap_set_bits(pcf2127->regmap, PCF2127_REG_CLKOUT,
+ ret = regmap_set_bits(pcf2127->regmap, pcf2127->cfg->reg_clkout,
PCF2127_BIT_CLKOUT_OTPR);
if (ret < 0)
return ret;
/*
* Watchdog timer enabled and reset pin /RST activated when timed out.
- * Select 1Hz clock source for watchdog timer.
+ * Select 1Hz clock source for watchdog timer (1/4Hz for PCF2131).
* Note: Countdown timer disabled and not available.
- * For pca2129, pcf2129, only bit[7] is for Symbol WD_CD
+ * For pca2129, pcf2129 and pcf2131, only bit[7] is for Symbol WD_CD
* of register watchdg_tim_ctl. The bit[6] is labeled
* as T. Bits labeled as T must always be written with
* logic 0.
*/
- ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_WD_CTL,
+ ret = regmap_update_bits(pcf2127->regmap, pcf2127->cfg->reg_wd_ctl,
PCF2127_BIT_WD_CTL_CD1 |
PCF2127_BIT_WD_CTL_CD0 |
PCF2127_BIT_WD_CTL_TF1 |
PCF2127_BIT_WD_CTL_TF0,
PCF2127_BIT_WD_CTL_CD1 |
- (is_pcf2127 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
+ (pcf2127->cfg->has_bit_wd_ctl_cd0 ? PCF2127_BIT_WD_CTL_CD0 : 0) |
PCF2127_BIT_WD_CTL_TF1);
if (ret) {
dev_err(dev, "%s: watchdog config (wd_ctl) failed\n", __func__);
}
/*
- * Enable timestamp function and store timestamp of first trigger
- * event until TSF1 and TSF2 interrupt flags are cleared.
- */
- ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_TS_CTRL,
- PCF2127_BIT_TS_CTRL_TSOFF |
- PCF2127_BIT_TS_CTRL_TSM,
- PCF2127_BIT_TS_CTRL_TSM);
- if (ret) {
- dev_err(dev, "%s: tamper detection config (ts_ctrl) failed\n",
- __func__);
- return ret;
- }
-
- /*
- * Enable interrupt generation when TSF1 or TSF2 timestamp flags
- * are set. Interrupt signal is an open-drain output and can be
- * left floating if unused.
+ * Enable timestamp functions 1 to 4.
*/
- ret = regmap_update_bits(pcf2127->regmap, PCF2127_REG_CTRL2,
- PCF2127_BIT_CTRL2_TSIE,
- PCF2127_BIT_CTRL2_TSIE);
- if (ret) {
- dev_err(dev, "%s: tamper detection config (ctrl2) failed\n",
- __func__);
- return ret;
+ for (int i = 0; i < pcf2127->cfg->ts_count; i++) {
+ ret = pcf2127_enable_ts(dev, i);
+ if (ret)
+ return ret;
}
- ret = rtc_add_group(pcf2127->rtc, &pcf2127_attr_group);
+ ret = rtc_add_group(pcf2127->rtc, &pcf2127->cfg->attribute_group);
if (ret) {
dev_err(dev, "%s: tamper sysfs registering failed\n",
__func__);
#ifdef CONFIG_OF
static const struct of_device_id pcf2127_of_match[] = {
- { .compatible = "nxp,pcf2127" },
- { .compatible = "nxp,pcf2129" },
- { .compatible = "nxp,pca2129" },
+ { .compatible = "nxp,pcf2127", .data = &pcf21xx_cfg[PCF2127] },
+ { .compatible = "nxp,pcf2129", .data = &pcf21xx_cfg[PCF2129] },
+ { .compatible = "nxp,pca2129", .data = &pcf21xx_cfg[PCF2129] },
+ { .compatible = "nxp,pcf2131", .data = &pcf21xx_cfg[PCF2131] },
{}
};
MODULE_DEVICE_TABLE(of, pcf2127_of_match);
static struct i2c_driver pcf2127_i2c_driver;
static const struct i2c_device_id pcf2127_i2c_id[] = {
- { "pcf2127", 1 },
- { "pcf2129", 0 },
- { "pca2129", 0 },
+ { "pcf2127", PCF2127 },
+ { "pcf2129", PCF2129 },
+ { "pca2129", PCF2129 },
+ { "pcf2131", PCF2131 },
{ }
};
MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id);
static int pcf2127_i2c_probe(struct i2c_client *client)
{
- const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client);
struct regmap *regmap;
- static const struct regmap_config config = {
+ static struct regmap_config config = {
.reg_bits = 8,
.val_bits = 8,
- .max_register = 0x1d,
};
+ const struct pcf21xx_config *variant;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
return -ENODEV;
+ if (client->dev.of_node) {
+ variant = of_device_get_match_data(&client->dev);
+ if (!variant)
+ return -ENODEV;
+ } else {
+ enum pcf21xx_type type =
+ i2c_match_id(pcf2127_i2c_id, client)->driver_data;
+
+ if (type >= PCF21XX_LAST_ID)
+ return -ENODEV;
+ variant = &pcf21xx_cfg[type];
+ }
+
+ config.max_register = variant->max_register,
+
regmap = devm_regmap_init(&client->dev, &pcf2127_i2c_regmap,
&client->dev, &config);
if (IS_ERR(regmap)) {
return PTR_ERR(regmap);
}
- return pcf2127_probe(&client->dev, regmap, client->irq,
- pcf2127_i2c_driver.driver.name, id->driver_data);
+ return pcf2127_probe(&client->dev, regmap, client->irq, variant);
}
static struct i2c_driver pcf2127_i2c_driver = {
#if IS_ENABLED(CONFIG_SPI_MASTER)
static struct spi_driver pcf2127_spi_driver;
+static const struct spi_device_id pcf2127_spi_id[];
static int pcf2127_spi_probe(struct spi_device *spi)
{
- static const struct regmap_config config = {
+ static struct regmap_config config = {
.reg_bits = 8,
.val_bits = 8,
.read_flag_mask = 0xa0,
.write_flag_mask = 0x20,
- .max_register = 0x1d,
};
struct regmap *regmap;
+ const struct pcf21xx_config *variant;
+
+ if (spi->dev.of_node) {
+ variant = of_device_get_match_data(&spi->dev);
+ if (!variant)
+ return -ENODEV;
+ } else {
+ enum pcf21xx_type type = spi_get_device_id(spi)->driver_data;
+
+ if (type >= PCF21XX_LAST_ID)
+ return -ENODEV;
+ variant = &pcf21xx_cfg[type];
+ }
+
+ config.max_register = variant->max_register,
regmap = devm_regmap_init_spi(spi, &config);
if (IS_ERR(regmap)) {
return PTR_ERR(regmap);
}
- return pcf2127_probe(&spi->dev, regmap, spi->irq,
- pcf2127_spi_driver.driver.name,
- spi_get_device_id(spi)->driver_data);
+ return pcf2127_probe(&spi->dev, regmap, spi->irq, variant);
}
static const struct spi_device_id pcf2127_spi_id[] = {
- { "pcf2127", 1 },
- { "pcf2129", 0 },
- { "pca2129", 0 },
+ { "pcf2127", PCF2127 },
+ { "pcf2129", PCF2129 },
+ { "pca2129", PCF2129 },
+ { "pcf2131", PCF2131 },
{ }
};
MODULE_DEVICE_TABLE(spi, pcf2127_spi_id);
module_exit(pcf2127_exit)
MODULE_AUTHOR("Renaud Cerrato <r.cerrato@til-technologies.fr>");
-MODULE_DESCRIPTION("NXP PCF2127/29 RTC driver");
+MODULE_DESCRIPTION("NXP PCF2127/29/31 RTC driver");
MODULE_LICENSE("GPL v2");
#include <linux/bcd.h>
#include <linux/rtc.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/pm_wakeirq.h>
#include <linux/regmap.h>
}
#endif
-enum pcf85063_type {
- PCF85063,
- PCF85063TP,
- PCF85063A,
- RV8263,
- PCF85063_LAST_ID
+static const struct pcf85063_config config_pcf85063 = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
+ },
};
-static struct pcf85063_config pcf85063_cfg[] = {
- [PCF85063] = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
- },
- },
- [PCF85063TP] = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x0a,
- },
- },
- [PCF85063A] = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
+static const struct pcf85063_config config_pcf85063tp = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x0a,
},
- [RV8263] = {
- .regmap = {
- .reg_bits = 8,
- .val_bits = 8,
- .max_register = 0x11,
- },
- .has_alarms = 1,
- .force_cap_7000 = 1,
+};
+
+static const struct pcf85063_config config_pcf85063a = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
},
+ .has_alarms = 1,
};
-static const struct i2c_device_id pcf85063_ids[];
+static const struct pcf85063_config config_rv8263 = {
+ .regmap = {
+ .reg_bits = 8,
+ .val_bits = 8,
+ .max_register = 0x11,
+ },
+ .has_alarms = 1,
+ .force_cap_7000 = 1,
+};
static int pcf85063_probe(struct i2c_client *client)
{
if (!pcf85063)
return -ENOMEM;
- if (client->dev.of_node) {
- config = of_device_get_match_data(&client->dev);
- if (!config)
- return -ENODEV;
- } else {
- enum pcf85063_type type =
- i2c_match_id(pcf85063_ids, client)->driver_data;
- if (type >= PCF85063_LAST_ID)
- return -ENODEV;
- config = &pcf85063_cfg[type];
- }
+ config = i2c_get_match_data(client);
+ if (!config)
+ return -ENODEV;
pcf85063->regmap = devm_regmap_init_i2c(client, &config->regmap);
if (IS_ERR(pcf85063->regmap))
}
static const struct i2c_device_id pcf85063_ids[] = {
- { "pca85073a", PCF85063A },
- { "pcf85063", PCF85063 },
- { "pcf85063tp", PCF85063TP },
- { "pcf85063a", PCF85063A },
- { "rv8263", RV8263 },
+ { "pca85073a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+ { "pcf85063", .driver_data = (kernel_ulong_t)&config_pcf85063 },
+ { "pcf85063tp", .driver_data = (kernel_ulong_t)&config_pcf85063tp },
+ { "pcf85063a", .driver_data = (kernel_ulong_t)&config_pcf85063a },
+ { "rv8263", .driver_data = (kernel_ulong_t)&config_rv8263 },
{}
};
MODULE_DEVICE_TABLE(i2c, pcf85063_ids);
#ifdef CONFIG_OF
static const struct of_device_id pcf85063_of_match[] = {
- { .compatible = "nxp,pca85073a", .data = &pcf85063_cfg[PCF85063A] },
- { .compatible = "nxp,pcf85063", .data = &pcf85063_cfg[PCF85063] },
- { .compatible = "nxp,pcf85063tp", .data = &pcf85063_cfg[PCF85063TP] },
- { .compatible = "nxp,pcf85063a", .data = &pcf85063_cfg[PCF85063A] },
- { .compatible = "microcrystal,rv8263", .data = &pcf85063_cfg[RV8263] },
+ { .compatible = "nxp,pca85073a", .data = &config_pcf85063a },
+ { .compatible = "nxp,pcf85063", .data = &config_pcf85063 },
+ { .compatible = "nxp,pcf85063tp", .data = &config_pcf85063tp },
+ { .compatible = "nxp,pcf85063a", .data = &config_pcf85063a },
+ { .compatible = "microcrystal,rv8263", .data = &config_rv8263 },
{}
};
MODULE_DEVICE_TABLE(of, pcf85063_of_match);
#include <linux/errno.h>
#include <linux/bcd.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/regmap.h>
/*
},
};
int ret, i, err;
+ bool wakeup_source;
if (data)
config = data;
pcf85363->rtc->ops = &rtc_ops;
pcf85363->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
pcf85363->rtc->range_max = RTC_TIMESTAMP_END_2099;
- clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+
+ wakeup_source = device_property_read_bool(&client->dev,
+ "wakeup-source");
+ if (client->irq > 0 || wakeup_source) {
+ regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
+ regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
+ PIN_IO_INTA_OUT, PIN_IO_INTAPM);
+ }
if (client->irq > 0) {
unsigned long irqflags = IRQF_TRIGGER_LOW;
if (dev_fwnode(&client->dev))
irqflags = 0;
-
- regmap_write(pcf85363->regmap, CTRL_FLAGS, 0);
- regmap_update_bits(pcf85363->regmap, CTRL_PIN_IO,
- PIN_IO_INTA_OUT, PIN_IO_INTAPM);
ret = devm_request_threaded_irq(&client->dev, client->irq,
NULL, pcf85363_rtc_handle_irq,
irqflags | IRQF_ONESHOT,
"pcf85363", client);
- if (ret)
- dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
- else
- set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+ if (ret) {
+ dev_warn(&client->dev,
+ "unable to request IRQ, alarms disabled\n");
+ client->irq = 0;
+ }
+ }
+
+ if (client->irq > 0 || wakeup_source) {
+ device_init_wakeup(&client->dev, true);
+ set_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
+ } else {
+ clear_bit(RTC_FEATURE_ALARM, pcf85363->rtc->features);
}
ret = devm_rtc_register_device(pcf85363->rtc);
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include "rtc-sa1100.h"
#include <linux/bcd.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
/*
* Ricoh has a family of I2C based RTCs, which differ only slightly from
rs5c372->client = client;
i2c_set_clientdata(client, rs5c372);
if (client->dev.of_node) {
- rs5c372->type = (enum rtc_type)
- of_device_get_match_data(&client->dev);
+ rs5c372->type = (uintptr_t)of_device_get_match_data(&client->dev);
} else {
const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client);
rs5c372->type = id->driver_data;
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
.max_register = 0x37,
};
+static u8 rv3028_set_trickle_charger(struct rv3028_data *rv3028,
+ struct i2c_client *client)
+{
+ int ret, val_old, val;
+ u32 ohms, chargeable;
+
+ ret = regmap_read(rv3028->regmap, RV3028_BACKUP, &val_old);
+ if (ret < 0)
+ return ret;
+
+ /* mask out only trickle charger bits */
+ val_old = val_old & (RV3028_BACKUP_TCE | RV3028_BACKUP_TCR_MASK);
+ val = val_old;
+
+ /* setup trickle charger */
+ if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
+ &ohms)) {
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
+ if (ohms == rv3028_trickle_resistors[i])
+ break;
+
+ if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
+ /* enable trickle charger and its resistor */
+ val = RV3028_BACKUP_TCE | i;
+ } else {
+ dev_warn(&client->dev, "invalid trickle resistor value\n");
+ }
+ }
+
+ if (!device_property_read_u32(&client->dev, "aux-voltage-chargeable",
+ &chargeable)) {
+ switch (chargeable) {
+ case 0:
+ val &= ~RV3028_BACKUP_TCE;
+ break;
+ case 1:
+ val |= RV3028_BACKUP_TCE;
+ break;
+ default:
+ dev_warn(&client->dev,
+ "unsupported aux-voltage-chargeable value\n");
+ break;
+ }
+ }
+
+ /* only update EEPROM if changes are necessary */
+ if (val_old != val) {
+ ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE |
+ RV3028_BACKUP_TCR_MASK, val);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
static int rv3028_probe(struct i2c_client *client)
{
struct rv3028_data *rv3028;
int ret, status;
- u32 ohms;
struct nvmem_config nvmem_cfg = {
.name = "rv3028_nvram",
.word_size = 1,
if (ret)
return ret;
- /* setup trickle charger */
- if (!device_property_read_u32(&client->dev, "trickle-resistor-ohms",
- &ohms)) {
- int i;
-
- for (i = 0; i < ARRAY_SIZE(rv3028_trickle_resistors); i++)
- if (ohms == rv3028_trickle_resistors[i])
- break;
-
- if (i < ARRAY_SIZE(rv3028_trickle_resistors)) {
- ret = rv3028_update_cfg(rv3028, RV3028_BACKUP, RV3028_BACKUP_TCE |
- RV3028_BACKUP_TCR_MASK, RV3028_BACKUP_TCE | i);
- if (ret)
- return ret;
- } else {
- dev_warn(&client->dev, "invalid trickle resistor value\n");
- }
- }
+ ret = rv3028_set_trickle_charger(rv3028, client);
+ if (ret)
+ return ret;
ret = rtc_add_group(rv3028->rtc, &rv3028_attr_group);
if (ret)
#include <linux/kernel.h>
#include <linux/log2.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/rtc.h>
#define RV8803_I2C_TRY_COUNT 4
mutex_init(&rv8803->flags_lock);
rv8803->client = client;
if (client->dev.of_node) {
- rv8803->type = (enum rv8803_type)
- of_device_get_match_data(&client->dev);
+ rv8803->type = (uintptr_t)of_device_get_match_data(&client->dev);
} else {
const struct i2c_device_id *id = i2c_match_id(rv8803_id, client);
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/spi/spi.h>
#include <linux/i2c.h>
#include <linux/i2c.h>
#include <linux/bcd.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
#include <linux/log2.h>
#include <linux/init.h>
#include <linux/iopoll.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/rtc.h>
return ret;
/* We cannot set alarms more than one week ahead */
- farest = rtc_tm_to_time64(&tm_now) + (7 * 86400);
+ farest = rtc_tm_to_time64(&tm_now) + rtc->rtcdev->alarm_offset_max;
alarm = rtc_tm_to_time64(tm);
if (time_after(alarm, farest))
return -ERANGE;
rtc->rtcdev->range_min = RTC_TIMESTAMP_BEGIN_2000;
rtc->rtcdev->range_max = RTC_TIMESTAMP_END_2099;
+ rtc->rtcdev->alarm_offset_max = 7 * 86400;
rtc->rtcdev->ops = &rzn1_rtc_ops;
set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features);
clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features);
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/bcd.h>
#include <linux/clk.h>
+#include <linux/errno.h>
#include <linux/iopoll.h>
#include <linux/ioport.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/pm_wakeirq.h>
#include <linux/regmap.h>
#include <linux/rtc.h>
/* Max STM32 RTC register offset is 0x3FC */
#define UNDEF_REG 0xFFFF
+/* STM32 RTC driver time helpers */
+#define SEC_PER_DAY (24 * 60 * 60)
+
struct stm32_rtc;
struct stm32_rtc_registers {
void (*clear_events)(struct stm32_rtc *rtc, unsigned int flags);
bool has_pclk;
bool need_dbp;
+ bool need_accuracy;
};
struct stm32_rtc {
* slowest rtc_ck frequency may be 32kHz and highest should be
* 1MHz, we poll every 10 us with a timeout of 100ms.
*/
- return readl_relaxed_poll_timeout_atomic(
- rtc->base + regs->isr,
- isr, (isr & STM32_RTC_ISR_INITF),
- 10, 100000);
+ return readl_relaxed_poll_timeout_atomic(rtc->base + regs->isr, isr,
+ (isr & STM32_RTC_ISR_INITF),
+ 10, 100000);
}
return 0;
return 0;
}
-static int stm32_rtc_valid_alrm(struct stm32_rtc *rtc, struct rtc_time *tm)
+static int stm32_rtc_valid_alrm(struct device *dev, struct rtc_time *tm)
{
- const struct stm32_rtc_registers *regs = &rtc->data->regs;
- int cur_day, cur_mon, cur_year, cur_hour, cur_min, cur_sec;
- unsigned int dr = readl_relaxed(rtc->base + regs->dr);
- unsigned int tr = readl_relaxed(rtc->base + regs->tr);
-
- cur_day = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
- cur_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
- cur_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
- cur_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
- cur_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
- cur_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
+ static struct rtc_time now;
+ time64_t max_alarm_time64;
+ int max_day_forward;
+ int next_month;
+ int next_year;
/*
* Assuming current date is M-D-Y H:M:S.
* RTC alarm can't be set on a specific month and year.
* So the valid alarm range is:
* M-D-Y H:M:S < alarm <= (M+1)-D-Y H:M:S
- * with a specific case for December...
*/
- if ((((tm->tm_year > cur_year) &&
- (tm->tm_mon == 0x1) && (cur_mon == 0x12)) ||
- ((tm->tm_year == cur_year) &&
- (tm->tm_mon <= cur_mon + 1))) &&
- ((tm->tm_mday > cur_day) ||
- ((tm->tm_mday == cur_day) &&
- ((tm->tm_hour > cur_hour) ||
- ((tm->tm_hour == cur_hour) && (tm->tm_min > cur_min)) ||
- ((tm->tm_hour == cur_hour) && (tm->tm_min == cur_min) &&
- (tm->tm_sec >= cur_sec))))))
- return 0;
+ stm32_rtc_read_time(dev, &now);
+
+ /*
+ * Find the next month and the year of the next month.
+ * Note: tm_mon and next_month are from 0 to 11
+ */
+ next_month = now.tm_mon + 1;
+ if (next_month == 12) {
+ next_month = 0;
+ next_year = now.tm_year + 1;
+ } else {
+ next_year = now.tm_year;
+ }
- return -EINVAL;
+ /* Find the maximum limit of alarm in days. */
+ max_day_forward = rtc_month_days(now.tm_mon, now.tm_year)
+ - now.tm_mday
+ + min(rtc_month_days(next_month, next_year), now.tm_mday);
+
+ /* Convert to timestamp and compare the alarm time and its upper limit */
+ max_alarm_time64 = rtc_tm_to_time64(&now) + max_day_forward * SEC_PER_DAY;
+ return rtc_tm_to_time64(tm) <= max_alarm_time64 ? 0 : -EINVAL;
}
static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
unsigned int cr, isr, alrmar;
int ret = 0;
- tm2bcd(tm);
-
/*
* RTC alarm can't be set on a specific date, unless this date is
* up to the same day of month next month.
*/
- if (stm32_rtc_valid_alrm(rtc, tm) < 0) {
+ if (stm32_rtc_valid_alrm(dev, tm) < 0) {
dev_err(dev, "Alarm can be set only on upcoming month.\n");
return -EINVAL;
}
+ tm2bcd(tm);
+
alrmar = 0;
/* tm_year and tm_mon are not used because not supported by RTC */
alrmar |= (tm->tm_mday << STM32_RTC_ALRMXR_DATE_SHIFT) &
static const struct stm32_rtc_data stm32_rtc_data = {
.has_pclk = false,
.need_dbp = true,
+ .need_accuracy = false,
.regs = {
.tr = 0x00,
.dr = 0x04,
static const struct stm32_rtc_data stm32h7_rtc_data = {
.has_pclk = true,
.need_dbp = true,
+ .need_accuracy = false,
.regs = {
.tr = 0x00,
.dr = 0x04,
static const struct stm32_rtc_data stm32mp1_data = {
.has_pclk = true,
.need_dbp = false,
+ .need_accuracy = true,
.regs = {
.tr = 0x00,
.dr = 0x04,
const struct stm32_rtc_registers *regs = &rtc->data->regs;
unsigned int prer, pred_a, pred_s, pred_a_max, pred_s_max, cr;
unsigned int rate;
- int ret = 0;
+ int ret;
rate = clk_get_rate(rtc->rtc_ck);
pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
pred_s_max = STM32_RTC_PRER_PRED_S >> STM32_RTC_PRER_PRED_S_SHIFT;
- for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
- pred_s = (rate / (pred_a + 1)) - 1;
+ if (rate > (pred_a_max + 1) * (pred_s_max + 1)) {
+ dev_err(&pdev->dev, "rtc_ck rate is too high: %dHz\n", rate);
+ return -EINVAL;
+ }
+
+ if (rtc->data->need_accuracy) {
+ for (pred_a = 0; pred_a <= pred_a_max; pred_a++) {
+ pred_s = (rate / (pred_a + 1)) - 1;
+
+ if (pred_s <= pred_s_max && ((pred_s + 1) * (pred_a + 1)) == rate)
+ break;
+ }
+ } else {
+ for (pred_a = pred_a_max; pred_a + 1 > 0; pred_a--) {
+ pred_s = (rate / (pred_a + 1)) - 1;
- if (((pred_s + 1) * (pred_a + 1)) == rate)
- break;
+ if (((pred_s + 1) * (pred_a + 1)) == rate)
+ break;
+ }
}
/*
* Can't find a 1Hz, so give priority to RTC power consumption
* by choosing the higher possible value for prediv_a
*/
- if ((pred_s > pred_s_max) || (pred_a > pred_a_max)) {
+ if (pred_s > pred_s_max || pred_a > pred_a_max) {
pred_a = pred_a_max;
pred_s = (rate / (pred_a + 1)) - 1;
"fast" : "slow");
}
+ cr = readl_relaxed(rtc->base + regs->cr);
+
+ prer = readl_relaxed(rtc->base + regs->prer);
+ prer &= STM32_RTC_PRER_PRED_S | STM32_RTC_PRER_PRED_A;
+
+ pred_s = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) &
+ STM32_RTC_PRER_PRED_S;
+ pred_a = (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) &
+ STM32_RTC_PRER_PRED_A;
+
+ /* quit if there is nothing to initialize */
+ if ((cr & STM32_RTC_CR_FMT) == 0 && prer == (pred_s | pred_a))
+ return 0;
+
stm32_rtc_wpr_unlock(rtc);
ret = stm32_rtc_enter_init_mode(rtc);
goto end;
}
- prer = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) & STM32_RTC_PRER_PRED_S;
- writel_relaxed(prer, rtc->base + regs->prer);
- prer |= (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) & STM32_RTC_PRER_PRED_A;
- writel_relaxed(prer, rtc->base + regs->prer);
+ writel_relaxed(pred_s, rtc->base + regs->prer);
+ writel_relaxed(pred_a | pred_s, rtc->base + regs->prer);
/* Force 24h time format */
- cr = readl_relaxed(rtc->base + regs->cr);
cr &= ~STM32_RTC_CR_FMT;
writel_relaxed(cr, rtc->base + regs->cr);
rtc->rtc_ck = devm_clk_get(&pdev->dev, NULL);
} else {
rtc->pclk = devm_clk_get(&pdev->dev, "pclk");
- if (IS_ERR(rtc->pclk)) {
- dev_err(&pdev->dev, "no pclk clock");
- return PTR_ERR(rtc->pclk);
- }
+ if (IS_ERR(rtc->pclk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(rtc->pclk), "no pclk clock");
+
rtc->rtc_ck = devm_clk_get(&pdev->dev, "rtc_ck");
}
- if (IS_ERR(rtc->rtc_ck)) {
- dev_err(&pdev->dev, "no rtc_ck clock");
- return PTR_ERR(rtc->rtc_ck);
- }
+ if (IS_ERR(rtc->rtc_ck))
+ return dev_err_probe(&pdev->dev, PTR_ERR(rtc->rtc_ck), "no rtc_ck clock");
if (rtc->data->has_pclk) {
ret = clk_prepare_enable(rtc->pclk);
device_init_wakeup(&pdev->dev, false);
}
-#ifdef CONFIG_PM_SLEEP
static int stm32_rtc_suspend(struct device *dev)
{
struct stm32_rtc *rtc = dev_get_drvdata(dev);
return ret;
}
-#endif
-static SIMPLE_DEV_PM_OPS(stm32_rtc_pm_ops,
- stm32_rtc_suspend, stm32_rtc_resume);
+static const struct dev_pm_ops stm32_rtc_pm_ops = {
+ NOIRQ_SYSTEM_SLEEP_PM_OPS(stm32_rtc_suspend, stm32_rtc_resume)
+};
static struct platform_driver stm32_rtc_driver = {
.probe = stm32_rtc_probe,
#include <linux/delay.h>
#include <linux/rtc.h>
#include <linux/slab.h>
-#include <linux/of_device.h>
#include <linux/of.h>
#include <linux/stmp_device.h>
#include <linux/stmp3xxx_rtc_wdt.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
#include <linux/slab.h>
if (ret)
return ret;
- dev_info(&pdev->dev, "RTC enabled\n");
-
return 0;
}
sp_rtc->irq = platform_get_irq(plat_dev, 0);
if (sp_rtc->irq < 0)
- return dev_err_probe(&plat_dev->dev, sp_rtc->irq, "platform_get_irq failed\n");
+ return sp_rtc->irq;
ret = devm_request_irq(&plat_dev->dev, sp_rtc->irq, sp_rtc_irq_handler,
IRQF_TRIGGER_RISING, "rtc irq", plat_dev);
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/rtc.h>
#include <linux/types.h>
#include <linux/delay.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/sys_soc.h>
#include <linux/property.h>
rtc->rtc->ops = &tps6586x_rtc_ops;
rtc->rtc->range_max = (1ULL << 30) - 1; /* 30-bit seconds */
+ rtc->rtc->alarm_offset_max = ALM1_VALID_RANGE_IN_SEC;
rtc->rtc->start_secs = mktime64(2009, 1, 1, 0, 0, 0);
rtc->rtc->set_start_time = true;
platform_set_drvdata(pdev, tps_rtc);
irq = platform_get_irq(pdev, 0);
- if (irq <= 0) {
- dev_warn(&pdev->dev, "Wake up is not possible as irq = %d\n",
- irq);
- return -ENXIO;
- }
+ if (irq < 0)
+ return irq;
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
tps65910_rtc_interrupt, IRQF_TRIGGER_LOW,
.alarm_irq_enable = twl_rtc_alarm_irq_enable,
};
+static int twl_nvram_read(void *priv, unsigned int offset, void *val,
+ size_t bytes)
+{
+ return twl_i2c_read((long)priv, val, offset, bytes);
+}
+
+static int twl_nvram_write(void *priv, unsigned int offset, void *val,
+ size_t bytes)
+{
+ return twl_i2c_write((long)priv, val, offset, bytes);
+}
+
/*----------------------------------------------------------------------*/
static int twl_rtc_probe(struct platform_device *pdev)
{
struct twl_rtc *twl_rtc;
+ struct nvmem_config nvmem_cfg;
struct device_node *np = pdev->dev.of_node;
int ret = -EINVAL;
int irq = platform_get_irq(pdev, 0);
REG_INT_MSK_STS_A);
}
- dev_info(&pdev->dev, "Enabling TWL-RTC\n");
ret = twl_rtc_write_u8(twl_rtc, BIT_RTC_CTRL_REG_STOP_RTC_M,
REG_RTC_CTRL_REG);
if (ret < 0)
twl_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&twl_rtc_ops, THIS_MODULE);
- if (IS_ERR(twl_rtc->rtc)) {
- dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
- PTR_ERR(twl_rtc->rtc));
+ if (IS_ERR(twl_rtc->rtc))
return PTR_ERR(twl_rtc->rtc);
- }
ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
twl_rtc_interrupt,
return ret;
}
+ memset(&nvmem_cfg, 0, sizeof(nvmem_cfg));
+ nvmem_cfg.name = "twl-secured-";
+ nvmem_cfg.type = NVMEM_TYPE_BATTERY_BACKED;
+ nvmem_cfg.reg_read = twl_nvram_read,
+ nvmem_cfg.reg_write = twl_nvram_write,
+ nvmem_cfg.word_size = 1;
+ nvmem_cfg.stride = 1;
+ if (twl_class_is_4030()) {
+ /* 20 bytes SECURED_REG area */
+ nvmem_cfg.size = 20;
+ nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+ devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+ /* 8 bytes BACKUP area */
+ nvmem_cfg.name = "twl-backup-";
+ nvmem_cfg.size = 8;
+ nvmem_cfg.priv = (void *)TWL4030_MODULE_BACKUP;
+ devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+ } else {
+ /* 8 bytes SECURED_REG area */
+ nvmem_cfg.size = 8;
+ nvmem_cfg.priv = (void *)TWL_MODULE_SECURED_REG;
+ devm_rtc_nvmem_register(twl_rtc->rtc, &nvmem_cfg);
+ }
+
return 0;
}
/* enable the RTC if it's not already enabled */
power5 = wm8350_reg_read(wm8350, WM8350_POWER_MGMT_5);
if (!(power5 & WM8350_RTC_TICK_ENA)) {
- dev_info(wm8350->dev, "Starting RTC\n");
-
wm8350_reg_unlock(wm8350);
ret = wm8350_set_bits(wm8350, WM8350_POWER_MGMT_5,
wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350",
&wm8350_rtc_ops, THIS_MODULE);
- if (IS_ERR(wm_rtc->rtc)) {
- ret = PTR_ERR(wm_rtc->rtc);
- dev_err(&pdev->dev, "failed to register RTC: %d\n", ret);
- return ret;
- }
+ if (IS_ERR(wm_rtc->rtc))
+ return PTR_ERR(wm_rtc->rtc);
ret = wm8350_register_irq(wm8350, WM8350_IRQ_RTC_SEC,
wm8350_rtc_update_handler, 0,
segment_unload(entry->segment_name);
}
list_del(&dev_info->lh);
+ up_write(&dcssblk_devices_sem);
dax_remove_host(dev_info->gd);
kill_dax(dev_info->dax_dev);
put_dax(dev_info->dax_dev);
del_gendisk(dev_info->gd);
put_disk(dev_info->gd);
- up_write(&dcssblk_devices_sem);
if (device_remove_file_self(dev, attr)) {
device_unregister(dev);
}
list_del(&dev_info->lh);
+ /* unload all related segments */
+ list_for_each_entry(entry, &dev_info->seg_list, lh)
+ segment_unload(entry->segment_name);
+ up_write(&dcssblk_devices_sem);
+
dax_remove_host(dev_info->gd);
kill_dax(dev_info->dax_dev);
put_dax(dev_info->dax_dev);
del_gendisk(dev_info->gd);
put_disk(dev_info->gd);
- /* unload all related segments */
- list_for_each_entry(entry, &dev_info->seg_list, lh)
- segment_unload(entry->segment_name);
-
- up_write(&dcssblk_devices_sem);
-
device_unregister(&dev_info->dev);
put_device(&dev_info->dev);
static inline u8 mon_mca_type(struct mon_msg *monmsg, u8 index)
{
- return *((u8 *) mon_mca_start(monmsg) + monmsg->mca_offset + index);
+ return *((u8 *)__va(mon_mca_start(monmsg)) + monmsg->mca_offset + index);
}
static inline u32 mon_mca_size(struct mon_msg *monmsg)
static inline u32 mon_rec_start(struct mon_msg *monmsg)
{
- return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 4));
+ return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 4));
}
static inline u32 mon_rec_end(struct mon_msg *monmsg)
{
- return *((u32 *) (mon_mca_start(monmsg) + monmsg->mca_offset + 8));
+ return *((u32 *)(__va(mon_mca_start(monmsg)) + monmsg->mca_offset + 8));
}
static int mon_check_mca(struct mon_msg *monmsg)
mce_start = mon_mca_start(monmsg) + monmsg->mca_offset;
if ((monmsg->pos >= mce_start) && (monmsg->pos < mce_start + 12)) {
count = min(count, (size_t) mce_start + 12 - monmsg->pos);
- ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
- count);
+ ret = copy_to_user(data, __va(monmsg->pos), count);
if (ret)
return -EFAULT;
monmsg->pos += count;
if (monmsg->pos <= mon_rec_end(monmsg)) {
count = min(count, (size_t) mon_rec_end(monmsg) - monmsg->pos
+ 1);
- ret = copy_to_user(data, (void *) (unsigned long) monmsg->pos,
- count);
+ ret = copy_to_user(data, __va(monmsg->pos), count);
if (ret)
return -EFAULT;
monmsg->pos += count;
return -ENOMEM;
airq->flags |= AIRQ_PTR_ALLOCATED;
}
- if (!airq->lsi_mask)
- airq->lsi_mask = 0xff;
snprintf(dbf_txt, sizeof(dbf_txt), "rairq:%p", airq);
CIO_TRACE_EVENT(4, dbf_txt);
isc_register(airq->isc);
head = &airq_lists[tpi_info->isc];
rcu_read_lock();
hlist_for_each_entry_rcu(airq, head, list)
- if ((*airq->lsi_ptr & airq->lsi_mask) != 0)
+ if (*airq->lsi_ptr != 0)
airq->handler(airq, tpi_info);
rcu_read_unlock();
{
dev_t devt;
int i, rc = 0;
- char nodename[ZCDN_MAX_NAME];
struct zcdn_device *zcdndev;
if (mutex_lock_interruptible(&ap_perms_mutex))
zcdndev->device.devt = devt;
zcdndev->device.groups = zcdn_dev_attr_groups;
if (name[0])
- strncpy(nodename, name, sizeof(nodename));
+ rc = dev_set_name(&zcdndev->device, "%s", name);
else
- snprintf(nodename, sizeof(nodename),
- ZCRYPT_NAME "_%d", (int)MINOR(devt));
- nodename[sizeof(nodename) - 1] = '\0';
- if (dev_set_name(&zcdndev->device, nodename)) {
- rc = -EINVAL;
+ rc = dev_set_name(&zcdndev->device, ZCRYPT_NAME "_%d", (int)MINOR(devt));
+ if (rc) {
+ kfree(zcdndev);
goto unlockout;
}
rc = device_register(&zcdndev->device);
info->airq.handler = virtio_airq_handler;
info->summary_indicator_idx = index;
info->airq.lsi_ptr = get_summary_indicator(info);
- info->airq.lsi_mask = 0xff;
info->airq.isc = VIRTIO_AIRQ_ISC;
rc = register_adapter_interrupt(&info->airq);
if (rc) {
* However we don't need to issue a hard reset here for these
* reasons:
* a. When probing the device, libsas/libata already issues a
- * hard reset in sas_probe_sata() -> ata_sas_async_probe().
+ * hard reset in sas_probe_sata() -> ata_port_probe().
* Note that in hisi_sas_debug_I_T_nexus_reset() we take care
* to issue a hard reset by checking the dev status (== INIT).
* b. When resetting the controller, this is simply unnecessary.
.qc_prep = ata_noop_qc_prep,
.qc_issue = sas_ata_qc_issue,
.qc_fill_rtf = sas_ata_qc_fill_rtf,
- .port_start = ata_sas_port_start,
- .port_stop = ata_sas_port_stop,
.set_dmamode = sas_ata_set_dmamode,
.sched_eh = sas_ata_sched_eh,
.end_eh = sas_ata_end_eh,
ap->private_data = found_dev;
ap->cbl = ATA_CBL_SATA;
ap->scsi_host = shost;
- rc = ata_sas_port_init(ap);
- if (rc)
- goto destroy_port;
rc = ata_sas_tport_add(ata_host->dev, ap);
if (rc)
return 0;
destroy_port:
- ata_sas_port_destroy(ap);
+ kfree(ap);
free_host:
ata_host_put(ata_host);
return rc;
if (!dev_is_sata(dev))
continue;
- ata_sas_async_probe(dev->sata_dev.ap);
+ ata_port_probe(dev->sata_dev.ap);
}
mutex_unlock(&port->ha->disco_mutex);
if (dev_is_sata(dev) && dev->sata_dev.ap) {
ata_sas_tport_delete(dev->sata_dev.ap);
- ata_sas_port_destroy(dev->sata_dev.ap);
+ kfree(dev->sata_dev.ap);
ata_host_put(dev->sata_dev.ata_host);
dev->sata_dev.ata_host = NULL;
dev->sata_dev.ap = NULL;
struct reset_control *rstc;
struct completion done;
+ struct completion dma_rx_done;
const u8 *tx_buf;
u8 *rx_buf;
return SUN6I_MAX_XFER_SIZE - 1;
}
+static void sun6i_spi_dma_rx_cb(void *param)
+{
+ struct sun6i_spi *sspi = param;
+
+ complete(&sspi->dma_rx_done);
+}
+
static int sun6i_spi_prepare_dma(struct sun6i_spi *sspi,
struct spi_transfer *tfr)
{
struct dma_slave_config rxconf = {
.direction = DMA_DEV_TO_MEM,
.src_addr = sspi->dma_addr_rx,
- .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ .src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE,
.src_maxburst = 8,
};
DMA_PREP_INTERRUPT);
if (!rxdesc)
return -EINVAL;
+ rxdesc->callback_param = sspi;
+ rxdesc->callback = sun6i_spi_dma_rx_cb;
}
txdesc = NULL;
return -EINVAL;
reinit_completion(&sspi->done);
+ reinit_completion(&sspi->dma_rx_done);
sspi->tx_buf = tfr->tx_buf;
sspi->rx_buf = tfr->rx_buf;
sspi->len = tfr->len;
start = jiffies;
timeout = wait_for_completion_timeout(&sspi->done,
msecs_to_jiffies(tx_time));
+
+ if (!use_dma) {
+ sun6i_spi_drain_fifo(sspi);
+ } else {
+ if (timeout && rx_len) {
+ /*
+ * Even though RX on the peripheral side has finished
+ * RX DMA might still be in flight
+ */
+ timeout = wait_for_completion_timeout(&sspi->dma_rx_done,
+ timeout);
+ if (!timeout)
+ dev_warn(&master->dev, "RX DMA timeout\n");
+ }
+ }
+
end = jiffies;
if (!timeout) {
dev_warn(&master->dev,
/* Transfer complete */
if (status & SUN6I_INT_CTL_TC) {
sun6i_spi_write(sspi, SUN6I_INT_STA_REG, SUN6I_INT_CTL_TC);
- sun6i_spi_drain_fifo(sspi);
complete(&sspi->done);
return IRQ_HANDLED;
}
}
init_completion(&sspi->done);
+ init_completion(&sspi->dma_rx_done);
sspi->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL);
if (IS_ERR(sspi->rstc)) {
{
struct gb_connection *connection;
struct gb_pwm_chip *pwmc;
- struct pwm_chip *pwm;
+ struct pwm_chip *chip;
int ret;
pwmc = kzalloc(sizeof(*pwmc), GFP_KERNEL);
if (ret)
goto exit_connection_disable;
- pwm = &pwmc->chip;
+ chip = &pwmc->chip;
- pwm->dev = &gbphy_dev->dev;
- pwm->ops = &gb_pwm_ops;
- pwm->npwm = pwmc->pwm_max + 1;
+ chip->dev = &gbphy_dev->dev;
+ chip->ops = &gb_pwm_ops;
+ chip->npwm = pwmc->pwm_max + 1;
- ret = pwmchip_add(pwm);
+ ret = pwmchip_add(chip);
if (ret) {
dev_err(&gbphy_dev->dev,
"failed to register PWM: %d\n", ret);
#include <linux/io.h>
#include <linux/module.h>
#include <linux/mutex.h>
+#include <linux/of.h>
#include <linux/of_platform.h>
+#include <linux/platform_device.h>
/*
* USB Control Register
/* Not booted with device tree or no phandle link to the node */
bl->props.power = def_value ? FB_BLANK_UNBLANK
: FB_BLANK_POWERDOWN;
- else if (gpiod_get_direction(gbl->gpiod) == 0 &&
- gpiod_get_value_cansleep(gbl->gpiod) == 0)
+ else if (gpiod_get_value_cansleep(gbl->gpiod) == 0)
bl->props.power = FB_BLANK_POWERDOWN;
else
bl->props.power = FB_BLANK_UNBLANK;
static struct platform_driver led_bl_driver = {
.driver = {
.name = "led-backlight",
- .of_match_table = of_match_ptr(led_bl_of_match),
+ .of_match_table = led_bl_of_match,
},
.probe = led_bl_probe,
.remove_new = led_bl_remove,
struct device *dev;
struct lp855x_platform_data *pdata;
struct pwm_device *pwm;
+ bool needs_pwm_init;
struct regulator *supply; /* regulator for VDD input */
struct regulator *enable; /* regulator for EN/VDDIO input */
};
return ret;
}
-static void lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
+static int lp855x_pwm_ctrl(struct lp855x *lp, int br, int max_br)
{
struct pwm_state state;
- pwm_get_state(lp->pwm, &state);
+ if (lp->needs_pwm_init) {
+ pwm_init_state(lp->pwm, &state);
+ /* Legacy platform data compatibility */
+ if (lp->pdata->period_ns > 0)
+ state.period = lp->pdata->period_ns;
+ lp->needs_pwm_init = false;
+ } else {
+ pwm_get_state(lp->pwm, &state);
+ }
state.duty_cycle = div_u64(br * state.period, max_br);
state.enabled = state.duty_cycle;
- pwm_apply_state(lp->pwm, &state);
+ return pwm_apply_state(lp->pwm, &state);
}
static int lp855x_bl_update_status(struct backlight_device *bl)
brightness = 0;
if (lp->mode == PWM_BASED)
- lp855x_pwm_ctrl(lp, brightness, bl->props.max_brightness);
+ return lp855x_pwm_ctrl(lp, brightness,
+ bl->props.max_brightness);
else if (lp->mode == REGISTER_BASED)
- lp855x_write_byte(lp, lp->cfg->reg_brightness, (u8)brightness);
-
- return 0;
+ return lp855x_write_byte(lp, lp->cfg->reg_brightness,
+ (u8)brightness);
+ return -EINVAL;
}
static const struct backlight_ops lp855x_bl_ops = {
const struct i2c_device_id *id = i2c_client_get_device_id(cl);
const struct acpi_device_id *acpi_id = NULL;
struct device *dev = &cl->dev;
- struct pwm_state pwmstate;
struct lp855x *lp;
int ret;
else
return dev_err_probe(dev, ret, "getting PWM\n");
+ lp->needs_pwm_init = false;
lp->mode = REGISTER_BASED;
dev_dbg(dev, "mode: register based\n");
} else {
- pwm_init_state(lp->pwm, &pwmstate);
- /* Legacy platform data compatibility */
- if (lp->pdata->period_ns > 0)
- pwmstate.period = lp->pdata->period_ns;
- pwm_apply_state(lp->pwm, &pwmstate);
-
+ lp->needs_pwm_init = true;
lp->mode = PWM_BASED;
dev_dbg(dev, "mode: PWM based\n");
}
#include <linux/backlight.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_address.h>
+#include <linux/platform_device.h>
#include <linux/regmap.h>
/* From DT binding */
config XILINX_WINDOW_WATCHDOG
tristate "Xilinx window watchdog timer"
depends on HAS_IOMEM
- depends on ARM64
+ depends on ARM64 || COMPILE_TEST
select WATCHDOG_CORE
help
Window watchdog driver for the versal_wwdt IP core.
config MLX_WDT
tristate "Mellanox Watchdog"
- depends on MELLANOX_PLATFORM
+ depends on MELLANOX_PLATFORM || COMPILE_TEST
select WATCHDOG_CORE
select REGMAP
help
config IXP4XX_WATCHDOG
tristate "IXP4xx Watchdog"
- depends on ARCH_IXP4XX
+ depends on ARCH_IXP4XX || (ARM && COMPILE_TEST)
select WATCHDOG_CORE
help
Say Y here if to include support for the watchdog timer
config SA1100_WATCHDOG
tristate "SA1100/PXA2xx watchdog"
- depends on ARCH_SA1100 || ARCH_PXA
+ depends on ARCH_SA1100 || ARCH_PXA || COMPILE_TEST
help
Watchdog timer embedded into SA11x0 and PXA2xx chips. This will
reboot your system when timeout is reached.
config IMX_SC_WDT
tristate "IMX SC Watchdog"
depends on HAVE_ARM_SMCCC
- depends on IMX_SCU
+ depends on IMX_SCU || COMPILE_TEST
select WATCHDOG_CORE
help
This is the driver for the system controller watchdog
config STM32_WATCHDOG
tristate "STM32 Independent WatchDoG (IWDG) support"
- depends on ARCH_STM32
+ depends on ARCH_STM32 || COMPILE_TEST
select WATCHDOG_CORE
default y
help
config ADVANTECH_WDT
tristate "Advantech SBC Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
If you are configuring a Linux kernel for the Advantech single-board
computer, say `Y' here to support its built-in watchdog timer
config ADVANTECH_EC_WDT
tristate "Advantech Embedded Controller Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
+ select ISA_BUS_API
+ select WATCHDOG_CORE
help
This driver supports Advantech products with ITE based Embedded Controller.
It does not support Advantech products with other ECs or without EC.
config ALIM1535_WDT
tristate "ALi M1535 PMU Watchdog Timer"
- depends on X86 && PCI
+ depends on (X86 || COMPILE_TEST) && PCI
help
This is the driver for the hardware watchdog on the ALi M1535 PMU.
config EBC_C384_WDT
tristate "WinSystems EBC-C384 Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
select ISA_BUS_API
select WATCHDOG_CORE
help
config EXAR_WDT
tristate "Exar Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
select WATCHDOG_CORE
help
Enables watchdog timer support for the watchdog timer present
config F71808E_WDT
tristate "Fintek F718xx, F818xx Super I/O Watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
select WATCHDOG_CORE
help
This is the driver for the hardware watchdog on the Fintek F71808E,
config SP5100_TCO
tristate "AMD/ATI SP5100 TCO Timer/Watchdog"
- depends on X86 && PCI
+ depends on (X86 || COMPILE_TEST) && PCI
select WATCHDOG_CORE
help
Hardware watchdog driver for the AMD/ATI SP5100 chipset. The TCO
config SBC_FITPC2_WATCHDOG
tristate "Compulab SBC-FITPC2 watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the built-in watchdog timer on the fit-PC2,
fit-PC2i, CM-iAM single-board computers made by Compulab.
config EUROTECH_WDT
tristate "Eurotech CPU-1220/1410 Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
Enable support for the watchdog timer on the Eurotech CPU-1220 and
CPU-1410 cards. These are PC/104 SBCs. Spec sheets and product
config IB700_WDT
tristate "IB700 SBC Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog on the IB700 Single
Board Computer produced by TMC Technology (www.tmc-uk.com). This
config IBMASR
tristate "IBM Automatic Server Restart"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the IBM Automatic Server Restart watchdog
timer built-in into some eServer xSeries machines.
config WAFER_WDT
tristate "ICP Single Board Computer Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is a driver for the hardware watchdog on the ICP Single
Board Computer. This driver is working on (at least) the following
config IE6XX_WDT
tristate "Intel Atom E6xx Watchdog"
- depends on X86 && PCI
+ depends on (X86 || COMPILE_TEST) && PCI
select WATCHDOG_CORE
select MFD_CORE
select LPC_SCH
config IT8712F_WDT
tristate "IT8712F (Smart Guardian) Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the built-in watchdog timer on the IT8712F
Super I/0 chipset used on many motherboards.
config IT87_WDT
tristate "IT87 Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
select WATCHDOG_CORE
help
This is the driver for the hardware watchdog on the ITE IT8607,
config HP_WATCHDOG
tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
select WATCHDOG_CORE
- depends on (ARM64 || X86) && PCI
+ depends on (ARM64 || X86 || COMPILE_TEST) && PCI
help
A software monitoring watchdog and NMI handling driver. This driver
will detect lockups and provide a stack trace. This is a driver that
config SC1200_WDT
tristate "National Semiconductor PC87307/PC97307 (ala SC1200) Watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is a driver for National Semiconductor PC87307/PC97307 hardware
watchdog cards as found on the SC1200. This watchdog is mainly used
config PC87413_WDT
tristate "NS PC87413 watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog on the PC87413 chipset
This watchdog simply watches your kernel to make sure it doesn't
config NV_TCO
tristate "nVidia TCO Timer/Watchdog"
- depends on X86 && PCI
+ depends on (X86 || COMPILE_TEST) && PCI
help
Hardware driver for the TCO timer built into the nVidia Hub family
(such as the MCP51). The TCO (Total Cost of Ownership) timer is a
config 60XX_WDT
tristate "SBC-60XX Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This driver can be used with the watchdog timer found on some
single board computers, namely the 6010 PII based computer.
config CPU5_WDT
tristate "SMA CPU5 Watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
TBD.
To compile this driver as a module, choose M here: the
config SMSC_SCH311X_WDT
tristate "SMSC SCH311X Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog timer on the
SMSC SCH3112, SCH3114 and SCH3116 Super IO chipset
config SMSC37B787_WDT
tristate "Winbond SMsC37B787 Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog component on the
Winbond SMsC37B787 chipset as used on the NetRunner Mainboard
config TQMX86_WDT
tristate "TQ-Systems TQMX86 Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
select WATCHDOG_CORE
help
This is the driver for the hardware watchdog timer in the TQMX86 IO
config VIA_WDT
tristate "VIA Watchdog Timer"
- depends on X86 && PCI
+ depends on (X86 || COMPILE_TEST) && PCI
select WATCHDOG_CORE
help
This is the driver for the hardware watchdog timer on VIA
config W83627HF_WDT
tristate "Watchdog timer for W83627HF/W83627DHG and compatibles"
- depends on X86
+ depends on X86 || COMPILE_TEST
select WATCHDOG_CORE
help
This is the driver for the hardware watchdog on the following
config W83877F_WDT
tristate "W83877F (EMACS) Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog on the W83877F chipset
as used in EMACS PC-104 motherboards (and likely others). This
config W83977F_WDT
tristate "W83977F (PCM-5335) Watchdog Timer"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the hardware watchdog on the W83977F I/O chip
as used in AAEON's PCM-5335 SBC (and likely others). This
config MACHZ_WDT
tristate "ZF MachZ Watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
If you are using a ZF Micro MachZ processor, say Y here, otherwise
N. This is the driver for the watchdog timer built-in on that
config SBC_EPX_C3_WATCHDOG
tristate "Winsystems SBC EPX-C3 watchdog"
- depends on X86
+ depends on X86 || COMPILE_TEST
help
This is the driver for the built-in watchdog timer on the EPX-C3
Single-board computer made by Winsystems, Inc.
config JZ4740_WDT
tristate "Ingenic jz4740 SoC hardware watchdog"
- depends on MIPS
+ depends on MIPS || COMPILE_TEST
depends on COMMON_CLK
select WATCHDOG_CORE
select MFD_SYSCON
from the first interrupt, it is then only poked when the
device is written.
+config MARVELL_GTI_WDT
+ tristate "Marvell GTI Watchdog driver"
+ depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT)
+ default y
+ select WATCHDOG_CORE
+ help
+ Marvell GTI hardware supports watchdog timer. First timeout
+ works as watchdog pretimeout and installed interrupt handler
+ will be called on first timeout. Hardware can generate interrupt
+ to SCP on second timeout but it is not enabled, so second
+ timeout is ignored. If device poke does not happen then system
+ will reboot on third timeout.
+
config BCM2835_WDT
tristate "Broadcom BCM2835 hardware watchdog"
depends on ARCH_BCM2835 || (OF && COMPILE_TEST)
config BCM_KONA_WDT_DEBUG
bool "DEBUGFS support for BCM Kona Watchdog"
- depends on BCM_KONA_WDT
+ depends on BCM_KONA_WDT || COMPILE_TEST
help
If enabled, adds /sys/kernel/debug/bcm_kona_wdt/info which provides
access to the driver's internal data structures as well as watchdog
config LOONGSON1_WDT
tristate "Loongson1 SoC hardware watchdog"
- depends on MACH_LOONGSON32
+ depends on MACH_LOONGSON32 || COMPILE_TEST
select WATCHDOG_CORE
help
Hardware driver for the Loongson1 SoC Watchdog Timer.
config GXP_WATCHDOG
tristate "HPE GXP watchdog support"
- depends on ARCH_HPE_GXP
+ depends on ARCH_HPE_GXP || COMPILE_TEST
select WATCHDOG_CORE
help
Say Y here to include support for the watchdog timer
obj-$(CONFIG_MSC313E_WATCHDOG) += msc313e_wdt.o
obj-$(CONFIG_APPLE_WATCHDOG) += apple_wdt.o
obj-$(CONFIG_SUNPLUS_WATCHDOG) += sunplus_wdt.o
+obj-$(CONFIG_MARVELL_GTI_WDT) += marvell_gti_wdt.o
# X86 (i386 + ia64 + x86_64) Architecture
obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/types.h>
#include <linux/mfd/syscon.h>
#include <linux/mfd/syscon/atmel-st.h>
#include <linux/miscdevice.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <linux/uaccess.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#define WDT_DEFAULT_TIME 5 /* seconds */
#define WDT_MAX_TIME 256 /* seconds */
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <asm/irq.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/mod_devicetable.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/watchdog.h>
ftwdt010_wdt_resume)
};
-#ifdef CONFIG_OF
static const struct of_device_id ftwdt010_wdt_match[] = {
{ .compatible = "faraday,ftwdt010" },
{ .compatible = "cortina,gemini-watchdog" },
{},
};
MODULE_DEVICE_TABLE(of, ftwdt010_wdt_match);
-#endif
static struct platform_driver ftwdt010_wdt_driver = {
.probe = ftwdt010_wdt_probe,
.driver = {
.name = "ftwdt010-wdt",
- .of_match_table = of_match_ptr(ftwdt010_wdt_match),
+ .of_match_table = ftwdt010_wdt_match,
.pm = &ftwdt010_wdt_dev_pm_ops,
},
};
#include <linux/fs.h>
#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/watchdog.h>
*/
imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
imx2_wdt_ping(wdog);
- dev_crit(&pdev->dev, "Device shutdown: Expect reboot!\n");
+ dev_crit(&pdev->dev, "Device shutdown.\n");
}
}
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/watchdog.h>
MODULE_AUTHOR("David Cohen <david.a.cohen@linux.intel.com>");
MODULE_DESCRIPTION("Watchdog Driver for Intel MID platform");
MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:intel_mid_wdt");
#include <linux/module.h>
#include <linux/bitops.h>
#include <linux/watchdog.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <linux/clk.h>
#include <linux/io.h>
*/
#include <linux/clk.h>
+#include <linux/io.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Marvell GTI Watchdog driver
+ *
+ * Copyright (C) 2023 Marvell.
+ */
+
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/*
+ * Hardware supports following mode of operation:
+ * 1) Interrupt Only:
+ * This will generate the interrupt to arm core whenever timeout happens.
+ *
+ * 2) Interrupt + del3t (Interrupt to firmware (SCP processor)).
+ * This will generate interrupt to arm core on 1st timeout happens
+ * This will generate interrupt to SCP processor on 2nd timeout happens
+ *
+ * 3) Interrupt + Interrupt to SCP processor (called delt3t) + reboot.
+ * This will generate interrupt to arm core on 1st timeout happens
+ * Will generate interrupt to SCP processor on 2nd timeout happens,
+ * if interrupt is configured.
+ * Reboot on 3rd timeout.
+ *
+ * Driver will use hardware in mode-3 above so that system can reboot in case
+ * a hardware hang. Also h/w is configured not to generate SCP interrupt, so
+ * effectively 2nd timeout is ignored within hardware.
+ *
+ * First timeout is effectively watchdog pretimeout.
+ */
+
+/* GTI CWD Watchdog (GTI_CWD_WDOG) Register */
+#define GTI_CWD_WDOG(reg_offset) (0x8 * (reg_offset))
+#define GTI_CWD_WDOG_MODE_INT_DEL3T_RST 0x3
+#define GTI_CWD_WDOG_MODE_MASK GENMASK_ULL(1, 0)
+#define GTI_CWD_WDOG_LEN_SHIFT 4
+#define GTI_CWD_WDOG_LEN_MASK GENMASK_ULL(19, 4)
+#define GTI_CWD_WDOG_CNT_SHIFT 20
+#define GTI_CWD_WDOG_CNT_MASK GENMASK_ULL(43, 20)
+
+/* GTI CWD Watchdog Interrupt (GTI_CWD_INT) Register */
+#define GTI_CWD_INT 0x200
+#define GTI_CWD_INT_PENDING_STATUS(bit) BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Clear (GTI_CWD_INT_ENA_CLR) Register */
+#define GTI_CWD_INT_ENA_CLR 0x210
+#define GTI_CWD_INT_ENA_CLR_VAL(bit) BIT_ULL(bit)
+
+/* GTI CWD Watchdog Interrupt Enable Set (GTI_CWD_INT_ENA_SET) Register */
+#define GTI_CWD_INT_ENA_SET 0x218
+#define GTI_CWD_INT_ENA_SET_VAL(bit) BIT_ULL(bit)
+
+/* GTI CWD Watchdog Poke (GTI_CWD_POKE) Registers */
+#define GTI_CWD_POKE(reg_offset) (0x10000 + 0x8 * (reg_offset))
+#define GTI_CWD_POKE_VAL 1
+
+struct gti_match_data {
+ u32 gti_num_timers;
+};
+
+static const struct gti_match_data match_data_octeontx2 = {
+ .gti_num_timers = 54,
+};
+
+static const struct gti_match_data match_data_cn10k = {
+ .gti_num_timers = 64,
+};
+
+struct gti_wdt_priv {
+ struct watchdog_device wdev;
+ void __iomem *base;
+ u32 clock_freq;
+ struct clk *sclk;
+ /* wdt_timer_idx used for timer to be used for system watchdog */
+ u32 wdt_timer_idx;
+ const struct gti_match_data *data;
+};
+
+static irqreturn_t gti_wdt_interrupt(int irq, void *data)
+{
+ struct watchdog_device *wdev = data;
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ /* Clear Interrupt Pending Status */
+ writeq(GTI_CWD_INT_PENDING_STATUS(priv->wdt_timer_idx),
+ priv->base + GTI_CWD_INT);
+
+ watchdog_notify_pretimeout(wdev);
+
+ return IRQ_HANDLED;
+}
+
+static int gti_wdt_ping(struct watchdog_device *wdev)
+{
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+
+ writeq(GTI_CWD_POKE_VAL,
+ priv->base + GTI_CWD_POKE(priv->wdt_timer_idx));
+
+ return 0;
+}
+
+static int gti_wdt_start(struct watchdog_device *wdev)
+{
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+ u64 regval;
+
+ if (!wdev->pretimeout)
+ return -EINVAL;
+
+ set_bit(WDOG_HW_RUNNING, &wdev->status);
+
+ /* Clear any pending interrupt */
+ writeq(GTI_CWD_INT_PENDING_STATUS(priv->wdt_timer_idx),
+ priv->base + GTI_CWD_INT);
+
+ /* Enable Interrupt */
+ writeq(GTI_CWD_INT_ENA_SET_VAL(priv->wdt_timer_idx),
+ priv->base + GTI_CWD_INT_ENA_SET);
+
+ /* Set (Interrupt + SCP interrupt (DEL3T) + core domain reset) Mode */
+ regval = readq(priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+ regval |= GTI_CWD_WDOG_MODE_INT_DEL3T_RST;
+ writeq(regval, priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+
+ return 0;
+}
+
+static int gti_wdt_stop(struct watchdog_device *wdev)
+{
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+ u64 regval;
+
+ /* Disable Interrupt */
+ writeq(GTI_CWD_INT_ENA_CLR_VAL(priv->wdt_timer_idx),
+ priv->base + GTI_CWD_INT_ENA_CLR);
+
+ /* Set GTI_CWD_WDOG.Mode = 0 to stop the timer */
+ regval = readq(priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+ regval &= ~GTI_CWD_WDOG_MODE_MASK;
+ writeq(regval, priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+
+ return 0;
+}
+
+static int gti_wdt_settimeout(struct watchdog_device *wdev,
+ unsigned int timeout)
+{
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+ u64 timeout_wdog, regval;
+
+ /* Update new timeout */
+ wdev->timeout = timeout;
+
+ /* Pretimeout is 1/3 of timeout */
+ wdev->pretimeout = timeout / 3;
+
+ /* Get clock cycles from pretimeout */
+ timeout_wdog = (u64)priv->clock_freq * wdev->pretimeout;
+
+ /* Watchdog counts in 1024 cycle steps */
+ timeout_wdog = timeout_wdog >> 10;
+
+ /* GTI_CWD_WDOG.CNT: reload counter is 16-bit */
+ timeout_wdog = (timeout_wdog + 0xff) >> 8;
+ if (timeout_wdog >= 0x10000)
+ timeout_wdog = 0xffff;
+
+ /*
+ * GTI_CWD_WDOG.LEN is 24bit, lower 8-bits should be zero and
+ * upper 16-bits are same as GTI_CWD_WDOG.CNT
+ */
+ regval = readq(priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+ regval &= GTI_CWD_WDOG_MODE_MASK;
+ regval |= (timeout_wdog << (GTI_CWD_WDOG_CNT_SHIFT + 8)) |
+ (timeout_wdog << GTI_CWD_WDOG_LEN_SHIFT);
+ writeq(regval, priv->base + GTI_CWD_WDOG(priv->wdt_timer_idx));
+
+ return 0;
+}
+
+static int gti_wdt_set_pretimeout(struct watchdog_device *wdev,
+ unsigned int timeout)
+{
+ struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
+ struct watchdog_device *wdog_dev = &priv->wdev;
+
+ /* pretimeout should 1/3 of max_timeout */
+ if (timeout * 3 <= wdog_dev->max_timeout)
+ return gti_wdt_settimeout(wdev, timeout * 3);
+
+ return -EINVAL;
+}
+
+static void gti_clk_disable_unprepare(void *data)
+{
+ clk_disable_unprepare(data);
+}
+
+static int gti_wdt_get_cntfrq(struct platform_device *pdev,
+ struct gti_wdt_priv *priv)
+{
+ int err;
+
+ priv->sclk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(priv->sclk))
+ return PTR_ERR(priv->sclk);
+
+ err = devm_add_action_or_reset(&pdev->dev,
+ gti_clk_disable_unprepare, priv->sclk);
+ if (err)
+ return err;
+
+ priv->clock_freq = clk_get_rate(priv->sclk);
+ if (!priv->clock_freq)
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct watchdog_info gti_wdt_ident = {
+ .identity = "Marvell GTI watchdog",
+ .options = WDIOF_SETTIMEOUT | WDIOF_PRETIMEOUT | WDIOF_KEEPALIVEPING |
+ WDIOF_MAGICCLOSE | WDIOF_CARDRESET,
+};
+
+static const struct watchdog_ops gti_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = gti_wdt_start,
+ .stop = gti_wdt_stop,
+ .ping = gti_wdt_ping,
+ .set_timeout = gti_wdt_settimeout,
+ .set_pretimeout = gti_wdt_set_pretimeout,
+};
+
+static int gti_wdt_probe(struct platform_device *pdev)
+{
+ struct gti_wdt_priv *priv;
+ struct device *dev = &pdev->dev;
+ struct watchdog_device *wdog_dev;
+ u64 max_pretimeout;
+ u32 wdt_idx;
+ int irq;
+ int err;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return dev_err_probe(&pdev->dev, PTR_ERR(priv->base),
+ "reg property not valid/found\n");
+
+ err = gti_wdt_get_cntfrq(pdev, priv);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "GTI clock frequency not valid/found");
+
+ priv->data = of_device_get_match_data(dev);
+
+ /* default use last timer for watchdog */
+ priv->wdt_timer_idx = priv->data->gti_num_timers - 1;
+
+ err = of_property_read_u32(dev->of_node, "marvell,wdt-timer-index",
+ &wdt_idx);
+ if (!err) {
+ if (wdt_idx >= priv->data->gti_num_timers)
+ return dev_err_probe(&pdev->dev, err,
+ "GTI wdog timer index not valid");
+
+ priv->wdt_timer_idx = wdt_idx;
+ }
+
+ wdog_dev = &priv->wdev;
+ wdog_dev->info = >i_wdt_ident,
+ wdog_dev->ops = >i_wdt_ops,
+ wdog_dev->parent = dev;
+ /*
+ * Watchdog counter is 24 bit where lower 8 bits are zeros
+ * This counter decrements every 1024 clock cycles.
+ */
+ max_pretimeout = (GTI_CWD_WDOG_CNT_MASK >> GTI_CWD_WDOG_CNT_SHIFT);
+ max_pretimeout &= ~0xFFUL;
+ max_pretimeout = (max_pretimeout * 1024) / priv->clock_freq;
+ wdog_dev->pretimeout = max_pretimeout;
+
+ /* Maximum timeout is 3 times the pretimeout */
+ wdog_dev->max_timeout = max_pretimeout * 3;
+ /* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */
+ wdog_dev->min_timeout = 3;
+ wdog_dev->timeout = wdog_dev->pretimeout;
+
+ watchdog_set_drvdata(wdog_dev, priv);
+ platform_set_drvdata(pdev, priv);
+ gti_wdt_settimeout(wdog_dev, wdog_dev->timeout);
+ watchdog_stop_on_reboot(wdog_dev);
+ watchdog_stop_on_unregister(wdog_dev);
+
+ err = devm_watchdog_register_device(dev, wdog_dev);
+ if (err)
+ return err;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return dev_err_probe(&pdev->dev, irq, "IRQ resource not found\n");
+
+ err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0,
+ pdev->name, &priv->wdev);
+ if (err)
+ return dev_err_probe(dev, err, "Failed to register interrupt handler\n");
+
+ dev_info(dev, "Watchdog enabled (timeout=%d sec)\n", wdog_dev->timeout);
+ return 0;
+}
+
+static const struct of_device_id gti_wdt_of_match[] = {
+ { .compatible = "marvell,cn9670-wdt", .data = &match_data_octeontx2},
+ { .compatible = "marvell,cn10624-wdt", .data = &match_data_cn10k},
+ { },
+};
+MODULE_DEVICE_TABLE(of, gti_wdt_of_match);
+
+static struct platform_driver gti_wdt_driver = {
+ .driver = {
+ .name = "gti-wdt",
+ .of_match_table = gti_wdt_of_match,
+ },
+ .probe = gti_wdt_probe,
+};
+module_platform_driver(gti_wdt_driver);
+
+MODULE_AUTHOR("Bharat Bhushan <bbhushan2@marvell.com>");
+MODULE_DESCRIPTION("Marvell GTI watchdog driver");
+MODULE_LICENSE("GPL");
static struct mcb_driver men_z069_driver = {
.driver = {
.name = "z069-wdt",
- .owner = THIS_MODULE,
},
.probe = men_z069_probe,
.remove = men_z069_remove,
#define GXBB_WDT_CTRL_CLKDIV_EN BIT(25)
#define GXBB_WDT_CTRL_CLK_EN BIT(24)
-#define GXBB_WDT_CTRL_EE_RESET BIT(21)
#define GXBB_WDT_CTRL_EN BIT(18)
#define GXBB_WDT_CTRL_DIV_MASK (BIT(18) - 1)
struct clk *clk;
};
+struct wdt_params {
+ u32 rst;
+};
+
static int meson_gxbb_wdt_start(struct watchdog_device *wdt_dev)
{
struct meson_gxbb_wdt *data = watchdog_get_drvdata(wdt_dev);
SET_SYSTEM_SLEEP_PM_OPS(meson_gxbb_wdt_suspend, meson_gxbb_wdt_resume)
};
+static const struct wdt_params gxbb_params = {
+ .rst = BIT(21),
+};
+
+static const struct wdt_params t7_params = {
+ .rst = BIT(22),
+};
+
static const struct of_device_id meson_gxbb_wdt_dt_ids[] = {
- { .compatible = "amlogic,meson-gxbb-wdt", },
+ { .compatible = "amlogic,meson-gxbb-wdt", .data = &gxbb_params, },
+ { .compatible = "amlogic,t7-wdt", .data = &t7_params, },
{ /* sentinel */ },
};
MODULE_DEVICE_TABLE(of, meson_gxbb_wdt_dt_ids);
{
struct device *dev = &pdev->dev;
struct meson_gxbb_wdt *data;
+ struct wdt_params *params;
u32 ctrl_reg;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (IS_ERR(data->clk))
return PTR_ERR(data->clk);
+ params = (struct wdt_params *)of_device_get_match_data(dev);
+
platform_set_drvdata(pdev, data);
data->wdt_dev.parent = dev;
/* Setup with 1ms timebase */
ctrl_reg |= ((clk_get_rate(data->clk) / 1000) &
GXBB_WDT_CTRL_DIV_MASK) |
- GXBB_WDT_CTRL_EE_RESET |
+ params->rst |
GXBB_WDT_CTRL_CLK_EN |
GXBB_WDT_CTRL_CLKDIV_EN;
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/watchdog.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/reset-controller.h>
#include <linux/types.h>
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/watchdog.h>
#include <linux/io.h>
#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
/* Register offsets for the Wdt device */
#define XWT_TWCSR0_OFFSET 0x0 /* Control/Status Register0 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/watchdog.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/watchdog.h>
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_platform.h>
#define DRV_NAME "PIKA-WDT"
.probe = pm8916_wdt_probe,
.driver = {
.name = "pm8916-wdt",
- .of_match_table = of_match_ptr(pm8916_wdt_id_table),
+ .of_match_table = pm8916_wdt_id_table,
.pm = &pm8916_wdt_pm_ops,
},
};
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/watchdog.h>
-#include <linux/of_device.h>
enum wdt_reg {
WDT_RST,
#include <linux/mfd/rave-sp.h>
#include <linux/module.h>
#include <linux/nvmem-consumer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/slab.h>
#include <linux/miscdevice.h>
#include <linux/watchdog.h>
#include <linux/of.h>
-#include <linux/of_device.h>
+#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/types.h>
#define DWDST BIT(1)
+#define PON_REASON_SOF_NUM 0xBBBBCCCC
+#define PON_REASON_MAGIC_NUM 0xDDDDDDDD
+#define PON_REASON_EOF_NUM 0xCCCCBBBB
+#define RESERVED_MEM_MIN_SIZE 12
+
static int heartbeat = DEFAULT_HEARTBEAT;
/*
struct rti_wdt_device *wdt;
struct clk *clk;
u32 last_ping = 0;
+ struct device_node *node;
+ u32 reserved_mem_size;
+ struct resource res;
+ u32 *vaddr;
+ u64 paddr;
wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
if (!wdt)
}
}
+ node = of_parse_phandle(pdev->dev.of_node, "memory-region", 0);
+ if (node) {
+ ret = of_address_to_resource(node, 0, &res);
+ if (ret) {
+ dev_err(dev, "No memory address assigned to the region.\n");
+ goto err_iomap;
+ }
+
+ /*
+ * If reserved memory is defined for watchdog reset cause.
+ * Readout the Power-on(PON) reason and pass to bootstatus.
+ */
+ paddr = res.start;
+ reserved_mem_size = resource_size(&res);
+ if (reserved_mem_size < RESERVED_MEM_MIN_SIZE) {
+ dev_err(dev, "The size of reserved memory is too small.\n");
+ ret = -EINVAL;
+ goto err_iomap;
+ }
+
+ vaddr = memremap(paddr, reserved_mem_size, MEMREMAP_WB);
+ if (!vaddr) {
+ dev_err(dev, "Failed to map memory-region.\n");
+ ret = -ENOMEM;
+ goto err_iomap;
+ }
+
+ if (vaddr[0] == PON_REASON_SOF_NUM &&
+ vaddr[1] == PON_REASON_MAGIC_NUM &&
+ vaddr[2] == PON_REASON_EOF_NUM) {
+ wdd->bootstatus |= WDIOF_CARDRESET;
+ }
+ memset(vaddr, 0, reserved_mem_size);
+ memunmap(vaddr);
+ }
+
watchdog_init_timeout(wdd, heartbeat, dev);
ret = watchdog_register_device(wdd);
#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/delay.h>
+#include <linux/io.h>
#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/watchdog.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
#include <linux/delay.h>
static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
{
struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+ unsigned long flags;
- spin_lock(&wdt->lock);
+ spin_lock_irqsave(&wdt->lock, flags);
writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
- spin_unlock(&wdt->lock);
+ spin_unlock_irqrestore(&wdt->lock, flags);
return 0;
}
static int s3c2410wdt_stop(struct watchdog_device *wdd)
{
struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+ unsigned long flags;
- spin_lock(&wdt->lock);
+ spin_lock_irqsave(&wdt->lock, flags);
__s3c2410wdt_stop(wdt);
- spin_unlock(&wdt->lock);
+ spin_unlock_irqrestore(&wdt->lock, flags);
return 0;
}
{
unsigned long wtcon;
struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+ unsigned long flags;
- spin_lock(&wdt->lock);
+ spin_lock_irqsave(&wdt->lock, flags);
__s3c2410wdt_stop(wdt);
writel(wdt->count, wdt->reg_base + S3C2410_WTDAT);
writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
writel(wtcon, wdt->reg_base + S3C2410_WTCON);
- spin_unlock(&wdt->lock);
+ spin_unlock_irqrestore(&wdt->lock, flags);
return 0;
}
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
struct sama5d4_wdt *wdt;
void __iomem *regs;
u32 irq = 0;
+ u32 reg;
int ret;
wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
watchdog_init_timeout(wdd, wdt_timeout, dev);
+ reg = wdt_read(wdt, AT91_WDT_MR);
+ if (!(reg & AT91_WDT_WDDIS)) {
+ wdt->mr &= ~AT91_WDT_WDDIS;
+ set_bit(WDOG_HW_RUNNING, &wdd->status);
+ }
+
ret = sama5d4_wdt_init(wdt);
if (ret)
return ret;
#include <linux/io.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/interrupt.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/uaccess.h>
#include <linux/watchdog.h>
#include <linux/clk.h>
#include <linux/iopoll.h>
#include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
#include <linux/watchdog.h>
starfive_wdt_pm_stop(&wdt->wdd);
}
-#ifdef CONFIG_PM_SLEEP
static int starfive_wdt_suspend(struct device *dev)
{
struct starfive_wdt *wdt = dev_get_drvdata(dev);
return starfive_wdt_start(wdt);
}
-#endif /* CONFIG_PM_SLEEP */
-#ifdef CONFIG_PM
static int starfive_wdt_runtime_suspend(struct device *dev)
{
struct starfive_wdt *wdt = dev_get_drvdata(dev);
return starfive_wdt_enable_clock(wdt);
}
-#endif /* CONFIG_PM */
static const struct dev_pm_ops starfive_wdt_pm_ops = {
- SET_RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
- SET_SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
+ RUNTIME_PM_OPS(starfive_wdt_runtime_suspend, starfive_wdt_runtime_resume, NULL)
+ SYSTEM_SLEEP_PM_OPS(starfive_wdt_suspend, starfive_wdt_resume)
};
static const struct of_device_id starfive_wdt_match[] = {
.shutdown = starfive_wdt_shutdown,
.driver = {
.name = "starfive-wdt",
- .pm = &starfive_wdt_pm_ops,
+ .pm = pm_ptr(&starfive_wdt_pm_ops),
.of_match_table = starfive_wdt_match,
},
};
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/watchdog.h>
.probe = stm32_iwdg_probe,
.driver = {
.name = "iwdg",
- .of_match_table = of_match_ptr(stm32_iwdg_of_match),
+ .of_match_table = stm32_iwdg_of_match,
},
};
module_platform_driver(stm32_iwdg_driver);
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/of.h>
-#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/types.h>
#include <linux/watchdog.h>
struct watchdog_device *wdd;
wdd = container_of(nb, struct watchdog_device, reboot_nb);
- if (code == SYS_DOWN || code == SYS_HALT) {
+ if (code == SYS_DOWN || code == SYS_HALT || code == SYS_POWER_OFF) {
if (watchdog_hw_running(wdd)) {
int ret;
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/ioport.h>
+#include <linux/math64.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_address.h>
+#include <linux/platform_device.h>
#include <linux/watchdog.h>
/* Max timeout is calculated at 100MHz source clock */
/* Calculate timeout count */
time_out = xdev->freq * wdd->timeout;
- closed_timeout = (time_out * xdev->close_percent) / 100;
+ closed_timeout = div_u64(time_out * xdev->close_percent, 100);
open_timeout = time_out - closed_timeout;
wdd->min_hw_heartbeat_ms = xdev->close_percent * 10 * wdd->timeout;
ceph-$(CONFIG_CEPH_FSCACHE) += cache.o
ceph-$(CONFIG_CEPH_FS_POSIX_ACL) += acl.o
+ceph-$(CONFIG_FS_ENCRYPTION) += crypto.o
newattrs.ia_ctime = current_time(inode);
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- ret = __ceph_setattr(inode, &newattrs);
+ ret = __ceph_setattr(inode, &newattrs, NULL);
if (ret)
goto out_free;
}
newattrs.ia_ctime = old_ctime;
newattrs.ia_mode = old_mode;
newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
- __ceph_setattr(inode, &newattrs);
+ __ceph_setattr(inode, &newattrs, NULL);
}
goto out_free;
}
#include "mds_client.h"
#include "cache.h"
#include "metric.h"
+#include "crypto.h"
#include <linux/ceph/osd_client.h>
#include <linux/ceph/striper.h>
static void finish_netfs_read(struct ceph_osd_request *req)
{
- struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+ struct inode *inode = req->r_inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
struct netfs_io_subrequest *subreq = req->r_priv;
- int num_pages;
+ struct ceph_osd_req_op *op = &req->r_ops[0];
int err = req->r_result;
+ bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, osd_data->length, err);
else if (err == -EBLOCKLISTED)
fsc->blocklisted = true;
- if (err >= 0 && err < subreq->len)
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (err >= 0) {
+ if (sparse && err > 0)
+ err = ceph_sparse_ext_map_end(op);
+ if (err < subreq->len)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (IS_ENCRYPTED(inode) && err > 0) {
+ err = ceph_fscrypt_decrypt_extents(inode,
+ osd_data->pages, subreq->start,
+ op->extent.sparse_ext,
+ op->extent.sparse_ext_cnt);
+ if (err > subreq->len)
+ err = subreq->len;
+ }
+ }
+ if (osd_data->type == CEPH_OSD_DATA_TYPE_PAGES) {
+ ceph_put_page_vector(osd_data->pages,
+ calc_pages_for(osd_data->alignment,
+ osd_data->length), false);
+ }
netfs_subreq_terminated(subreq, err, false);
-
- num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
- ceph_put_page_vector(osd_data->pages, num_pages, false);
iput(req->r_inode);
+ ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode);
struct iov_iter iter;
- struct page **pages;
- size_t page_off;
int err = 0;
u64 len = subreq->len;
+ bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+ u64 off = subreq->start;
if (ceph_inode_is_shutdown(inode)) {
err = -EIO;
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
- req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
- 0, 1, CEPH_OSD_OP_READ,
+ ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
+ off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
if (IS_ERR(req)) {
goto out;
}
+ if (sparse) {
+ err = ceph_alloc_sparse_ext_map(&req->r_ops[0]);
+ if (err)
+ goto out;
+ }
+
dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+
iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
- err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
- if (err < 0) {
- dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
- goto out;
- }
- /* should always give us a page-aligned read */
- WARN_ON_ONCE(page_off);
- len = err;
- err = 0;
+ /*
+ * FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
+ * encrypted inodes. We'd need infrastructure that handles an iov_iter
+ * instead of page arrays, and we don't have that as of yet. Once the
+ * dust settles on the write helpers and encrypt/decrypt routines for
+ * netfs, we should be able to rework this.
+ */
+ if (IS_ENCRYPTED(inode)) {
+ struct page **pages;
+ size_t page_off;
+
+ err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+ if (err < 0) {
+ dout("%s: iov_ter_get_pages_alloc returned %d\n",
+ __func__, err);
+ goto out;
+ }
+
+ /* should always give us a page-aligned read */
+ WARN_ON_ONCE(page_off);
+ len = err;
+ err = 0;
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
+ osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
+ false);
+ } else {
+ osd_req_op_extent_osd_iter(req, 0, &iter);
+ }
+ if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+ err = -EIO;
+ goto out;
+ }
req->r_callback = finish_netfs_read;
req->r_priv = subreq;
req->r_inode = inode;
struct page *page, u64 start)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_snap_context *snapc = page_snap_context(page);
+ struct ceph_snap_context *snapc;
struct ceph_cap_snap *capsnap = NULL;
u64 end = i_size_read(inode);
+ u64 ret;
+ snapc = page_snap_context(ceph_fscrypt_pagecache_page(page));
if (snapc != ci->i_head_snapc) {
bool found = false;
spin_lock(&ci->i_ceph_lock);
spin_unlock(&ci->i_ceph_lock);
WARN_ON(!found);
}
- if (end > page_offset(page) + thp_size(page))
- end = page_offset(page) + thp_size(page);
- return end > start ? end - start : 0;
+ if (end > ceph_fscrypt_page_offset(page) + thp_size(page))
+ end = ceph_fscrypt_page_offset(page) + thp_size(page);
+ ret = end > start ? end - start : 0;
+ if (ret && fscrypt_is_bounce_page(page))
+ ret = round_up(ret, CEPH_FSCRYPT_BLOCK_SIZE);
+ return ret;
}
/*
loff_t page_off = page_offset(page);
int err;
loff_t len = thp_size(page);
+ loff_t wlen;
struct ceph_writeback_ctl ceph_wbc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
bool caching = ceph_is_cache_enabled(inode);
+ struct page *bounce_page = NULL;
dout("writepage %p idx %lu\n", page, page->index);
if (ceph_wbc.i_size < page_off + len)
len = ceph_wbc.i_size - page_off;
+ wlen = IS_ENCRYPTED(inode) ? round_up(len, CEPH_FSCRYPT_BLOCK_SIZE) : len;
dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
- inode, page, page->index, page_off, len, snapc, snapc->seq);
+ inode, page, page->index, page_off, wlen, snapc, snapc->seq);
if (atomic_long_inc_return(&fsc->writeback_count) >
CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
fsc->write_congested = true;
- req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
- CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
- ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
- true);
+ req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
+ page_off, &wlen, 0, 1, CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE, snapc,
+ ceph_wbc.truncate_seq,
+ ceph_wbc.truncate_size, true);
if (IS_ERR(req)) {
redirty_page_for_writepage(wbc, page);
return PTR_ERR(req);
}
+ if (wlen < len)
+ len = wlen;
+
set_page_writeback(page);
if (caching)
ceph_set_page_fscache(page);
ceph_fscache_write_to_cache(inode, page_off, len, caching);
+ if (IS_ENCRYPTED(inode)) {
+ bounce_page = fscrypt_encrypt_pagecache_blocks(page,
+ CEPH_FSCRYPT_BLOCK_SIZE, 0,
+ GFP_NOFS);
+ if (IS_ERR(bounce_page)) {
+ redirty_page_for_writepage(wbc, page);
+ end_page_writeback(page);
+ ceph_osdc_put_request(req);
+ return PTR_ERR(bounce_page);
+ }
+ }
+
/* it may be a short write due to an object boundary */
WARN_ON_ONCE(len > thp_size(page));
- osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
- dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
+ osd_req_op_extent_osd_data_pages(req, 0,
+ bounce_page ? &bounce_page : &page, wlen, 0,
+ false, false);
+ dout("writepage %llu~%llu (%llu bytes, %sencrypted)\n",
+ page_off, len, wlen, IS_ENCRYPTED(inode) ? "" : "not ");
req->r_mtime = inode->i_mtime;
ceph_osdc_start_request(osdc, req);
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, err);
-
+ fscrypt_free_bounce_page(bounce_page);
ceph_osdc_put_request(req);
if (err == 0)
err = len;
total_pages += num_pages;
for (j = 0; j < num_pages; j++) {
page = osd_data->pages[j];
+ if (fscrypt_is_bounce_page(page)) {
+ page = fscrypt_pagecache_page(page);
+ fscrypt_free_bounce_page(osd_data->pages[j]);
+ osd_data->pages[j] = page;
+ }
BUG_ON(!page);
WARN_ON(!PageUptodate(page));
else
kfree(osd_data->pages);
ceph_osdc_put_request(req);
+ ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
/*
fsc->mount_options->congestion_kb))
fsc->write_congested = true;
- pages[locked_pages++] = page;
- fbatch.folios[i] = NULL;
+ if (IS_ENCRYPTED(inode)) {
+ pages[locked_pages] =
+ fscrypt_encrypt_pagecache_blocks(page,
+ PAGE_SIZE, 0,
+ locked_pages ? GFP_NOWAIT : GFP_NOFS);
+ if (IS_ERR(pages[locked_pages])) {
+ if (PTR_ERR(pages[locked_pages]) == -EINVAL)
+ pr_err("%s: inode->i_blkbits=%hhu\n",
+ __func__, inode->i_blkbits);
+ /* better not fail on first page! */
+ BUG_ON(locked_pages == 0);
+ pages[locked_pages] = NULL;
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ break;
+ }
+ ++locked_pages;
+ } else {
+ pages[locked_pages++] = page;
+ }
+ fbatch.folios[i] = NULL;
len += thp_size(page);
}
}
new_request:
- offset = page_offset(pages[0]);
+ offset = ceph_fscrypt_page_offset(pages[0]);
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
ceph_wbc.truncate_size, true);
BUG_ON(IS_ERR(req));
}
- BUG_ON(len < page_offset(pages[locked_pages - 1]) +
- thp_size(page) - offset);
+ BUG_ON(len < ceph_fscrypt_page_offset(pages[locked_pages - 1]) +
+ thp_size(pages[locked_pages - 1]) - offset);
+ if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
+ rc = -EIO;
+ goto release_folios;
+ }
req->r_callback = writepages_finish;
req->r_inode = inode;
data_pages = pages;
op_idx = 0;
for (i = 0; i < locked_pages; i++) {
- u64 cur_offset = page_offset(pages[i]);
+ struct page *page = ceph_fscrypt_pagecache_page(pages[i]);
+
+ u64 cur_offset = page_offset(page);
/*
* Discontinuity in page range? Ceph can handle that by just passing
* multiple extents in the write op.
op_idx++;
}
- set_page_writeback(pages[i]);
+ set_page_writeback(page);
if (caching)
- ceph_set_page_fscache(pages[i]);
+ ceph_set_page_fscache(page);
len += thp_size(page);
}
ceph_fscache_write_to_cache(inode, offset, len, caching);
offset);
len = max(len, min_len);
}
+ if (IS_ENCRYPTED(inode))
+ len = round_up(len, CEPH_FSCRYPT_BLOCK_SIZE);
+
dout("writepages got pages at %llu~%llu\n", offset, len);
+ if (IS_ENCRYPTED(inode) &&
+ ((offset | len) & ~CEPH_FSCRYPT_BLOCK_MASK))
+ pr_warn("%s: bad encrypted write offset=%lld len=%llu\n",
+ __func__, offset, len);
+
osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
0, from_pool, false);
osd_req_op_extent_update(req, op_idx, len);
#include "super.h"
#include "mds_client.h"
#include "cache.h"
+#include "crypto.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h>
umode_t mode;
bool inline_data;
bool wake;
+ bool encrypted;
+ u32 fscrypt_auth_len;
+ u8 fscrypt_auth[sizeof(struct ceph_fscrypt_auth)]; // for context
};
-/*
- * cap struct size + flock buffer size + inline version + inline data size +
- * osd_epoch_barrier + oldest_flush_tid
- */
-#define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \
- 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4)
-
/* Marshal up the cap msg to the MDS */
static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
{
arg->size, arg->max_size, arg->xattr_version,
arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
- msg->hdr.version = cpu_to_le16(10);
+ msg->hdr.version = cpu_to_le16(12);
msg->hdr.tid = cpu_to_le64(arg->flush_tid);
fc = msg->front.iov_base;
fc->ino = cpu_to_le64(arg->ino);
fc->snap_follows = cpu_to_le64(arg->follows);
- fc->size = cpu_to_le64(arg->size);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (arg->encrypted)
+ fc->size = cpu_to_le64(round_up(arg->size,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ else
+#endif
+ fc->size = cpu_to_le64(arg->size);
fc->max_size = cpu_to_le64(arg->max_size);
ceph_encode_timespec64(&fc->mtime, &arg->mtime);
ceph_encode_timespec64(&fc->atime, &arg->atime);
/* Advisory flags (version 10) */
ceph_encode_32(&p, arg->flags);
+
+ /* dirstats (version 11) - these are r/o on the client */
+ ceph_encode_64(&p, 0);
+ ceph_encode_64(&p, 0);
+
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ /*
+ * fscrypt_auth and fscrypt_file (version 12)
+ *
+ * fscrypt_auth holds the crypto context (if any). fscrypt_file
+ * tracks the real i_size as an __le64 field (and we use a rounded-up
+ * i_size in the traditional size field).
+ */
+ ceph_encode_32(&p, arg->fscrypt_auth_len);
+ ceph_encode_copy(&p, arg->fscrypt_auth, arg->fscrypt_auth_len);
+ ceph_encode_32(&p, sizeof(__le64));
+ ceph_encode_64(&p, arg->size);
+#else /* CONFIG_FS_ENCRYPTION */
+ ceph_encode_32(&p, 0);
+ ceph_encode_32(&p, 0);
+#endif /* CONFIG_FS_ENCRYPTION */
}
/*
arg->follows = flushing ? ci->i_head_snapc->seq : 0;
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
-
arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
}
}
arg->flags = flags;
+ arg->encrypted = IS_ENCRYPTED(inode);
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (ci->fscrypt_auth_len &&
+ WARN_ON_ONCE(ci->fscrypt_auth_len > sizeof(struct ceph_fscrypt_auth))) {
+ /* Don't set this if it's too big */
+ arg->fscrypt_auth_len = 0;
+ } else {
+ arg->fscrypt_auth_len = ci->fscrypt_auth_len;
+ memcpy(arg->fscrypt_auth, ci->fscrypt_auth,
+ min_t(size_t, ci->fscrypt_auth_len,
+ sizeof(arg->fscrypt_auth)));
+ }
+#endif /* CONFIG_FS_ENCRYPTION */
}
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+ 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4 + 8)
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+ return CAP_MSG_FIXED_FIELDS + arg->fscrypt_auth_len;
+}
+#else
+#define CAP_MSG_FIXED_FIELDS (sizeof(struct ceph_mds_caps) + \
+ 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4 + 8 + 8 + 4 + 4)
+
+static inline int cap_msg_size(struct cap_msg_args *arg)
+{
+ return CAP_MSG_FIXED_FIELDS;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
/*
* Send a cap msg on the given inode.
*
struct ceph_msg *msg;
struct inode *inode = &ci->netfs.inode;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(arg), GFP_NOFS,
+ false);
if (!msg) {
pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
ceph_vinop(inode), ceph_cap_string(arg->dirty),
struct cap_msg_args arg;
struct ceph_msg *msg;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
- if (!msg)
- return -ENOMEM;
-
arg.session = session;
arg.ino = ceph_vino(inode).ino;
arg.cid = 0;
arg.inline_data = capsnap->inline_data;
arg.flags = 0;
arg.wake = false;
+ arg.encrypted = IS_ENCRYPTED(inode);
+
+ /* No fscrypt_auth changes from a capsnap.*/
+ arg.fscrypt_auth_len = 0;
+
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, cap_msg_size(&arg),
+ GFP_NOFS, false);
+ if (!msg)
+ return -ENOMEM;
encode_cap_msg(msg, &arg);
ceph_con_send(&arg.session->s_con, msg);
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
+int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi, int need,
+ int want, loff_t endoff, int *got)
{
- struct ceph_file_info *fi = filp->private_data;
- struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
int ret, _got, flags;
if (ret < 0)
return ret;
- if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+ if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
fi->filp_gen != READ_ONCE(fsc->filp_gen))
return -EBADF;
continue;
}
- if ((fi->fmode & CEPH_FILE_MODE_WR) &&
+ if (fi && (fi->fmode & CEPH_FILE_MODE_WR) &&
fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
if (ret >= 0 && _got)
ceph_put_cap_refs(ci, _got);
return 0;
}
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff,
+ int *got)
+{
+ struct ceph_file_info *fi = filp->private_data;
+ struct inode *inode = file_inode(filp);
+
+ return __ceph_get_caps(inode, fi, need, want, endoff, got);
+}
+
/*
* Take cap refs. Caller must already know we hold at least one ref
* on the caps in question or we don't know this is safe.
/* currently issued */
int issued;
struct timespec64 btime;
+ u8 *fscrypt_auth;
+ u32 fscrypt_auth_len;
+ u64 fscrypt_file_size;
};
/*
bool deleted_inode = false;
bool fill_inline = false;
+ /*
+ * If there is at least one crypto block then we'll trust
+ * fscrypt_file_size. If the real length of the file is 0, then
+ * ignore it (it has probably been truncated down to 0 by the MDS).
+ */
+ if (IS_ENCRYPTED(inode) && size)
+ size = extra_info->fscrypt_file_size;
+
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
from_kuid(&init_user_ns, inode->i_uid),
from_kgid(&init_user_ns, inode->i_gid));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (ci->fscrypt_auth_len != extra_info->fscrypt_auth_len ||
+ memcmp(ci->fscrypt_auth, extra_info->fscrypt_auth,
+ ci->fscrypt_auth_len))
+ pr_warn_ratelimited("%s: cap grant attempt to change fscrypt_auth on non-I_NEW inode (old len %d new len %d)\n",
+ __func__, ci->fscrypt_auth_len,
+ extra_info->fscrypt_auth_len);
+#endif
}
if ((newcaps & CEPH_CAP_LINK_SHARED) &&
*/
static bool handle_cap_trunc(struct inode *inode,
struct ceph_mds_caps *trunc,
- struct ceph_mds_session *session)
+ struct ceph_mds_session *session,
+ struct cap_extra_info *extra_info)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
issued |= implemented | dirty;
- dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
- inode, mds, seq, truncate_size, truncate_seq);
+ /*
+ * If there is at least one crypto block then we'll trust
+ * fscrypt_file_size. If the real length of the file is 0, then
+ * ignore it (it has probably been truncated down to 0 by the MDS).
+ */
+ if (IS_ENCRYPTED(inode) && size)
+ size = extra_info->fscrypt_file_size;
+
+ dout("%s inode %p mds%d seq %d to %lld truncate seq %d\n",
+ __func__, inode, mds, seq, truncate_size, truncate_seq);
queue_trunc = ceph_fill_file_size(inode, issued,
truncate_seq, truncate_size, size);
return queue_trunc;
*target_cap = cap;
}
+#ifdef CONFIG_FS_ENCRYPTION
+static int parse_fscrypt_fields(void **p, void *end,
+ struct cap_extra_info *extra)
+{
+ u32 len;
+
+ ceph_decode_32_safe(p, end, extra->fscrypt_auth_len, bad);
+ if (extra->fscrypt_auth_len) {
+ ceph_decode_need(p, end, extra->fscrypt_auth_len, bad);
+ extra->fscrypt_auth = kmalloc(extra->fscrypt_auth_len,
+ GFP_KERNEL);
+ if (!extra->fscrypt_auth)
+ return -ENOMEM;
+ ceph_decode_copy_safe(p, end, extra->fscrypt_auth,
+ extra->fscrypt_auth_len, bad);
+ }
+
+ ceph_decode_32_safe(p, end, len, bad);
+ if (len >= sizeof(u64)) {
+ ceph_decode_64_safe(p, end, extra->fscrypt_file_size, bad);
+ len -= sizeof(u64);
+ }
+ ceph_decode_skip_n(p, end, len, bad);
+ return 0;
+bad:
+ return -EIO;
+}
+#else
+static int parse_fscrypt_fields(void **p, void *end,
+ struct cap_extra_info *extra)
+{
+ u32 len;
+
+ /* Don't care about these fields unless we're encryption-capable */
+ ceph_decode_32_safe(p, end, len, bad);
+ if (len)
+ ceph_decode_skip_n(p, end, len, bad);
+ ceph_decode_32_safe(p, end, len, bad);
+ if (len)
+ ceph_decode_skip_n(p, end, len, bad);
+ return 0;
+bad:
+ return -EIO;
+}
+#endif
+
/*
* Handle a caps message from the MDS.
*
dout("handle_caps from mds%d\n", session->s_mds);
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
end = msg->front.iov_base + msg->front.iov_len;
if (msg->front.iov_len < sizeof(*h))
ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
}
+ if (msg_version >= 12) {
+ if (parse_fscrypt_fields(&p, end, &extra_info))
+ goto bad;
+ }
+
/* lookup ino */
inode = ceph_find_inode(mdsc->fsc->sb, vino);
dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
vino.snap, inode);
mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
(unsigned)seq);
break;
case CEPH_CAP_OP_TRUNC:
- queue_trunc = handle_cap_trunc(inode, h, session);
+ queue_trunc = handle_cap_trunc(inode, h, session,
+ &extra_info);
spin_unlock(&ci->i_ceph_lock);
if (queue_trunc)
ceph_queue_vmtruncate(inode);
done_unlocked:
iput(inode);
out:
+ ceph_dec_mds_stopping_blocker(mdsc);
+
ceph_put_string(extra_info.pool_ns);
/* Defer closing the sessions after s_mutex lock being released */
if (close_sessions)
ceph_mdsc_close_sessions(mdsc);
+ kfree(extra_info.fscrypt_auth);
return;
flush_cap_releases:
return ret;
}
+/**
+ * ceph_encode_dentry_release - encode a dentry release into an outgoing request
+ * @p: outgoing request buffer
+ * @dentry: dentry to release
+ * @dir: dir to release it from
+ * @mds: mds that we're speaking to
+ * @drop: caps being dropped
+ * @unless: unless we have these caps
+ *
+ * Encode a dentry release into an outgoing request buffer. Returns 1 if the
+ * thing was released, or a negative error code otherwise.
+ */
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
struct inode *dir,
int mds, int drop, int unless)
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
dout("encode_dentry_release %p mds%d seq %d\n",
dentry, mds, (int)di->lease_seq);
- rel->dname_len = cpu_to_le32(dentry->d_name.len);
- memcpy(*p, dentry->d_name.name, dentry->d_name.len);
- *p += dentry->d_name.len;
rel->dname_seq = cpu_to_le32(di->lease_seq);
__ceph_mdsc_drop_dentry_lease(dentry);
+ spin_unlock(&dentry->d_lock);
+ if (IS_ENCRYPTED(dir) && fscrypt_has_encryption_key(dir)) {
+ int ret2 = ceph_encode_encrypted_fname(dir, dentry, *p);
+
+ if (ret2 < 0)
+ return ret2;
+
+ rel->dname_len = cpu_to_le32(ret2);
+ *p += ret2;
+ } else {
+ rel->dname_len = cpu_to_le32(dentry->d_name.len);
+ memcpy(*p, dentry->d_name.name, dentry->d_name.len);
+ *p += dentry->d_name.len;
+ }
+ } else {
+ spin_unlock(&dentry->d_lock);
}
- spin_unlock(&dentry->d_lock);
return ret;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The base64 encode/decode code was copied from fscrypt:
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ * Written by Uday Savagaonkar, 2014.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+#include <linux/ceph/ceph_debug.h>
+#include <linux/xattr.h>
+#include <linux/fscrypt.h>
+#include <linux/ceph/striper.h>
+
+#include "super.h"
+#include "mds_client.h"
+#include "crypto.h"
+
+/*
+ * The base64url encoding used by fscrypt includes the '_' character, which may
+ * cause problems in snapshot names (which can not start with '_'). Thus, we
+ * used the base64 encoding defined for IMAP mailbox names (RFC 3501) instead,
+ * which replaces '-' and '_' by '+' and ','.
+ */
+static const char base64_table[65] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst)
+{
+ u32 ac = 0;
+ int bits = 0;
+ int i;
+ char *cp = dst;
+
+ for (i = 0; i < srclen; i++) {
+ ac = (ac << 8) | src[i];
+ bits += 8;
+ do {
+ bits -= 6;
+ *cp++ = base64_table[(ac >> bits) & 0x3f];
+ } while (bits >= 6);
+ }
+ if (bits)
+ *cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
+ return cp - dst;
+}
+
+int ceph_base64_decode(const char *src, int srclen, u8 *dst)
+{
+ u32 ac = 0;
+ int bits = 0;
+ int i;
+ u8 *bp = dst;
+
+ for (i = 0; i < srclen; i++) {
+ const char *p = strchr(base64_table, src[i]);
+
+ if (p == NULL || src[i] == 0)
+ return -1;
+ ac = (ac << 6) | (p - base64_table);
+ bits += 6;
+ if (bits >= 8) {
+ bits -= 8;
+ *bp++ = (u8)(ac >> bits);
+ }
+ }
+ if (ac & ((1 << bits) - 1))
+ return -1;
+ return bp - dst;
+}
+
+static int ceph_crypt_get_context(struct inode *inode, void *ctx, size_t len)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fscrypt_auth *cfa = (struct ceph_fscrypt_auth *)ci->fscrypt_auth;
+ u32 ctxlen;
+
+ /* Non existent or too short? */
+ if (!cfa || (ci->fscrypt_auth_len < (offsetof(struct ceph_fscrypt_auth, cfa_blob) + 1)))
+ return -ENOBUFS;
+
+ /* Some format we don't recognize? */
+ if (le32_to_cpu(cfa->cfa_version) != CEPH_FSCRYPT_AUTH_VERSION)
+ return -ENOBUFS;
+
+ ctxlen = le32_to_cpu(cfa->cfa_blob_len);
+ if (len < ctxlen)
+ return -ERANGE;
+
+ memcpy(ctx, cfa->cfa_blob, ctxlen);
+ return ctxlen;
+}
+
+static int ceph_crypt_set_context(struct inode *inode, const void *ctx,
+ size_t len, void *fs_data)
+{
+ int ret;
+ struct iattr attr = { };
+ struct ceph_iattr cia = { };
+ struct ceph_fscrypt_auth *cfa;
+
+ WARN_ON_ONCE(fs_data);
+
+ if (len > FSCRYPT_SET_CONTEXT_MAX_SIZE)
+ return -EINVAL;
+
+ cfa = kzalloc(sizeof(*cfa), GFP_KERNEL);
+ if (!cfa)
+ return -ENOMEM;
+
+ cfa->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+ cfa->cfa_blob_len = cpu_to_le32(len);
+ memcpy(cfa->cfa_blob, ctx, len);
+
+ cia.fscrypt_auth = cfa;
+
+ ret = __ceph_setattr(inode, &attr, &cia);
+ if (ret == 0)
+ inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED);
+ kfree(cia.fscrypt_auth);
+ return ret;
+}
+
+static bool ceph_crypt_empty_dir(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ return ci->i_rsubdirs + ci->i_rfiles == 1;
+}
+
+static const union fscrypt_policy *ceph_get_dummy_policy(struct super_block *sb)
+{
+ return ceph_sb_to_client(sb)->fsc_dummy_enc_policy.policy;
+}
+
+static struct fscrypt_operations ceph_fscrypt_ops = {
+ .get_context = ceph_crypt_get_context,
+ .set_context = ceph_crypt_set_context,
+ .get_dummy_policy = ceph_get_dummy_policy,
+ .empty_dir = ceph_crypt_empty_dir,
+};
+
+void ceph_fscrypt_set_ops(struct super_block *sb)
+{
+ fscrypt_set_ops(sb, &ceph_fscrypt_ops);
+}
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc)
+{
+ fscrypt_free_dummy_policy(&fsc->fsc_dummy_enc_policy);
+}
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+ struct ceph_acl_sec_ctx *as)
+{
+ int ret, ctxsize;
+ bool encrypted = false;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ ret = fscrypt_prepare_new_inode(dir, inode, &encrypted);
+ if (ret)
+ return ret;
+ if (!encrypted)
+ return 0;
+
+ as->fscrypt_auth = kzalloc(sizeof(*as->fscrypt_auth), GFP_KERNEL);
+ if (!as->fscrypt_auth)
+ return -ENOMEM;
+
+ ctxsize = fscrypt_context_for_new_inode(as->fscrypt_auth->cfa_blob,
+ inode);
+ if (ctxsize < 0)
+ return ctxsize;
+
+ as->fscrypt_auth->cfa_version = cpu_to_le32(CEPH_FSCRYPT_AUTH_VERSION);
+ as->fscrypt_auth->cfa_blob_len = cpu_to_le32(ctxsize);
+
+ WARN_ON_ONCE(ci->fscrypt_auth);
+ kfree(ci->fscrypt_auth);
+ ci->fscrypt_auth_len = ceph_fscrypt_auth_len(as->fscrypt_auth);
+ ci->fscrypt_auth = kmemdup(as->fscrypt_auth, ci->fscrypt_auth_len,
+ GFP_KERNEL);
+ if (!ci->fscrypt_auth)
+ return -ENOMEM;
+
+ inode->i_flags |= S_ENCRYPTED;
+
+ return 0;
+}
+
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+ struct ceph_acl_sec_ctx *as)
+{
+ swap(req->r_fscrypt_auth, as->fscrypt_auth);
+}
+
+/*
+ * User-created snapshots can't start with '_'. Snapshots that start with this
+ * character are special (hint: there aren't real snapshots) and use the
+ * following format:
+ *
+ * _<SNAPSHOT-NAME>_<INODE-NUMBER>
+ *
+ * where:
+ * - <SNAPSHOT-NAME> - the real snapshot name that may need to be decrypted,
+ * - <INODE-NUMBER> - the inode number (in decimal) for the actual snapshot
+ *
+ * This function parses these snapshot names and returns the inode
+ * <INODE-NUMBER>. 'name_len' will also bet set with the <SNAPSHOT-NAME>
+ * length.
+ */
+static struct inode *parse_longname(const struct inode *parent,
+ const char *name, int *name_len)
+{
+ struct inode *dir = NULL;
+ struct ceph_vino vino = { .snap = CEPH_NOSNAP };
+ char *inode_number;
+ char *name_end;
+ int orig_len = *name_len;
+ int ret = -EIO;
+
+ /* Skip initial '_' */
+ name++;
+ name_end = strrchr(name, '_');
+ if (!name_end) {
+ dout("Failed to parse long snapshot name: %s\n", name);
+ return ERR_PTR(-EIO);
+ }
+ *name_len = (name_end - name);
+ if (*name_len <= 0) {
+ pr_err("Failed to parse long snapshot name\n");
+ return ERR_PTR(-EIO);
+ }
+
+ /* Get the inode number */
+ inode_number = kmemdup_nul(name_end + 1,
+ orig_len - *name_len - 2,
+ GFP_KERNEL);
+ if (!inode_number)
+ return ERR_PTR(-ENOMEM);
+ ret = kstrtou64(inode_number, 10, &vino.ino);
+ if (ret) {
+ dout("Failed to parse inode number: %s\n", name);
+ dir = ERR_PTR(ret);
+ goto out;
+ }
+
+ /* And finally the inode */
+ dir = ceph_find_inode(parent->i_sb, vino);
+ if (!dir) {
+ /* This can happen if we're not mounting cephfs on the root */
+ dir = ceph_get_inode(parent->i_sb, vino, NULL);
+ if (!dir)
+ dir = ERR_PTR(-ENOENT);
+ }
+ if (IS_ERR(dir))
+ dout("Can't find inode %s (%s)\n", inode_number, name);
+
+out:
+ kfree(inode_number);
+ return dir;
+}
+
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+ char *buf)
+{
+ struct inode *dir = parent;
+ struct qstr iname;
+ u32 len;
+ int name_len;
+ int elen;
+ int ret;
+ u8 *cryptbuf = NULL;
+
+ iname.name = d_name->name;
+ name_len = d_name->len;
+
+ /* Handle the special case of snapshot names that start with '_' */
+ if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+ (iname.name[0] == '_')) {
+ dir = parse_longname(parent, iname.name, &name_len);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+ iname.name++; /* skip initial '_' */
+ }
+ iname.len = name_len;
+
+ if (!fscrypt_has_encryption_key(dir)) {
+ memcpy(buf, d_name->name, d_name->len);
+ elen = d_name->len;
+ goto out;
+ }
+
+ /*
+ * Convert cleartext d_name to ciphertext. If result is longer than
+ * CEPH_NOHASH_NAME_MAX, sha256 the remaining bytes
+ *
+ * See: fscrypt_setup_filename
+ */
+ if (!fscrypt_fname_encrypted_size(dir, iname.len, NAME_MAX, &len)) {
+ elen = -ENAMETOOLONG;
+ goto out;
+ }
+
+ /* Allocate a buffer appropriate to hold the result */
+ cryptbuf = kmalloc(len > CEPH_NOHASH_NAME_MAX ? NAME_MAX : len,
+ GFP_KERNEL);
+ if (!cryptbuf) {
+ elen = -ENOMEM;
+ goto out;
+ }
+
+ ret = fscrypt_fname_encrypt(dir, &iname, cryptbuf, len);
+ if (ret) {
+ elen = ret;
+ goto out;
+ }
+
+ /* hash the end if the name is long enough */
+ if (len > CEPH_NOHASH_NAME_MAX) {
+ u8 hash[SHA256_DIGEST_SIZE];
+ u8 *extra = cryptbuf + CEPH_NOHASH_NAME_MAX;
+
+ /*
+ * hash the extra bytes and overwrite crypttext beyond that
+ * point with it
+ */
+ sha256(extra, len - CEPH_NOHASH_NAME_MAX, hash);
+ memcpy(extra, hash, SHA256_DIGEST_SIZE);
+ len = CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE;
+ }
+
+ /* base64 encode the encrypted name */
+ elen = ceph_base64_encode(cryptbuf, len, buf);
+ dout("base64-encoded ciphertext name = %.*s\n", elen, buf);
+
+ /* To understand the 240 limit, see CEPH_NOHASH_NAME_MAX comments */
+ WARN_ON(elen > 240);
+ if ((elen > 0) && (dir != parent)) {
+ char tmp_buf[NAME_MAX];
+
+ elen = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld",
+ elen, buf, dir->i_ino);
+ memcpy(buf, tmp_buf, elen);
+ }
+
+out:
+ kfree(cryptbuf);
+ if (dir != parent) {
+ if ((dir->i_state & I_NEW))
+ discard_new_inode(dir);
+ else
+ iput(dir);
+ }
+ return elen;
+}
+
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+ char *buf)
+{
+ WARN_ON_ONCE(!fscrypt_has_encryption_key(parent));
+
+ return ceph_encode_encrypted_dname(parent, &dentry->d_name, buf);
+}
+
+/**
+ * ceph_fname_to_usr - convert a filename for userland presentation
+ * @fname: ceph_fname to be converted
+ * @tname: temporary name buffer to use for conversion (may be NULL)
+ * @oname: where converted name should be placed
+ * @is_nokey: set to true if key wasn't available during conversion (may be NULL)
+ *
+ * Given a filename (usually from the MDS), format it for presentation to
+ * userland. If @parent is not encrypted, just pass it back as-is.
+ *
+ * Otherwise, base64 decode the string, and then ask fscrypt to format it
+ * for userland presentation.
+ *
+ * Returns 0 on success or negative error code on error.
+ */
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+ struct fscrypt_str *oname, bool *is_nokey)
+{
+ struct inode *dir = fname->dir;
+ struct fscrypt_str _tname = FSTR_INIT(NULL, 0);
+ struct fscrypt_str iname;
+ char *name = fname->name;
+ int name_len = fname->name_len;
+ int ret;
+
+ /* Sanity check that the resulting name will fit in the buffer */
+ if (fname->name_len > NAME_MAX || fname->ctext_len > NAME_MAX)
+ return -EIO;
+
+ /* Handle the special case of snapshot names that start with '_' */
+ if ((ceph_snap(dir) == CEPH_SNAPDIR) && (name_len > 0) &&
+ (name[0] == '_')) {
+ dir = parse_longname(dir, name, &name_len);
+ if (IS_ERR(dir))
+ return PTR_ERR(dir);
+ name++; /* skip initial '_' */
+ }
+
+ if (!IS_ENCRYPTED(dir)) {
+ oname->name = fname->name;
+ oname->len = fname->name_len;
+ ret = 0;
+ goto out_inode;
+ }
+
+ ret = ceph_fscrypt_prepare_readdir(dir);
+ if (ret)
+ goto out_inode;
+
+ /*
+ * Use the raw dentry name as sent by the MDS instead of
+ * generating a nokey name via fscrypt.
+ */
+ if (!fscrypt_has_encryption_key(dir)) {
+ if (fname->no_copy)
+ oname->name = fname->name;
+ else
+ memcpy(oname->name, fname->name, fname->name_len);
+ oname->len = fname->name_len;
+ if (is_nokey)
+ *is_nokey = true;
+ ret = 0;
+ goto out_inode;
+ }
+
+ if (fname->ctext_len == 0) {
+ int declen;
+
+ if (!tname) {
+ ret = fscrypt_fname_alloc_buffer(NAME_MAX, &_tname);
+ if (ret)
+ goto out_inode;
+ tname = &_tname;
+ }
+
+ declen = ceph_base64_decode(name, name_len, tname->name);
+ if (declen <= 0) {
+ ret = -EIO;
+ goto out;
+ }
+ iname.name = tname->name;
+ iname.len = declen;
+ } else {
+ iname.name = fname->ctext;
+ iname.len = fname->ctext_len;
+ }
+
+ ret = fscrypt_fname_disk_to_usr(dir, 0, 0, &iname, oname);
+ if (!ret && (dir != fname->dir)) {
+ char tmp_buf[CEPH_BASE64_CHARS(NAME_MAX)];
+
+ name_len = snprintf(tmp_buf, sizeof(tmp_buf), "_%.*s_%ld",
+ oname->len, oname->name, dir->i_ino);
+ memcpy(oname->name, tmp_buf, name_len);
+ oname->len = name_len;
+ }
+
+out:
+ fscrypt_fname_free_buffer(&_tname);
+out_inode:
+ if ((dir != fname->dir) && !IS_ERR(dir)) {
+ if ((dir->i_state & I_NEW))
+ discard_new_inode(dir);
+ else
+ iput(dir);
+ }
+ return ret;
+}
+
+/**
+ * ceph_fscrypt_prepare_readdir - simple __fscrypt_prepare_readdir() wrapper
+ * @dir: directory inode for readdir prep
+ *
+ * Simple wrapper around __fscrypt_prepare_readdir() that will mark directory as
+ * non-complete if this call results in having the directory unlocked.
+ *
+ * Returns:
+ * 1 - if directory was locked and key is now loaded (i.e. dir is unlocked)
+ * 0 - if directory is still locked
+ * < 0 - if __fscrypt_prepare_readdir() fails
+ */
+int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+ bool had_key = fscrypt_has_encryption_key(dir);
+ int err;
+
+ if (!IS_ENCRYPTED(dir))
+ return 0;
+
+ err = __fscrypt_prepare_readdir(dir);
+ if (err)
+ return err;
+ if (!had_key && fscrypt_has_encryption_key(dir)) {
+ /* directory just got unlocked, mark it as not complete */
+ ceph_dir_clear_complete(dir);
+ return 1;
+ }
+ return 0;
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num)
+{
+ dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+ return fscrypt_decrypt_block_inplace(inode, page, len, offs, lblk_num);
+}
+
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num,
+ gfp_t gfp_flags)
+{
+ dout("%s: len %u offs %u blk %llu\n", __func__, len, offs, lblk_num);
+ return fscrypt_encrypt_block_inplace(inode, page, len, offs, lblk_num,
+ gfp_flags);
+}
+
+/**
+ * ceph_fscrypt_decrypt_pages - decrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the read data starts
+ * @len: max length to decrypt
+ *
+ * Decrypt an array of fscrypt'ed pages and return the amount of
+ * data decrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored (and should
+ * probably be zeroed by the caller).
+ *
+ * Returns the length of the decrypted data or a negative errno.
+ */
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+ u64 off, int len)
+{
+ int i, num_blocks;
+ u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
+ int ret = 0;
+
+ /*
+ * We can't deal with partial blocks on an encrypted file, so mask off
+ * the last bit.
+ */
+ num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+ /* Decrypt each block */
+ for (i = 0; i < num_blocks; ++i) {
+ int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT;
+ int pgidx = blkoff >> PAGE_SHIFT;
+ unsigned int pgoffs = offset_in_page(blkoff);
+ int fret;
+
+ fret = ceph_fscrypt_decrypt_block_inplace(inode, page[pgidx],
+ CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+ baseblk + i);
+ if (fret < 0) {
+ if (ret == 0)
+ ret = fret;
+ break;
+ }
+ ret += CEPH_FSCRYPT_BLOCK_SIZE;
+ }
+ return ret;
+}
+
+/**
+ * ceph_fscrypt_decrypt_extents: decrypt received extents in given buffer
+ * @inode: inode associated with pages being decrypted
+ * @page: pointer to page array
+ * @off: offset into the file that the data in page[0] starts
+ * @map: pointer to extent array
+ * @ext_cnt: length of extent array
+ *
+ * Given an extent map and a page array, decrypt the received data in-place,
+ * skipping holes. Returns the offset into buffer of end of last decrypted
+ * block.
+ */
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+ u64 off, struct ceph_sparse_extent *map,
+ u32 ext_cnt)
+{
+ int i, ret = 0;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 objno, objoff;
+ u32 xlen;
+
+ /* Nothing to do for empty array */
+ if (ext_cnt == 0) {
+ dout("%s: empty array, ret 0\n", __func__);
+ return 0;
+ }
+
+ ceph_calc_file_object_mapping(&ci->i_layout, off, map[0].len,
+ &objno, &objoff, &xlen);
+
+ for (i = 0; i < ext_cnt; ++i) {
+ struct ceph_sparse_extent *ext = &map[i];
+ int pgsoff = ext->off - objoff;
+ int pgidx = pgsoff >> PAGE_SHIFT;
+ int fret;
+
+ if ((ext->off | ext->len) & ~CEPH_FSCRYPT_BLOCK_MASK) {
+ pr_warn("%s: bad encrypted sparse extent idx %d off %llx len %llx\n",
+ __func__, i, ext->off, ext->len);
+ return -EIO;
+ }
+ fret = ceph_fscrypt_decrypt_pages(inode, &page[pgidx],
+ off + pgsoff, ext->len);
+ dout("%s: [%d] 0x%llx~0x%llx fret %d\n", __func__, i,
+ ext->off, ext->len, fret);
+ if (fret < 0) {
+ if (ret == 0)
+ ret = fret;
+ break;
+ }
+ ret = pgsoff + fret;
+ }
+ dout("%s: ret %d\n", __func__, ret);
+ return ret;
+}
+
+/**
+ * ceph_fscrypt_encrypt_pages - encrypt an array of pages
+ * @inode: pointer to inode associated with these pages
+ * @page: pointer to page array
+ * @off: offset into the file that the data starts
+ * @len: max length to encrypt
+ * @gfp: gfp flags to use for allocation
+ *
+ * Decrypt an array of cleartext pages and return the amount of
+ * data encrypted. Any data in the page prior to the start of the
+ * first complete block in the read is ignored. Any incomplete
+ * crypto blocks at the end of the array are ignored.
+ *
+ * Returns the length of the encrypted data or a negative errno.
+ */
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+ int len, gfp_t gfp)
+{
+ int i, num_blocks;
+ u64 baseblk = off >> CEPH_FSCRYPT_BLOCK_SHIFT;
+ int ret = 0;
+
+ /*
+ * We can't deal with partial blocks on an encrypted file, so mask off
+ * the last bit.
+ */
+ num_blocks = ceph_fscrypt_blocks(off, len & CEPH_FSCRYPT_BLOCK_MASK);
+
+ /* Encrypt each block */
+ for (i = 0; i < num_blocks; ++i) {
+ int blkoff = i << CEPH_FSCRYPT_BLOCK_SHIFT;
+ int pgidx = blkoff >> PAGE_SHIFT;
+ unsigned int pgoffs = offset_in_page(blkoff);
+ int fret;
+
+ fret = ceph_fscrypt_encrypt_block_inplace(inode, page[pgidx],
+ CEPH_FSCRYPT_BLOCK_SIZE, pgoffs,
+ baseblk + i, gfp);
+ if (fret < 0) {
+ if (ret == 0)
+ ret = fret;
+ break;
+ }
+ ret += CEPH_FSCRYPT_BLOCK_SIZE;
+ }
+ return ret;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ceph fscrypt functionality
+ */
+
+#ifndef _CEPH_CRYPTO_H
+#define _CEPH_CRYPTO_H
+
+#include <crypto/sha2.h>
+#include <linux/fscrypt.h>
+
+#define CEPH_FSCRYPT_BLOCK_SHIFT 12
+#define CEPH_FSCRYPT_BLOCK_SIZE (_AC(1, UL) << CEPH_FSCRYPT_BLOCK_SHIFT)
+#define CEPH_FSCRYPT_BLOCK_MASK (~(CEPH_FSCRYPT_BLOCK_SIZE-1))
+
+struct ceph_fs_client;
+struct ceph_acl_sec_ctx;
+struct ceph_mds_request;
+
+struct ceph_fname {
+ struct inode *dir;
+ char *name; // b64 encoded, possibly hashed
+ unsigned char *ctext; // binary crypttext (if any)
+ u32 name_len; // length of name buffer
+ u32 ctext_len; // length of crypttext
+ bool no_copy;
+};
+
+/*
+ * Header for the crypted file when truncating the size, this
+ * will be sent to MDS, and the MDS will update the encrypted
+ * last block and then truncate the size.
+ */
+struct ceph_fscrypt_truncate_size_header {
+ __u8 ver;
+ __u8 compat;
+
+ /*
+ * It will be sizeof(assert_ver + file_offset + block_size)
+ * if the last block is empty when it's located in a file
+ * hole. Or the data_len will plus CEPH_FSCRYPT_BLOCK_SIZE.
+ */
+ __le32 data_len;
+
+ __le64 change_attr;
+ __le64 file_offset;
+ __le32 block_size;
+} __packed;
+
+struct ceph_fscrypt_auth {
+ __le32 cfa_version;
+ __le32 cfa_blob_len;
+ u8 cfa_blob[FSCRYPT_SET_CONTEXT_MAX_SIZE];
+} __packed;
+
+#define CEPH_FSCRYPT_AUTH_VERSION 1
+static inline u32 ceph_fscrypt_auth_len(struct ceph_fscrypt_auth *fa)
+{
+ u32 ctxsize = le32_to_cpu(fa->cfa_blob_len);
+
+ return offsetof(struct ceph_fscrypt_auth, cfa_blob) + ctxsize;
+}
+
+#ifdef CONFIG_FS_ENCRYPTION
+/*
+ * We want to encrypt filenames when creating them, but the encrypted
+ * versions of those names may have illegal characters in them. To mitigate
+ * that, we base64 encode them, but that gives us a result that can exceed
+ * NAME_MAX.
+ *
+ * Follow a similar scheme to fscrypt itself, and cap the filename to a
+ * smaller size. If the ciphertext name is longer than the value below, then
+ * sha256 hash the remaining bytes.
+ *
+ * For the fscrypt_nokey_name struct the dirhash[2] member is useless in ceph
+ * so the corresponding struct will be:
+ *
+ * struct fscrypt_ceph_nokey_name {
+ * u8 bytes[157];
+ * u8 sha256[SHA256_DIGEST_SIZE];
+ * }; // 180 bytes => 240 bytes base64-encoded, which is <= NAME_MAX (255)
+ *
+ * (240 bytes is the maximum size allowed for snapshot names to take into
+ * account the format: '_<SNAPSHOT-NAME>_<INODE-NUMBER>'.)
+ *
+ * Note that for long names that end up having their tail portion hashed, we
+ * must also store the full encrypted name (in the dentry's alternate_name
+ * field).
+ */
+#define CEPH_NOHASH_NAME_MAX (180 - SHA256_DIGEST_SIZE)
+
+#define CEPH_BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3)
+
+int ceph_base64_encode(const u8 *src, int srclen, char *dst);
+int ceph_base64_decode(const char *src, int srclen, u8 *dst);
+
+void ceph_fscrypt_set_ops(struct super_block *sb);
+
+void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc);
+
+int ceph_fscrypt_prepare_context(struct inode *dir, struct inode *inode,
+ struct ceph_acl_sec_ctx *as);
+void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+ struct ceph_acl_sec_ctx *as);
+int ceph_encode_encrypted_dname(struct inode *parent, struct qstr *d_name,
+ char *buf);
+int ceph_encode_encrypted_fname(struct inode *parent, struct dentry *dentry,
+ char *buf);
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+ struct fscrypt_str *fname)
+{
+ if (!IS_ENCRYPTED(parent))
+ return 0;
+ return fscrypt_fname_alloc_buffer(NAME_MAX, fname);
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+ struct fscrypt_str *fname)
+{
+ if (IS_ENCRYPTED(parent))
+ fscrypt_fname_free_buffer(fname);
+}
+
+int ceph_fname_to_usr(const struct ceph_fname *fname, struct fscrypt_str *tname,
+ struct fscrypt_str *oname, bool *is_nokey);
+int ceph_fscrypt_prepare_readdir(struct inode *dir);
+
+static inline unsigned int ceph_fscrypt_blocks(u64 off, u64 len)
+{
+ /* crypto blocks cannot span more than one page */
+ BUILD_BUG_ON(CEPH_FSCRYPT_BLOCK_SHIFT > PAGE_SHIFT);
+
+ return ((off+len+CEPH_FSCRYPT_BLOCK_SIZE-1) >> CEPH_FSCRYPT_BLOCK_SHIFT) -
+ (off >> CEPH_FSCRYPT_BLOCK_SHIFT);
+}
+
+/*
+ * If we have an encrypted inode then we must adjust the offset and
+ * range of the on-the-wire read to cover an entire encryption block.
+ * The copy will be done using the original offset and length, after
+ * we've decrypted the result.
+ */
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+ u64 *off, u64 *len)
+{
+ if (IS_ENCRYPTED(inode)) {
+ *len = ceph_fscrypt_blocks(*off, *len) * CEPH_FSCRYPT_BLOCK_SIZE;
+ *off &= CEPH_FSCRYPT_BLOCK_MASK;
+ }
+}
+
+int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num);
+int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num,
+ gfp_t gfp_flags);
+int ceph_fscrypt_decrypt_pages(struct inode *inode, struct page **page,
+ u64 off, int len);
+int ceph_fscrypt_decrypt_extents(struct inode *inode, struct page **page,
+ u64 off, struct ceph_sparse_extent *map,
+ u32 ext_cnt);
+int ceph_fscrypt_encrypt_pages(struct inode *inode, struct page **page, u64 off,
+ int len, gfp_t gfp);
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+ return fscrypt_is_bounce_page(page) ? fscrypt_pagecache_page(page) : page;
+}
+
+#else /* CONFIG_FS_ENCRYPTION */
+
+static inline void ceph_fscrypt_set_ops(struct super_block *sb)
+{
+}
+
+static inline void ceph_fscrypt_free_dummy_policy(struct ceph_fs_client *fsc)
+{
+}
+
+static inline int ceph_fscrypt_prepare_context(struct inode *dir,
+ struct inode *inode,
+ struct ceph_acl_sec_ctx *as)
+{
+ if (IS_ENCRYPTED(dir))
+ return -EOPNOTSUPP;
+ return 0;
+}
+
+static inline void ceph_fscrypt_as_ctx_to_req(struct ceph_mds_request *req,
+ struct ceph_acl_sec_ctx *as_ctx)
+{
+}
+
+static inline int ceph_encode_encrypted_dname(struct inode *parent,
+ struct qstr *d_name, char *buf)
+{
+ memcpy(buf, d_name->name, d_name->len);
+ return d_name->len;
+}
+
+static inline int ceph_encode_encrypted_fname(struct inode *parent,
+ struct dentry *dentry, char *buf)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int ceph_fname_alloc_buffer(struct inode *parent,
+ struct fscrypt_str *fname)
+{
+ return 0;
+}
+
+static inline void ceph_fname_free_buffer(struct inode *parent,
+ struct fscrypt_str *fname)
+{
+}
+
+static inline int ceph_fname_to_usr(const struct ceph_fname *fname,
+ struct fscrypt_str *tname,
+ struct fscrypt_str *oname, bool *is_nokey)
+{
+ oname->name = fname->name;
+ oname->len = fname->name_len;
+ return 0;
+}
+
+static inline int ceph_fscrypt_prepare_readdir(struct inode *dir)
+{
+ return 0;
+}
+
+static inline void ceph_fscrypt_adjust_off_and_len(struct inode *inode,
+ u64 *off, u64 *len)
+{
+}
+
+static inline int ceph_fscrypt_decrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num)
+{
+ return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_block_inplace(const struct inode *inode,
+ struct page *page, unsigned int len,
+ unsigned int offs, u64 lblk_num,
+ gfp_t gfp_flags)
+{
+ return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_pages(struct inode *inode,
+ struct page **page, u64 off,
+ int len)
+{
+ return 0;
+}
+
+static inline int ceph_fscrypt_decrypt_extents(struct inode *inode,
+ struct page **page, u64 off,
+ struct ceph_sparse_extent *map,
+ u32 ext_cnt)
+{
+ return 0;
+}
+
+static inline int ceph_fscrypt_encrypt_pages(struct inode *inode,
+ struct page **page, u64 off,
+ int len, gfp_t gfp)
+{
+ return 0;
+}
+
+static inline struct page *ceph_fscrypt_pagecache_page(struct page *page)
+{
+ return page;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
+static inline loff_t ceph_fscrypt_page_offset(struct page *page)
+{
+ return page_offset(ceph_fscrypt_pagecache_page(page));
+}
+
+#endif /* _CEPH_CRYPTO_H */
#include "super.h"
#include "mds_client.h"
+#include "crypto.h"
/*
* Directory operations: readdir, lookup, create, link, unlink,
di = ceph_dentry(dentry);
if (d_unhashed(dentry) ||
d_really_is_negative(dentry) ||
- di->lease_shared_gen != shared_gen) {
+ di->lease_shared_gen != shared_gen ||
+ ((dentry->d_flags & DCACHE_NOKEY_NAME) &&
+ fscrypt_has_encryption_key(dir))) {
spin_unlock(&dentry->d_lock);
dput(dentry);
err = -EAGAIN;
ctx->pos = 2;
}
+ err = ceph_fscrypt_prepare_readdir(inode);
+ if (err < 0)
+ return err;
+
spin_lock(&ci->i_ceph_lock);
/* request Fx cap. if have Fx, we don't need to release Fs cap
* for later create/unlink. */
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
+
err = ceph_alloc_readdir_reply_buffer(req, inode);
if (err) {
ceph_mdsc_put_request(req);
req->r_inode_drop = CEPH_CAP_FILE_EXCL;
}
if (dfi->last_name) {
- req->r_path2 = kstrdup(dfi->last_name, GFP_KERNEL);
+ struct qstr d_name = { .name = dfi->last_name,
+ .len = strlen(dfi->last_name) };
+
+ req->r_path2 = kzalloc(NAME_MAX + 1, GFP_KERNEL);
if (!req->r_path2) {
ceph_mdsc_put_request(req);
return -ENOMEM;
}
+
+ err = ceph_encode_encrypted_dname(inode, &d_name,
+ req->r_path2);
+ if (err < 0) {
+ ceph_mdsc_put_request(req);
+ return err;
+ }
} else if (is_hash_order(ctx->pos)) {
req->r_args.readdir.offset_hash =
cpu_to_le32(fpos_hash(ctx->pos));
for (; i < rinfo->dir_nr; i++) {
struct ceph_mds_reply_dir_entry *rde = rinfo->dir_entries + i;
- BUG_ON(rde->offset < ctx->pos);
+ if (rde->offset < ctx->pos) {
+ pr_warn("%s: rde->offset 0x%llx ctx->pos 0x%llx\n",
+ __func__, rde->offset, ctx->pos);
+ return -EIO;
+ }
+
+ if (WARN_ON_ONCE(!rde->inode.in))
+ return -EIO;
ctx->pos = rde->offset;
dout("readdir (%d/%d) -> %llx '%.*s' %p\n",
i, rinfo->dir_nr, ctx->pos,
rde->name_len, rde->name, &rde->inode.in);
- BUG_ON(!rde->inode.in);
-
if (!dir_emit(ctx, rde->name, rde->name_len,
ceph_present_ino(inode->i_sb, le64_to_cpu(rde->inode.in->ino)),
le32_to_cpu(rde->inode.in->mode) >> 12)) {
dout("filldir stopping us...\n");
return 0;
}
+
+ /* Reset the lengths to their original allocated vals */
ctx->pos++;
}
dfi->dir_ordered_count);
spin_unlock(&ci->i_ceph_lock);
}
-
dout("readdir %p file %p done.\n", inode, file);
return 0;
}
if (dentry->d_name.len > NAME_MAX)
return ERR_PTR(-ENAMETOOLONG);
+ if (IS_ENCRYPTED(dir)) {
+ bool had_key = fscrypt_has_encryption_key(dir);
+
+ err = fscrypt_prepare_lookup_partial(dir, dentry);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ /* mark directory as incomplete if it has been unlocked */
+ if (!had_key && fscrypt_has_encryption_key(dir))
+ ceph_dir_clear_complete(dir);
+ }
+
/* can we conclude ENOENT locally? */
if (d_really_is_negative(dentry)) {
struct ceph_inode_info *ci = ceph_inode(dir);
goto out;
}
- err = ceph_pre_init_acls(dir, &mode, &as_ctx);
- if (err < 0)
- goto out;
- err = ceph_security_init_secctx(dentry, mode, &as_ctx);
- if (err < 0)
- goto out;
-
dout("mknod in dir %p dentry %p mode 0%ho rdev %d\n",
dir, dentry, mode, rdev);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_MKNOD, USE_AUTH_MDS);
err = PTR_ERR(req);
goto out;
}
+
+ req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+ if (IS_ERR(req->r_new_inode)) {
+ err = PTR_ERR(req->r_new_inode);
+ req->r_new_inode = NULL;
+ goto out_req;
+ }
+
+ if (S_ISREG(mode) && IS_ENCRYPTED(dir))
+ set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_parent = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- if (as_ctx.pagelist) {
- req->r_pagelist = as_ctx.pagelist;
- as_ctx.pagelist = NULL;
- }
+
+ ceph_as_ctx_to_req(req, &as_ctx);
+
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
+out_req:
ceph_mdsc_put_request(req);
out:
if (!err)
return ceph_mknod(idmap, dir, dentry, mode, 0);
}
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+ const char *dest)
+{
+ int err;
+ int len = strlen(dest);
+ struct fscrypt_str osd_link = FSTR_INIT(NULL, 0);
+
+ err = fscrypt_prepare_symlink(req->r_parent, dest, len, PATH_MAX,
+ &osd_link);
+ if (err)
+ goto out;
+
+ err = fscrypt_encrypt_symlink(req->r_new_inode, dest, len, &osd_link);
+ if (err)
+ goto out;
+
+ req->r_path2 = kmalloc(CEPH_BASE64_CHARS(osd_link.len) + 1, GFP_KERNEL);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ len = ceph_base64_encode(osd_link.name, osd_link.len, req->r_path2);
+ req->r_path2[len] = '\0';
+out:
+ fscrypt_fname_free_buffer(&osd_link);
+ return err;
+}
+#else
+static int prep_encrypted_symlink_target(struct ceph_mds_request *req,
+ const char *dest)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
static int ceph_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *dest)
{
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
struct ceph_acl_sec_ctx as_ctx = {};
+ umode_t mode = S_IFLNK | 0777;
int err;
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out;
}
- err = ceph_security_init_secctx(dentry, S_IFLNK | 0777, &as_ctx);
- if (err < 0)
- goto out;
-
dout("symlink in dir %p dentry %p to '%s'\n", dir, dentry, dest);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SYMLINK, USE_AUTH_MDS);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
}
- req->r_path2 = kstrdup(dest, GFP_KERNEL);
- if (!req->r_path2) {
- err = -ENOMEM;
- ceph_mdsc_put_request(req);
- goto out;
+
+ req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+ if (IS_ERR(req->r_new_inode)) {
+ err = PTR_ERR(req->r_new_inode);
+ req->r_new_inode = NULL;
+ goto out_req;
}
+
req->r_parent = dir;
ihold(dir);
+ if (IS_ENCRYPTED(req->r_new_inode)) {
+ err = prep_encrypted_symlink_target(req, dest);
+ if (err)
+ goto out_req;
+ } else {
+ req->r_path2 = kstrdup(dest, GFP_KERNEL);
+ if (!req->r_path2) {
+ err = -ENOMEM;
+ goto out_req;
+ }
+ }
+
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- if (as_ctx.pagelist) {
- req->r_pagelist = as_ctx.pagelist;
- as_ctx.pagelist = NULL;
- }
+
+ ceph_as_ctx_to_req(req, &as_ctx);
+
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
+out_req:
ceph_mdsc_put_request(req);
out:
if (err)
err = -EDQUOT;
goto out;
}
-
- mode |= S_IFDIR;
- err = ceph_pre_init_acls(dir, &mode, &as_ctx);
- if (err < 0)
- goto out;
- err = ceph_security_init_secctx(dentry, mode, &as_ctx);
- if (err < 0)
+ if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
+ !fscrypt_has_encryption_key(dir)) {
+ err = -ENOKEY;
goto out;
+ }
+
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
if (IS_ERR(req)) {
goto out;
}
+ mode |= S_IFDIR;
+ req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+ if (IS_ERR(req->r_new_inode)) {
+ err = PTR_ERR(req->r_new_inode);
+ req->r_new_inode = NULL;
+ goto out_req;
+ }
+
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_parent = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- if (as_ctx.pagelist) {
- req->r_pagelist = as_ctx.pagelist;
- as_ctx.pagelist = NULL;
- }
+
+ ceph_as_ctx_to_req(req, &as_ctx);
+
err = ceph_mdsc_do_request(mdsc, dir, req);
if (!err &&
!req->r_reply_info.head->is_target &&
!req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
+out_req:
ceph_mdsc_put_request(req);
out:
if (!err)
if (ceph_snap(dir) != CEPH_NOSNAP)
return -EROFS;
+ err = fscrypt_prepare_link(old_dentry, dir, dentry);
+ if (err)
+ return err;
+
dout("link in dir %p %llx.%llx old_dentry %p:'%pd' dentry %p:'%pd'\n",
dir, ceph_vinop(dir), old_dentry, old_dentry, dentry, dentry);
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LINK, USE_AUTH_MDS);
if (err)
return err;
+ err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
+ flags);
+ if (err)
+ return err;
+
dout("rename dir %p dentry %p to dir %p dentry %p\n",
old_dir, old_dentry, new_dir, new_dentry);
req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
struct inode *dir, *inode;
struct ceph_mds_client *mdsc;
+ valid = fscrypt_d_revalidate(dentry, flags);
+ if (valid <= 0)
+ return valid;
+
if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
inode = d_inode(dentry);
}
- dout("d_revalidate %p '%pd' inode %p offset 0x%llx\n", dentry,
- dentry, inode, ceph_dentry(dentry)->offset);
+ dout("d_revalidate %p '%pd' inode %p offset 0x%llx nokey %d\n", dentry,
+ dentry, inode, ceph_dentry(dentry)->offset,
+ !!(dentry->d_flags & DCACHE_NOKEY_NAME));
mdsc = ceph_sb_to_client(dir->i_sb)->mdsc;
#include "super.h"
#include "mds_client.h"
+#include "crypto.h"
/*
* Basic fh
{
struct ceph_mds_client *mdsc;
struct ceph_mds_request *req;
+ struct inode *dir = d_inode(parent);
struct inode *inode = d_inode(child);
+ struct ceph_mds_reply_info_parsed *rinfo;
int err;
if (ceph_snap(inode) != CEPH_NOSNAP)
if (IS_ERR(req))
return PTR_ERR(req);
- inode_lock(d_inode(parent));
-
+ inode_lock(dir);
req->r_inode = inode;
ihold(inode);
req->r_ino2 = ceph_vino(d_inode(parent));
- req->r_parent = d_inode(parent);
- ihold(req->r_parent);
+ req->r_parent = dir;
+ ihold(dir);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ inode_unlock(dir);
- inode_unlock(d_inode(parent));
+ if (err)
+ goto out;
- if (!err) {
- struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
+ rinfo = &req->r_reply_info;
+ if (!IS_ENCRYPTED(dir)) {
memcpy(name, rinfo->dname, rinfo->dname_len);
name[rinfo->dname_len] = 0;
- dout("get_name %p ino %llx.%llx name %s\n",
- child, ceph_vinop(inode), name);
} else {
- dout("get_name %p ino %llx.%llx err %d\n",
- child, ceph_vinop(inode), err);
- }
+ struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+ struct ceph_fname fname = { .dir = dir,
+ .name = rinfo->dname,
+ .ctext = rinfo->altname,
+ .name_len = rinfo->dname_len,
+ .ctext_len = rinfo->altname_len };
+
+ err = ceph_fname_alloc_buffer(dir, &oname);
+ if (err < 0)
+ goto out;
+ err = ceph_fname_to_usr(&fname, NULL, &oname, NULL);
+ if (!err) {
+ memcpy(name, oname.name, oname.len);
+ name[oname.len] = 0;
+ }
+ ceph_fname_free_buffer(dir, &oname);
+ }
+out:
+ dout("get_name %p ino %llx.%llx err %d %s%s\n",
+ child, ceph_vinop(inode), err,
+ err ? "" : "name ", err ? "" : name);
ceph_mdsc_put_request(req);
return err;
}
/* filter out O_CREAT|O_EXCL; vfs did that already. yuck. */
flags = file->f_flags & ~(O_CREAT|O_EXCL);
- if (S_ISDIR(inode->i_mode))
+ if (S_ISDIR(inode->i_mode)) {
flags = O_DIRECTORY; /* mds likes to know */
+ } else if (S_ISREG(inode->i_mode)) {
+ err = fscrypt_file_open(inode, file);
+ if (err)
+ return err;
+ }
dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
ceph_vinop(inode), file, flags, file->f_flags);
ceph_mdsc_release_dir_caps(req);
}
-static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
+static int ceph_finish_async_create(struct inode *dir, struct inode *inode,
+ struct dentry *dentry,
struct file *file, umode_t mode,
struct ceph_mds_request *req,
struct ceph_acl_sec_ctx *as_ctx,
struct ceph_mds_reply_info_in iinfo = { .in = &in };
struct ceph_inode_info *ci = ceph_inode(dir);
struct ceph_dentry_info *di = ceph_dentry(dentry);
- struct inode *inode;
struct timespec64 now;
struct ceph_string *pool_ns;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
ktime_get_real_ts64(&now);
- inode = ceph_get_inode(dentry->d_sb, vino);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
-
iinfo.inline_version = CEPH_INLINE_NONE;
iinfo.change_attr = 1;
ceph_encode_timespec64(&iinfo.btime, &now);
ceph_dir_clear_complete(dir);
if (!d_unhashed(dentry))
d_drop(dentry);
- if (inode->i_state & I_NEW)
- discard_new_inode(inode);
+ discard_new_inode(inode);
} else {
struct dentry *dn;
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
+ struct inode *new_inode = NULL;
struct dentry *dn;
struct ceph_acl_sec_ctx as_ctx = {};
bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
*/
flags &= ~O_TRUNC;
+retry:
if (flags & O_CREAT) {
if (ceph_quota_is_max_files_exceeded(dir))
return -EDQUOT;
- err = ceph_pre_init_acls(dir, &mode, &as_ctx);
- if (err < 0)
- return err;
- err = ceph_security_init_secctx(dentry, mode, &as_ctx);
- if (err < 0)
+
+ new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
+ if (IS_ERR(new_inode)) {
+ err = PTR_ERR(new_inode);
goto out_ctx;
+ }
/* Async create can't handle more than a page of xattrs */
if (as_ctx.pagelist &&
!list_is_singular(&as_ctx.pagelist->head))
/* If it's not being looked up, it's negative */
return -ENOENT;
}
-retry:
+
/* do the open */
req = prepare_open_request(dir->i_sb, flags, mode);
if (IS_ERR(req)) {
req->r_args.open.mask = cpu_to_le32(mask);
req->r_parent = dir;
ihold(dir);
+ if (IS_ENCRYPTED(dir)) {
+ set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+ err = fscrypt_prepare_lookup_partial(dir, dentry);
+ if (err < 0)
+ goto out_req;
+ }
if (flags & O_CREAT) {
struct ceph_file_layout lo;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL |
CEPH_CAP_XATTR_EXCL;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
- if (as_ctx.pagelist) {
- req->r_pagelist = as_ctx.pagelist;
- as_ctx.pagelist = NULL;
- }
- if (try_async &&
- (req->r_dir_caps =
- try_prep_async_create(dir, dentry, &lo,
- &req->r_deleg_ino))) {
+
+ ceph_as_ctx_to_req(req, &as_ctx);
+
+ if (try_async && (req->r_dir_caps =
+ try_prep_async_create(dir, dentry, &lo,
+ &req->r_deleg_ino))) {
+ struct ceph_vino vino = { .ino = req->r_deleg_ino,
+ .snap = CEPH_NOSNAP };
struct ceph_dentry_info *di = ceph_dentry(dentry);
set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
req->r_callback = ceph_async_create_cb;
+ /* Hash inode before RPC */
+ new_inode = ceph_get_inode(dir->i_sb, vino, new_inode);
+ if (IS_ERR(new_inode)) {
+ err = PTR_ERR(new_inode);
+ new_inode = NULL;
+ goto out_req;
+ }
+ WARN_ON_ONCE(!(new_inode->i_state & I_NEW));
+
spin_lock(&dentry->d_lock);
di->flags |= CEPH_DENTRY_ASYNC_CREATE;
spin_unlock(&dentry->d_lock);
err = ceph_mdsc_submit_request(mdsc, dir, req);
if (!err) {
- err = ceph_finish_async_create(dir, dentry,
- file, mode, req,
- &as_ctx, &lo);
+ err = ceph_finish_async_create(dir, new_inode,
+ dentry, file,
+ mode, req,
+ &as_ctx, &lo);
+ new_inode = NULL;
} else if (err == -EJUKEBOX) {
restore_deleg_ino(dir, req->r_deleg_ino);
ceph_mdsc_put_request(req);
+ discard_new_inode(new_inode);
+ ceph_release_acl_sec_ctx(&as_ctx);
+ memset(&as_ctx, 0, sizeof(as_ctx));
+ new_inode = NULL;
try_async = false;
ceph_put_string(rcu_dereference_raw(lo.pool_ns));
goto retry;
}
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
+ req->r_new_inode = new_inode;
+ new_inode = NULL;
err = ceph_mdsc_do_request(mdsc, (flags & O_CREAT) ? dir : NULL, req);
if (err == -ENOENT) {
dentry = ceph_handle_snapdir(req, dentry);
dout("atomic_open finish_no_open on dn %p\n", dn);
err = finish_no_open(file, dn);
} else {
+ if (IS_ENCRYPTED(dir) &&
+ !fscrypt_has_permitted_context(dir, d_inode(dentry))) {
+ pr_warn("Inconsistent encryption context (parent %llx:%llx child %llx:%llx)\n",
+ ceph_vinop(dir), ceph_vinop(d_inode(dentry)));
+ goto out_req;
+ }
+
dout("atomic_open finish_open on dn %p\n", dn);
if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
struct inode *newino = d_inode(dentry);
}
out_req:
ceph_mdsc_put_request(req);
+ iput(new_inode);
out_ctx:
ceph_release_acl_sec_ctx(&as_ctx);
dout("atomic_open result=%d\n", err);
* If we get a short result from the OSD, check against i_size; we need to
* only return a short read to the caller if we hit EOF.
*/
-static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
- int *retry_op)
+ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+ struct iov_iter *to, int *retry_op,
+ u64 *last_objver)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_client *osdc = &fsc->client->osdc;
ssize_t ret;
- u64 off = iocb->ki_pos;
+ u64 off = *ki_pos;
u64 len = iov_iter_count(to);
u64 i_size = i_size_read(inode);
+ bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
+ u64 objver = 0;
- dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
- (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+ dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
+
+ if (ceph_inode_is_shutdown(inode))
+ return -EIO;
if (!len)
return 0;
bool more;
int idx;
size_t left;
+ struct ceph_osd_req_op *op;
+ u64 read_off = off;
+ u64 read_len = len;
+
+ /* determine new offset/length if encrypted */
+ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+
+ dout("sync_read orig %llu~%llu reading %llu~%llu",
+ off, len, read_off, read_len);
req = ceph_osdc_new_request(osdc, &ci->i_layout,
- ci->i_vino, off, &len, 0, 1,
- CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
+ ci->i_vino, read_off, &read_len, 0, 1,
+ sparse ? CEPH_OSD_OP_SPARSE_READ :
+ CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
if (IS_ERR(req)) {
break;
}
+ /* adjust len downward if the request truncated the len */
+ if (off + len > read_off + read_len)
+ len = read_off + read_len - off;
more = len < iov_iter_count(to);
- num_pages = calc_pages_for(off, len);
- page_off = off & ~PAGE_MASK;
+ num_pages = calc_pages_for(read_off, read_len);
+ page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
break;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
+ osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
+ offset_in_page(read_off),
false, false);
+
+ op = &req->r_ops[0];
+ if (sparse) {
+ ret = ceph_alloc_sparse_ext_map(op);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
+
ceph_osdc_start_request(osdc, req);
ret = ceph_osdc_wait_request(osdc, req);
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
- len, ret);
+ read_len, ret);
- ceph_osdc_put_request(req);
+ if (ret > 0)
+ objver = req->r_version;
i_size = i_size_read(inode);
dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
off, len, ret, i_size, (more ? " MORE" : ""));
- if (ret == -ENOENT)
+ /* Fix it to go to end of extent map */
+ if (sparse && ret >= 0)
+ ret = ceph_sparse_ext_map_end(op);
+ else if (ret == -ENOENT)
ret = 0;
+
+ if (ret > 0 && IS_ENCRYPTED(inode)) {
+ int fret;
+
+ fret = ceph_fscrypt_decrypt_extents(inode, pages,
+ read_off, op->extent.sparse_ext,
+ op->extent.sparse_ext_cnt);
+ if (fret < 0) {
+ ret = fret;
+ ceph_osdc_put_request(req);
+ break;
+ }
+
+ /* account for any partial block at the beginning */
+ fret -= (off - read_off);
+
+ /*
+ * Short read after big offset adjustment?
+ * Nothing is usable, just call it a zero
+ * len read.
+ */
+ fret = max(fret, 0);
+
+ /* account for partial block at the end */
+ ret = min_t(ssize_t, fret, len);
+ }
+
+ ceph_osdc_put_request(req);
+
+ /* Short read but not EOF? Zero out the remainder. */
if (ret >= 0 && ret < len && (off + ret < i_size)) {
int zlen = min(len - ret, i_size - off - ret);
int zoff = page_off + ret;
+
dout("sync_read zero gap %llu~%llu\n",
- off + ret, off + ret + zlen);
+ off + ret, off + ret + zlen);
ceph_zero_page_vector_range(zoff, zlen, pages);
ret += zlen;
}
idx = 0;
left = ret > 0 ? ret : 0;
while (left > 0) {
- size_t len, copied;
- page_off = off & ~PAGE_MASK;
- len = min_t(size_t, left, PAGE_SIZE - page_off);
+ size_t plen, copied;
+
+ plen = min_t(size_t, left, PAGE_SIZE - page_off);
SetPageUptodate(pages[idx]);
copied = copy_page_to_iter(pages[idx++],
- page_off, len, to);
+ page_off, plen, to);
off += copied;
left -= copied;
- if (copied < len) {
+ page_off = 0;
+ if (copied < plen) {
ret = -EFAULT;
break;
}
break;
}
- if (off > iocb->ki_pos) {
- if (off >= i_size) {
- *retry_op = CHECK_EOF;
- ret = i_size - iocb->ki_pos;
- iocb->ki_pos = i_size;
- } else {
- ret = off - iocb->ki_pos;
- iocb->ki_pos = off;
+ if (ret > 0) {
+ if (off > *ki_pos) {
+ if (off >= i_size) {
+ *retry_op = CHECK_EOF;
+ ret = i_size - *ki_pos;
+ *ki_pos = i_size;
+ } else {
+ ret = off - *ki_pos;
+ *ki_pos = off;
+ }
}
- }
+ if (last_objver)
+ *last_objver = objver;
+ }
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
return ret;
}
+static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
+ int *retry_op)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+
+ dout("sync_read on file %p %llx~%zx %s\n", file, iocb->ki_pos,
+ iov_iter_count(to), (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
+
+ return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
+}
+
struct ceph_aio_request {
struct kiocb *iocb;
size_t total_len;
struct inode *inode = req->r_inode;
struct ceph_aio_request *aio_req = req->r_priv;
struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ struct ceph_osd_req_op *op = &req->r_ops[0];
struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric;
unsigned int len = osd_data->bvec_pos.iter.bi_size;
+ bool sparse = (op->op == CEPH_OSD_OP_SPARSE_READ);
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
BUG_ON(!osd_data->num_bvecs);
}
rc = -ENOMEM;
} else if (!aio_req->write) {
+ if (sparse && rc >= 0)
+ rc = ceph_sparse_ext_map_end(op);
if (rc == -ENOENT)
rc = 0;
if (rc >= 0 && len > rc) {
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
bool should_dirty = !write && user_backed_iter(iter);
+ bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
return -EROFS;
while (iov_iter_count(iter) > 0) {
u64 size = iov_iter_count(iter);
ssize_t len;
+ struct ceph_osd_req_op *op;
+ int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ;
if (write)
size = min_t(u64, size, fsc->mount_options->wsize);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
vino, pos, &size, 0,
1,
- write ? CEPH_OSD_OP_WRITE :
- CEPH_OSD_OP_READ,
+ write ? CEPH_OSD_OP_WRITE : readop,
flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
}
osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
+ op = &req->r_ops[0];
+ if (sparse) {
+ ret = ceph_alloc_sparse_ext_map(op);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ break;
+ }
+ }
if (aio_req) {
aio_req->total_len += len;
size = i_size_read(inode);
if (!write) {
- if (ret == -ENOENT)
+ if (sparse && ret >= 0)
+ ret = ceph_sparse_ext_map_end(op);
+ else if (ret == -ENOENT)
ret = 0;
+
if (ret >= 0 && ret < len && pos + ret < size) {
struct iov_iter i;
int zlen = min_t(size_t, len - ret,
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_vino vino;
+ struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
struct page **pages;
u64 len;
int num_pages;
int written = 0;
- int flags;
int ret;
bool check_caps = false;
struct timespec64 mtime = current_time(inode);
return ret;
ceph_fscache_invalidate(inode, false);
- ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_SHIFT,
- (pos + count - 1) >> PAGE_SHIFT);
- if (ret < 0)
- dout("invalidate_inode_pages2_range returned %d\n", ret);
-
- flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
while ((len = iov_iter_count(from)) > 0) {
size_t left;
int n;
-
- vino = ceph_vino(inode);
- req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 0, 1,
- CEPH_OSD_OP_WRITE, flags, snapc,
- ci->i_truncate_seq,
- ci->i_truncate_size,
- false);
- if (IS_ERR(req)) {
- ret = PTR_ERR(req);
- break;
- }
+ u64 write_pos = pos;
+ u64 write_len = len;
+ u64 objnum, objoff;
+ u32 xlen;
+ u64 assert_ver = 0;
+ bool rmw;
+ bool first, last;
+ struct iov_iter saved_iter = *from;
+ size_t off;
+
+ ceph_fscrypt_adjust_off_and_len(inode, &write_pos, &write_len);
+
+ /* clamp the length to the end of first object */
+ ceph_calc_file_object_mapping(&ci->i_layout, write_pos,
+ write_len, &objnum, &objoff,
+ &xlen);
+ write_len = xlen;
+
+ /* adjust len downward if it goes beyond current object */
+ if (pos + len > write_pos + write_len)
+ len = write_pos + write_len - pos;
/*
- * write from beginning of first page,
- * regardless of io alignment
+ * If we had to adjust the length or position to align with a
+ * crypto block, then we must do a read/modify/write cycle. We
+ * use a version assertion to redrive the thing if something
+ * changes in between.
*/
- num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ first = pos != write_pos;
+ last = (pos + len) != (write_pos + write_len);
+ rmw = first || last;
+
+ dout("sync_write ino %llx %lld~%llu adjusted %lld~%llu -- %srmw\n",
+ ci->i_vino.ino, pos, len, write_pos, write_len,
+ rmw ? "" : "no ");
+ /*
+ * The data is emplaced into the page as it would be if it were
+ * in an array of pagecache pages.
+ */
+ num_pages = calc_pages_for(write_pos, write_len);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
ret = PTR_ERR(pages);
- goto out;
+ break;
+ }
+
+ /* Do we need to preload the pages? */
+ if (rmw) {
+ u64 first_pos = write_pos;
+ u64 last_pos = (write_pos + write_len) - CEPH_FSCRYPT_BLOCK_SIZE;
+ u64 read_len = CEPH_FSCRYPT_BLOCK_SIZE;
+ struct ceph_osd_req_op *op;
+
+ /* We should only need to do this for encrypted inodes */
+ WARN_ON_ONCE(!IS_ENCRYPTED(inode));
+
+ /* No need to do two reads if first and last blocks are same */
+ if (first && last_pos == first_pos)
+ last = false;
+
+ /*
+ * Allocate a read request for one or two extents,
+ * depending on how the request was aligned.
+ */
+ req = ceph_osdc_new_request(osdc, &ci->i_layout,
+ ci->i_vino, first ? first_pos : last_pos,
+ &read_len, 0, (first && last) ? 2 : 1,
+ CEPH_OSD_OP_SPARSE_READ, CEPH_OSD_FLAG_READ,
+ NULL, ci->i_truncate_seq,
+ ci->i_truncate_size, false);
+ if (IS_ERR(req)) {
+ ceph_release_page_vector(pages, num_pages);
+ ret = PTR_ERR(req);
+ break;
+ }
+
+ /* Something is misaligned! */
+ if (read_len != CEPH_FSCRYPT_BLOCK_SIZE) {
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ ret = -EIO;
+ break;
+ }
+
+ /* Add extent for first block? */
+ op = &req->r_ops[0];
+
+ if (first) {
+ osd_req_op_extent_osd_data_pages(req, 0, pages,
+ CEPH_FSCRYPT_BLOCK_SIZE,
+ offset_in_page(first_pos),
+ false, false);
+ /* We only expect a single extent here */
+ ret = __ceph_alloc_sparse_ext_map(op, 1);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ }
+
+ /* Add extent for last block */
+ if (last) {
+ /* Init the other extent if first extent has been used */
+ if (first) {
+ op = &req->r_ops[1];
+ osd_req_op_extent_init(req, 1,
+ CEPH_OSD_OP_SPARSE_READ,
+ last_pos, CEPH_FSCRYPT_BLOCK_SIZE,
+ ci->i_truncate_size,
+ ci->i_truncate_seq);
+ }
+
+ ret = __ceph_alloc_sparse_ext_map(op, 1);
+ if (ret) {
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+
+ osd_req_op_extent_osd_data_pages(req, first ? 1 : 0,
+ &pages[num_pages - 1],
+ CEPH_FSCRYPT_BLOCK_SIZE,
+ offset_in_page(last_pos),
+ false, false);
+ }
+
+ ceph_osdc_start_request(osdc, req);
+ ret = ceph_osdc_wait_request(osdc, req);
+
+ /* FIXME: length field is wrong if there are 2 extents */
+ ceph_update_read_metrics(&fsc->mdsc->metric,
+ req->r_start_latency,
+ req->r_end_latency,
+ read_len, ret);
+
+ /* Ok if object is not already present */
+ if (ret == -ENOENT) {
+ /*
+ * If there is no object, then we can't assert
+ * on its version. Set it to 0, and we'll use an
+ * exclusive create instead.
+ */
+ ceph_osdc_put_request(req);
+ ret = 0;
+
+ /*
+ * zero out the soon-to-be uncopied parts of the
+ * first and last pages.
+ */
+ if (first)
+ zero_user_segment(pages[0], 0,
+ offset_in_page(first_pos));
+ if (last)
+ zero_user_segment(pages[num_pages - 1],
+ offset_in_page(last_pos),
+ PAGE_SIZE);
+ } else {
+ if (ret < 0) {
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+
+ op = &req->r_ops[0];
+ if (op->extent.sparse_ext_cnt == 0) {
+ if (first)
+ zero_user_segment(pages[0], 0,
+ offset_in_page(first_pos));
+ else
+ zero_user_segment(pages[num_pages - 1],
+ offset_in_page(last_pos),
+ PAGE_SIZE);
+ } else if (op->extent.sparse_ext_cnt != 1 ||
+ ceph_sparse_ext_map_end(op) !=
+ CEPH_FSCRYPT_BLOCK_SIZE) {
+ ret = -EIO;
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+
+ if (first && last) {
+ op = &req->r_ops[1];
+ if (op->extent.sparse_ext_cnt == 0) {
+ zero_user_segment(pages[num_pages - 1],
+ offset_in_page(last_pos),
+ PAGE_SIZE);
+ } else if (op->extent.sparse_ext_cnt != 1 ||
+ ceph_sparse_ext_map_end(op) !=
+ CEPH_FSCRYPT_BLOCK_SIZE) {
+ ret = -EIO;
+ ceph_osdc_put_request(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ }
+
+ /* Grab assert version. It must be non-zero. */
+ assert_ver = req->r_version;
+ WARN_ON_ONCE(ret > 0 && assert_ver == 0);
+
+ ceph_osdc_put_request(req);
+ if (first) {
+ ret = ceph_fscrypt_decrypt_block_inplace(inode,
+ pages[0], CEPH_FSCRYPT_BLOCK_SIZE,
+ offset_in_page(first_pos),
+ first_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+ if (ret < 0) {
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ }
+ if (last) {
+ ret = ceph_fscrypt_decrypt_block_inplace(inode,
+ pages[num_pages - 1],
+ CEPH_FSCRYPT_BLOCK_SIZE,
+ offset_in_page(last_pos),
+ last_pos >> CEPH_FSCRYPT_BLOCK_SHIFT);
+ if (ret < 0) {
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ }
+ }
}
left = len;
+ off = offset_in_page(pos);
for (n = 0; n < num_pages; n++) {
- size_t plen = min_t(size_t, left, PAGE_SIZE);
- ret = copy_page_from_iter(pages[n], 0, plen, from);
+ size_t plen = min_t(size_t, left, PAGE_SIZE - off);
+
+ /* copy the data */
+ ret = copy_page_from_iter(pages[n], off, plen, from);
if (ret != plen) {
ret = -EFAULT;
break;
}
+ off = 0;
left -= ret;
}
-
if (ret < 0) {
+ dout("sync_write write failed with %d\n", ret);
ceph_release_page_vector(pages, num_pages);
- goto out;
+ break;
}
- req->r_inode = inode;
+ if (IS_ENCRYPTED(inode)) {
+ ret = ceph_fscrypt_encrypt_pages(inode, pages,
+ write_pos, write_len,
+ GFP_KERNEL);
+ if (ret < 0) {
+ dout("encryption failed with %d\n", ret);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ }
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
- false, true);
+ req = ceph_osdc_new_request(osdc, &ci->i_layout,
+ ci->i_vino, write_pos, &write_len,
+ rmw ? 1 : 0, rmw ? 2 : 1,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE,
+ snapc, ci->i_truncate_seq,
+ ci->i_truncate_size, false);
+ if (IS_ERR(req)) {
+ ret = PTR_ERR(req);
+ ceph_release_page_vector(pages, num_pages);
+ break;
+ }
+ dout("sync_write write op %lld~%llu\n", write_pos, write_len);
+ osd_req_op_extent_osd_data_pages(req, rmw ? 1 : 0, pages, write_len,
+ offset_in_page(write_pos), false,
+ true);
+ req->r_inode = inode;
req->r_mtime = mtime;
- ceph_osdc_start_request(&fsc->client->osdc, req);
- ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
+
+ /* Set up the assertion */
+ if (rmw) {
+ /*
+ * Set up the assertion. If we don't have a version
+ * number, then the object doesn't exist yet. Use an
+ * exclusive create instead of a version assertion in
+ * that case.
+ */
+ if (assert_ver) {
+ osd_req_op_init(req, 0, CEPH_OSD_OP_ASSERT_VER, 0);
+ req->r_ops[0].assert_ver.ver = assert_ver;
+ } else {
+ osd_req_op_init(req, 0, CEPH_OSD_OP_CREATE,
+ CEPH_OSD_OP_FLAG_EXCL);
+ }
+ }
+
+ ceph_osdc_start_request(osdc, req);
+ ret = ceph_osdc_wait_request(osdc, req);
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, len, ret);
-out:
ceph_osdc_put_request(req);
if (ret != 0) {
+ dout("sync_write osd write returned %d\n", ret);
+ /* Version changed! Must re-do the rmw cycle */
+ if ((assert_ver && (ret == -ERANGE || ret == -EOVERFLOW)) ||
+ (!assert_ver && ret == -EEXIST)) {
+ /* We should only ever see this on a rmw */
+ WARN_ON_ONCE(!rmw);
+
+ /* The version should never go backward */
+ WARN_ON_ONCE(ret == -EOVERFLOW);
+
+ *from = saved_iter;
+
+ /* FIXME: limit number of times we loop? */
+ continue;
+ }
ceph_set_error_write(ci);
break;
}
ceph_clear_error_write(ci);
+
+ /*
+ * We successfully wrote to a range of the file. Declare
+ * that region of the pagecache invalid.
+ */
+ ret = invalidate_inode_pages2_range(
+ inode->i_mapping,
+ pos >> PAGE_SHIFT,
+ (pos + len - 1) >> PAGE_SHIFT);
+ if (ret < 0) {
+ dout("invalidate_inode_pages2_range returned %d\n",
+ ret);
+ ret = 0;
+ }
pos += len;
written += len;
+ dout("sync_write written %d\n", written);
if (pos > i_size_read(inode)) {
check_caps = ceph_inode_set_size(inode, pos);
if (check_caps)
ret = written;
iocb->ki_pos = pos;
}
+ dout("sync_write returning %d\n", ret);
return ret;
}
ceph_cap_string(got));
if (!ceph_has_inline_data(ci)) {
- if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
+ if (!retry_op &&
+ (iocb->ki_flags & IOCB_DIRECT) &&
+ !IS_ENCRYPTED(inode)) {
ret = ceph_direct_read_write(iocb, to,
NULL, NULL);
if (ret >= 0 && ret < len)
/* we might need to revert back to that point */
data = *from;
- if (iocb->ki_flags & IOCB_DIRECT)
+ if ((iocb->ki_flags & IOCB_DIRECT) && !IS_ENCRYPTED(inode))
written = ceph_direct_read_write(iocb, &data, snapc,
&prealloc_cf);
else
if (!S_ISREG(inode->i_mode))
return -EOPNOTSUPP;
+ if (IS_ENCRYPTED(inode))
+ return -EOPNOTSUPP;
+
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;
return -EOPNOTSUPP;
}
+ /* Every encrypted inode gets its own key, so we can't offload them */
+ if (IS_ENCRYPTED(src_inode) || IS_ENCRYPTED(dst_inode))
+ return -EOPNOTSUPP;
+
if (len < src_ci->i_layout.object_size)
return -EOPNOTSUPP; /* no remote copy will be done */
#include <linux/random.h>
#include <linux/sort.h>
#include <linux/iversion.h>
+#include <linux/fscrypt.h>
#include "super.h"
#include "mds_client.h"
#include "cache.h"
+#include "crypto.h"
#include <linux/ceph/decode.h>
/*
*/
static const struct inode_operations ceph_symlink_iops;
+static const struct inode_operations ceph_encrypted_symlink_iops;
static void ceph_inode_work(struct work_struct *work);
return 0;
}
-struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino)
+/**
+ * ceph_new_inode - allocate a new inode in advance of an expected create
+ * @dir: parent directory for new inode
+ * @dentry: dentry that may eventually point to new inode
+ * @mode: mode of new inode
+ * @as_ctx: pointer to inherited security context
+ *
+ * Allocate a new inode in advance of an operation to create a new inode.
+ * This allocates the inode and sets up the acl_sec_ctx with appropriate
+ * info for the new inode.
+ *
+ * Returns a pointer to the new inode or an ERR_PTR.
+ */
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+ umode_t *mode, struct ceph_acl_sec_ctx *as_ctx)
+{
+ int err;
+ struct inode *inode;
+
+ inode = new_inode(dir->i_sb);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ if (!S_ISLNK(*mode)) {
+ err = ceph_pre_init_acls(dir, mode, as_ctx);
+ if (err < 0)
+ goto out_err;
+ }
+
+ inode->i_state = 0;
+ inode->i_mode = *mode;
+
+ err = ceph_security_init_secctx(dentry, *mode, as_ctx);
+ if (err < 0)
+ goto out_err;
+
+ /*
+ * We'll skip setting fscrypt context for snapshots, leaving that for
+ * the handle_reply().
+ */
+ if (ceph_snap(dir) != CEPH_SNAPDIR) {
+ err = ceph_fscrypt_prepare_context(dir, inode, as_ctx);
+ if (err)
+ goto out_err;
+ }
+
+ return inode;
+out_err:
+ iput(inode);
+ return ERR_PTR(err);
+}
+
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+ struct ceph_acl_sec_ctx *as_ctx)
+{
+ if (as_ctx->pagelist) {
+ req->r_pagelist = as_ctx->pagelist;
+ as_ctx->pagelist = NULL;
+ }
+ ceph_fscrypt_as_ctx_to_req(req, as_ctx);
+}
+
+/**
+ * ceph_get_inode - find or create/hash a new inode
+ * @sb: superblock to search and allocate in
+ * @vino: vino to search for
+ * @newino: optional new inode to insert if one isn't found (may be NULL)
+ *
+ * Search for or insert a new inode into the hash for the given vino, and
+ * return a reference to it. If new is non-NULL, its reference is consumed.
+ */
+struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino,
+ struct inode *newino)
{
struct inode *inode;
if (ceph_vino_is_reserved(vino))
return ERR_PTR(-EREMOTEIO);
- inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
- ceph_set_ino_cb, &vino);
- if (!inode)
+ if (newino) {
+ inode = inode_insert5(newino, (unsigned long)vino.ino,
+ ceph_ino_compare, ceph_set_ino_cb, &vino);
+ if (inode != newino)
+ iput(newino);
+ } else {
+ inode = iget5_locked(sb, (unsigned long)vino.ino,
+ ceph_ino_compare, ceph_set_ino_cb, &vino);
+ }
+
+ if (!inode) {
+ dout("No inode found for %llx.%llx\n", vino.ino, vino.snap);
return ERR_PTR(-ENOMEM);
+ }
dout("get_inode on %llu=%llx.%llx got %p new %d\n", ceph_present_inode(inode),
ceph_vinop(inode), inode, !!(inode->i_state & I_NEW));
.ino = ceph_ino(parent),
.snap = CEPH_SNAPDIR,
};
- struct inode *inode = ceph_get_inode(parent->i_sb, vino);
+ struct inode *inode = ceph_get_inode(parent->i_sb, vino, NULL);
struct ceph_inode_info *ci = ceph_inode(inode);
+ int ret = -ENOTDIR;
if (IS_ERR(inode))
return inode;
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
+#ifdef CONFIG_FS_ENCRYPTION
+ /* if encrypted, just borrow fscrypt_auth from parent */
+ if (IS_ENCRYPTED(parent)) {
+ struct ceph_inode_info *pci = ceph_inode(parent);
+
+ ci->fscrypt_auth = kmemdup(pci->fscrypt_auth,
+ pci->fscrypt_auth_len,
+ GFP_KERNEL);
+ if (ci->fscrypt_auth) {
+ inode->i_flags |= S_ENCRYPTED;
+ ci->fscrypt_auth_len = pci->fscrypt_auth_len;
+ } else {
+ dout("Failed to alloc snapdir fscrypt_auth\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
+#endif
if (inode->i_state & I_NEW) {
inode->i_op = &ceph_snapdir_iops;
inode->i_fop = &ceph_snapdir_fops;
discard_new_inode(inode);
else
iput(inode);
- return ERR_PTR(-ENOTDIR);
+ return ERR_PTR(ret);
}
const struct inode_operations ceph_file_iops = {
ci->i_truncate_seq = 0;
ci->i_truncate_size = 0;
ci->i_truncate_pending = 0;
+ ci->i_truncate_pagecache_size = 0;
ci->i_max_size = 0;
ci->i_reported_size = 0;
INIT_WORK(&ci->i_work, ceph_inode_work);
ci->i_work_mask = 0;
memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
+#ifdef CONFIG_FS_ENCRYPTION
+ ci->fscrypt_auth = NULL;
+ ci->fscrypt_auth_len = 0;
+#endif
return &ci->netfs.inode;
}
struct ceph_inode_info *ci = ceph_inode(inode);
kfree(ci->i_symlink);
+#ifdef CONFIG_FS_ENCRYPTION
+ kfree(ci->fscrypt_auth);
+#endif
+ fscrypt_free_inode(inode);
kmem_cache_free(ceph_inode_cachep, ci);
}
clear_inode(inode);
ceph_fscache_unregister_inode_cookie(ci);
+ fscrypt_put_encryption_info(inode);
__ceph_remove_caps(ci);
ceph_fscache_update(inode);
ci->i_reported_size = size;
if (truncate_seq != ci->i_truncate_seq) {
- dout("truncate_seq %u -> %u\n",
+ dout("%s truncate_seq %u -> %u\n", __func__,
ci->i_truncate_seq, truncate_seq);
ci->i_truncate_seq = truncate_seq;
}
}
}
- if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0 &&
- ci->i_truncate_size != truncate_size) {
- dout("truncate_size %lld -> %llu\n", ci->i_truncate_size,
- truncate_size);
+
+ /*
+ * It's possible that the new sizes of the two consecutive
+ * size truncations will be in the same fscrypt last block,
+ * and we need to truncate the corresponding page caches
+ * anyway.
+ */
+ if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) >= 0) {
+ dout("%s truncate_size %lld -> %llu, encrypted %d\n", __func__,
+ ci->i_truncate_size, truncate_size, !!IS_ENCRYPTED(inode));
+
ci->i_truncate_size = truncate_size;
+
+ if (IS_ENCRYPTED(inode)) {
+ dout("%s truncate_pagecache_size %lld -> %llu\n",
+ __func__, ci->i_truncate_pagecache_size, size);
+ ci->i_truncate_pagecache_size = size;
+ } else {
+ ci->i_truncate_pagecache_size = truncate_size;
+ }
}
return queue_trunc;
}
inode, time_warp_seq, ci->i_time_warp_seq);
}
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static int decode_encrypted_symlink(const char *encsym, int enclen, u8 **decsym)
+{
+ int declen;
+ u8 *sym;
+
+ sym = kmalloc(enclen + 1, GFP_NOFS);
+ if (!sym)
+ return -ENOMEM;
+
+ declen = ceph_base64_decode(encsym, enclen, sym);
+ if (declen < 0) {
+ pr_err("%s: can't decode symlink (%d). Content: %.*s\n",
+ __func__, declen, enclen, encsym);
+ kfree(sym);
+ return -EIO;
+ }
+ sym[declen + 1] = '\0';
+ *decsym = sym;
+ return declen;
+}
+#else
+static int decode_encrypted_symlink(const char *encsym, int symlen, u8 **decsym)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/*
* Populate an inode based on info from mds. May be called on new or
* existing inodes.
issued |= __ceph_caps_dirty(ci);
new_issued = ~issued & info_caps;
- /* directories have fl_stripe_unit set to zero */
- if (le32_to_cpu(info->layout.fl_stripe_unit))
- inode->i_blkbits =
- fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
- else
- inode->i_blkbits = CEPH_BLOCK_SHIFT;
-
__ceph_update_quota(ci, iinfo->max_bytes, iinfo->max_files);
+#ifdef CONFIG_FS_ENCRYPTION
+ if (iinfo->fscrypt_auth_len &&
+ ((inode->i_state & I_NEW) || (ci->fscrypt_auth_len == 0))) {
+ kfree(ci->fscrypt_auth);
+ ci->fscrypt_auth_len = iinfo->fscrypt_auth_len;
+ ci->fscrypt_auth = iinfo->fscrypt_auth;
+ iinfo->fscrypt_auth = NULL;
+ iinfo->fscrypt_auth_len = 0;
+ inode_set_flags(inode, S_ENCRYPTED, S_ENCRYPTED);
+ }
+#endif
+
if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) &&
(issued & CEPH_CAP_AUTH_EXCL) == 0) {
inode->i_mode = mode;
ceph_decode_timespec64(&ci->i_snap_btime, &iinfo->snap_btime);
}
+ /* directories have fl_stripe_unit set to zero */
+ if (IS_ENCRYPTED(inode))
+ inode->i_blkbits = CEPH_FSCRYPT_BLOCK_SHIFT;
+ else if (le32_to_cpu(info->layout.fl_stripe_unit))
+ inode->i_blkbits =
+ fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
+ else
+ inode->i_blkbits = CEPH_BLOCK_SHIFT;
+
if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) &&
(issued & CEPH_CAP_LINK_EXCL) == 0)
set_nlink(inode, le32_to_cpu(info->nlink));
if (new_version ||
(new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
+ u64 size = le64_to_cpu(info->size);
s64 old_pool = ci->i_layout.pool_id;
struct ceph_string *old_ns;
pool_ns = old_ns;
+ if (IS_ENCRYPTED(inode) && size &&
+ iinfo->fscrypt_file_len == sizeof(__le64)) {
+ u64 fsize = __le64_to_cpu(*(__le64 *)iinfo->fscrypt_file);
+
+ if (size == round_up(fsize, CEPH_FSCRYPT_BLOCK_SIZE)) {
+ size = fsize;
+ } else {
+ pr_warn("fscrypt size mismatch: size=%llu fscrypt_file=%llu, discarding fscrypt_file size.\n",
+ info->size, size);
+ }
+ }
+
queue_trunc = ceph_fill_file_size(inode, issued,
le32_to_cpu(info->truncate_seq),
le64_to_cpu(info->truncate_size),
- le64_to_cpu(info->size));
+ size);
/* only update max_size on auth cap */
if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
ci->i_max_size != le64_to_cpu(info->max_size)) {
inode->i_fop = &ceph_file_fops;
break;
case S_IFLNK:
- inode->i_op = &ceph_symlink_iops;
if (!ci->i_symlink) {
u32 symlen = iinfo->symlink_len;
char *sym;
spin_unlock(&ci->i_ceph_lock);
- if (symlen != i_size_read(inode)) {
- pr_err("%s %llx.%llx BAD symlink "
- "size %lld\n", __func__,
- ceph_vinop(inode),
- i_size_read(inode));
+ if (IS_ENCRYPTED(inode)) {
+ if (symlen != i_size_read(inode))
+ pr_err("%s %llx.%llx BAD symlink size %lld\n",
+ __func__, ceph_vinop(inode),
+ i_size_read(inode));
+
+ err = decode_encrypted_symlink(iinfo->symlink,
+ symlen, (u8 **)&sym);
+ if (err < 0) {
+ pr_err("%s decoding encrypted symlink failed: %d\n",
+ __func__, err);
+ goto out;
+ }
+ symlen = err;
i_size_write(inode, symlen);
inode->i_blocks = calc_inode_blocks(symlen);
- }
+ } else {
+ if (symlen != i_size_read(inode)) {
+ pr_err("%s %llx.%llx BAD symlink size %lld\n",
+ __func__, ceph_vinop(inode),
+ i_size_read(inode));
+ i_size_write(inode, symlen);
+ inode->i_blocks = calc_inode_blocks(symlen);
+ }
- err = -ENOMEM;
- sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
- if (!sym)
- goto out;
+ err = -ENOMEM;
+ sym = kstrndup(iinfo->symlink, symlen, GFP_NOFS);
+ if (!sym)
+ goto out;
+ }
spin_lock(&ci->i_ceph_lock);
if (!ci->i_symlink)
else
kfree(sym); /* lost a race */
}
- inode->i_link = ci->i_symlink;
+
+ if (IS_ENCRYPTED(inode)) {
+ /*
+ * Encrypted symlinks need to be decrypted before we can
+ * cache their targets in i_link. Don't touch it here.
+ */
+ inode->i_op = &ceph_encrypted_symlink_iops;
+ } else {
+ inode->i_link = ci->i_symlink;
+ inode->i_op = &ceph_symlink_iops;
+ }
break;
case S_IFDIR:
inode->i_op = &ceph_dir_iops;
if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME &&
test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) &&
!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
+ bool is_nokey = false;
struct qstr dname;
struct dentry *dn, *parent;
+ struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+ struct ceph_fname fname = { .dir = dir,
+ .name = rinfo->dname,
+ .ctext = rinfo->altname,
+ .name_len = rinfo->dname_len,
+ .ctext_len = rinfo->altname_len };
BUG_ON(!rinfo->head->is_target);
BUG_ON(req->r_dentry);
parent = d_find_any_alias(dir);
BUG_ON(!parent);
- dname.name = rinfo->dname;
- dname.len = rinfo->dname_len;
+ err = ceph_fname_alloc_buffer(dir, &oname);
+ if (err < 0) {
+ dput(parent);
+ goto done;
+ }
+
+ err = ceph_fname_to_usr(&fname, NULL, &oname, &is_nokey);
+ if (err < 0) {
+ dput(parent);
+ ceph_fname_free_buffer(dir, &oname);
+ goto done;
+ }
+ dname.name = oname.name;
+ dname.len = oname.len;
dname.hash = full_name_hash(parent, dname.name, dname.len);
tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
dname.len, dname.name, dn);
if (!dn) {
dput(parent);
+ ceph_fname_free_buffer(dir, &oname);
err = -ENOMEM;
goto done;
}
+ if (is_nokey) {
+ spin_lock(&dn->d_lock);
+ dn->d_flags |= DCACHE_NOKEY_NAME;
+ spin_unlock(&dn->d_lock);
+ }
err = 0;
} else if (d_really_is_positive(dn) &&
(ceph_ino(d_inode(dn)) != tvino.ino ||
dput(dn);
goto retry_lookup;
}
+ ceph_fname_free_buffer(dir, &oname);
req->r_dentry = dn;
dput(parent);
vino.ino = le64_to_cpu(rde->inode.in->ino);
vino.snap = le64_to_cpu(rde->inode.in->snapid);
- in = ceph_get_inode(req->r_dentry->d_sb, vino);
+ in = ceph_get_inode(req->r_dentry->d_sb, vino, NULL);
if (IS_ERR(in)) {
err = PTR_ERR(in);
dout("new_inode badness got %d\n", err);
struct ceph_mds_session *session)
{
struct dentry *parent = req->r_dentry;
- struct ceph_inode_info *ci = ceph_inode(d_inode(parent));
+ struct inode *inode = d_inode(parent);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct qstr dname;
struct dentry *dn;
tvino.snap = le64_to_cpu(rde->inode.in->snapid);
if (rinfo->hash_order) {
- u32 hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
- rde->name, rde->name_len);
- hash = ceph_frag_value(hash);
+ u32 hash = ceph_frag_value(rde->raw_hash);
if (hash != last_hash)
fpos_offset = 2;
last_hash = hash;
err = -ENOMEM;
goto out;
}
+ if (rde->is_nokey) {
+ spin_lock(&dn->d_lock);
+ dn->d_flags |= DCACHE_NOKEY_NAME;
+ spin_unlock(&dn->d_lock);
+ }
} else if (d_really_is_positive(dn) &&
(ceph_ino(d_inode(dn)) != tvino.ino ||
ceph_snap(d_inode(dn)) != tvino.snap)) {
if (d_really_is_positive(dn)) {
in = d_inode(dn);
} else {
- in = ceph_get_inode(parent->d_sb, tvino);
+ in = ceph_get_inode(parent->d_sb, tvino, NULL);
if (IS_ERR(in)) {
dout("new_inode badness\n");
d_drop(dn);
retry:
spin_lock(&ci->i_ceph_lock);
if (ci->i_truncate_pending == 0) {
- dout("__do_pending_vmtruncate %p none pending\n", inode);
+ dout("%s %p none pending\n", __func__, inode);
spin_unlock(&ci->i_ceph_lock);
mutex_unlock(&ci->i_truncate_mutex);
return;
*/
if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
spin_unlock(&ci->i_ceph_lock);
- dout("__do_pending_vmtruncate %p flushing snaps first\n",
- inode);
+ dout("%s %p flushing snaps first\n", __func__, inode);
filemap_write_and_wait_range(&inode->i_data, 0,
inode->i_sb->s_maxbytes);
goto retry;
/* there should be no reader or writer */
WARN_ON_ONCE(ci->i_rd_ref || ci->i_wr_ref);
- to = ci->i_truncate_size;
+ to = ci->i_truncate_pagecache_size;
wrbuffer_refs = ci->i_wrbuffer_ref;
- dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
+ dout("%s %p (%d) to %lld\n", __func__, inode,
ci->i_truncate_pending, to);
spin_unlock(&ci->i_ceph_lock);
truncate_pagecache(inode, to);
spin_lock(&ci->i_ceph_lock);
- if (to == ci->i_truncate_size) {
+ if (to == ci->i_truncate_pagecache_size) {
ci->i_truncate_pending = 0;
finish = 1;
}
iput(inode);
}
+static const char *ceph_encrypted_get_link(struct dentry *dentry,
+ struct inode *inode,
+ struct delayed_call *done)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ if (!dentry)
+ return ERR_PTR(-ECHILD);
+
+ return fscrypt_get_symlink(inode, ci->i_symlink, i_size_read(inode),
+ done);
+}
+
+static int ceph_encrypted_symlink_getattr(struct mnt_idmap *idmap,
+ const struct path *path,
+ struct kstat *stat, u32 request_mask,
+ unsigned int query_flags)
+{
+ int ret;
+
+ ret = ceph_getattr(idmap, path, stat, request_mask, query_flags);
+ if (ret)
+ return ret;
+ return fscrypt_symlink_getattr(path, stat);
+}
+
/*
* symlinks
*/
.listxattr = ceph_listxattr,
};
-int __ceph_setattr(struct inode *inode, struct iattr *attr)
+static const struct inode_operations ceph_encrypted_symlink_iops = {
+ .get_link = ceph_encrypted_get_link,
+ .setattr = ceph_setattr,
+ .getattr = ceph_encrypted_symlink_getattr,
+ .listxattr = ceph_listxattr,
+};
+
+/*
+ * Transfer the encrypted last block to the MDS and the MDS
+ * will help update it when truncating a smaller size.
+ *
+ * We don't support a PAGE_SIZE that is smaller than the
+ * CEPH_FSCRYPT_BLOCK_SIZE.
+ */
+static int fill_fscrypt_truncate(struct inode *inode,
+ struct ceph_mds_request *req,
+ struct iattr *attr)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int boff = attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE;
+ loff_t pos, orig_pos = round_down(attr->ia_size,
+ CEPH_FSCRYPT_BLOCK_SIZE);
+ u64 block = orig_pos >> CEPH_FSCRYPT_BLOCK_SHIFT;
+ struct ceph_pagelist *pagelist = NULL;
+ struct kvec iov = {0};
+ struct iov_iter iter;
+ struct page *page = NULL;
+ struct ceph_fscrypt_truncate_size_header header;
+ int retry_op = 0;
+ int len = CEPH_FSCRYPT_BLOCK_SIZE;
+ loff_t i_size = i_size_read(inode);
+ int got, ret, issued;
+ u64 objver;
+
+ ret = __ceph_get_caps(inode, NULL, CEPH_CAP_FILE_RD, 0, -1, &got);
+ if (ret < 0)
+ return ret;
+
+ issued = __ceph_caps_issued(ci, NULL);
+
+ dout("%s size %lld -> %lld got cap refs on %s, issued %s\n", __func__,
+ i_size, attr->ia_size, ceph_cap_string(got),
+ ceph_cap_string(issued));
+
+ /* Try to writeback the dirty pagecaches */
+ if (issued & (CEPH_CAP_FILE_BUFFER)) {
+ loff_t lend = orig_pos + CEPH_FSCRYPT_BLOCK_SHIFT - 1;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping,
+ orig_pos, lend);
+ if (ret < 0)
+ goto out;
+ }
+
+ page = __page_cache_alloc(GFP_KERNEL);
+ if (page == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pagelist = ceph_pagelist_alloc(GFP_KERNEL);
+ if (!pagelist) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ iov.iov_base = kmap_local_page(page);
+ iov.iov_len = len;
+ iov_iter_kvec(&iter, READ, &iov, 1, len);
+
+ pos = orig_pos;
+ ret = __ceph_sync_read(inode, &pos, &iter, &retry_op, &objver);
+ if (ret < 0)
+ goto out;
+
+ /* Insert the header first */
+ header.ver = 1;
+ header.compat = 1;
+ header.change_attr = cpu_to_le64(inode_peek_iversion_raw(inode));
+
+ /*
+ * Always set the block_size to CEPH_FSCRYPT_BLOCK_SIZE,
+ * because in MDS it may need this to do the truncate.
+ */
+ header.block_size = cpu_to_le32(CEPH_FSCRYPT_BLOCK_SIZE);
+
+ /*
+ * If we hit a hole here, we should just skip filling
+ * the fscrypt for the request, because once the fscrypt
+ * is enabled, the file will be split into many blocks
+ * with the size of CEPH_FSCRYPT_BLOCK_SIZE, if there
+ * has a hole, the hole size should be multiple of block
+ * size.
+ *
+ * If the Rados object doesn't exist, it will be set to 0.
+ */
+ if (!objver) {
+ dout("%s hit hole, ppos %lld < size %lld\n", __func__,
+ pos, i_size);
+
+ header.data_len = cpu_to_le32(8 + 8 + 4);
+ header.file_offset = 0;
+ ret = 0;
+ } else {
+ header.data_len = cpu_to_le32(8 + 8 + 4 + CEPH_FSCRYPT_BLOCK_SIZE);
+ header.file_offset = cpu_to_le64(orig_pos);
+
+ dout("%s encrypt block boff/bsize %d/%lu\n", __func__,
+ boff, CEPH_FSCRYPT_BLOCK_SIZE);
+
+ /* truncate and zero out the extra contents for the last block */
+ memset(iov.iov_base + boff, 0, PAGE_SIZE - boff);
+
+ /* encrypt the last block */
+ ret = ceph_fscrypt_encrypt_block_inplace(inode, page,
+ CEPH_FSCRYPT_BLOCK_SIZE,
+ 0, block,
+ GFP_KERNEL);
+ if (ret)
+ goto out;
+ }
+
+ /* Insert the header */
+ ret = ceph_pagelist_append(pagelist, &header, sizeof(header));
+ if (ret)
+ goto out;
+
+ if (header.block_size) {
+ /* Append the last block contents to pagelist */
+ ret = ceph_pagelist_append(pagelist, iov.iov_base,
+ CEPH_FSCRYPT_BLOCK_SIZE);
+ if (ret)
+ goto out;
+ }
+ req->r_pagelist = pagelist;
+out:
+ dout("%s %p size dropping cap refs on %s\n", __func__,
+ inode, ceph_cap_string(got));
+ ceph_put_cap_refs(ci, got);
+ if (iov.iov_base)
+ kunmap_local(iov.iov_base);
+ if (page)
+ __free_pages(page, 0);
+ if (ret && pagelist)
+ ceph_pagelist_release(pagelist);
+ return ret;
+}
+
+int __ceph_setattr(struct inode *inode, struct iattr *attr,
+ struct ceph_iattr *cia)
{
struct ceph_inode_info *ci = ceph_inode(inode);
unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_cap_flush *prealloc_cf;
+ loff_t isize = i_size_read(inode);
int issued;
int release = 0, dirtied = 0;
int mask = 0;
int err = 0;
int inode_dirty_flags = 0;
bool lock_snap_rwsem = false;
+ bool fill_fscrypt;
+ int truncate_retry = 20; /* The RMW will take around 50ms */
+retry:
prealloc_cf = ceph_alloc_cap_flush();
if (!prealloc_cf)
return -ENOMEM;
return PTR_ERR(req);
}
+ fill_fscrypt = false;
spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
}
dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ if (cia && cia->fscrypt_auth) {
+ u32 len = ceph_fscrypt_auth_len(cia->fscrypt_auth);
+
+ if (len > sizeof(*cia->fscrypt_auth)) {
+ err = -EINVAL;
+ spin_unlock(&ci->i_ceph_lock);
+ goto out;
+ }
+
+ dout("setattr %llx:%llx fscrypt_auth len %u to %u)\n",
+ ceph_vinop(inode), ci->fscrypt_auth_len, len);
+
+ /* It should never be re-set once set */
+ WARN_ON_ONCE(ci->fscrypt_auth);
+
+ if (issued & CEPH_CAP_AUTH_EXCL) {
+ dirtied |= CEPH_CAP_AUTH_EXCL;
+ kfree(ci->fscrypt_auth);
+ ci->fscrypt_auth = (u8 *)cia->fscrypt_auth;
+ ci->fscrypt_auth_len = len;
+ } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 ||
+ ci->fscrypt_auth_len != len ||
+ memcmp(ci->fscrypt_auth, cia->fscrypt_auth, len)) {
+ req->r_fscrypt_auth = cia->fscrypt_auth;
+ mask |= CEPH_SETATTR_FSCRYPT_AUTH;
+ release |= CEPH_CAP_AUTH_SHARED;
+ }
+ cia->fscrypt_auth = NULL;
+ }
+#else
+ if (cia && cia->fscrypt_auth) {
+ err = -EINVAL;
+ spin_unlock(&ci->i_ceph_lock);
+ goto out;
+ }
+#endif /* CONFIG_FS_ENCRYPTION */
if (ia_valid & ATTR_UID) {
dout("setattr %p uid %d -> %d\n", inode,
}
}
if (ia_valid & ATTR_SIZE) {
- loff_t isize = i_size_read(inode);
-
dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
- if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
+ /*
+ * Only when the new size is smaller and not aligned to
+ * CEPH_FSCRYPT_BLOCK_SIZE will the RMW is needed.
+ */
+ if (IS_ENCRYPTED(inode) && attr->ia_size < isize &&
+ (attr->ia_size % CEPH_FSCRYPT_BLOCK_SIZE)) {
+ mask |= CEPH_SETATTR_SIZE;
+ release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
+ CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+ set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+ mask |= CEPH_SETATTR_FSCRYPT_FILE;
+ req->r_args.setattr.size =
+ cpu_to_le64(round_up(attr->ia_size,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ req->r_args.setattr.old_size =
+ cpu_to_le64(round_up(isize,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ req->r_fscrypt_file = attr->ia_size;
+ fill_fscrypt = true;
+ } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) {
if (attr->ia_size > isize) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
}
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
attr->ia_size != isize) {
- req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
- req->r_args.setattr.old_size = cpu_to_le64(isize);
mask |= CEPH_SETATTR_SIZE;
release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
+ if (IS_ENCRYPTED(inode) && attr->ia_size) {
+ set_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags);
+ mask |= CEPH_SETATTR_FSCRYPT_FILE;
+ req->r_args.setattr.size =
+ cpu_to_le64(round_up(attr->ia_size,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ req->r_args.setattr.old_size =
+ cpu_to_le64(round_up(isize,
+ CEPH_FSCRYPT_BLOCK_SIZE));
+ req->r_fscrypt_file = attr->ia_size;
+ } else {
+ req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
+ req->r_args.setattr.old_size = cpu_to_le64(isize);
+ req->r_fscrypt_file = 0;
+ }
}
}
if (ia_valid & ATTR_MTIME) {
release &= issued;
spin_unlock(&ci->i_ceph_lock);
- if (lock_snap_rwsem)
+ if (lock_snap_rwsem) {
up_read(&mdsc->snap_rwsem);
+ lock_snap_rwsem = false;
+ }
if (inode_dirty_flags)
__mark_inode_dirty(inode, inode_dirty_flags);
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
req->r_stamp = attr->ia_ctime;
+ if (fill_fscrypt) {
+ err = fill_fscrypt_truncate(inode, req, attr);
+ if (err)
+ goto out;
+ }
+
+ /*
+ * The truncate request will return -EAGAIN when the
+ * last block has been updated just before the MDS
+ * successfully gets the xlock for the FILE lock. To
+ * avoid corrupting the file contents we need to retry
+ * it.
+ */
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err == -EAGAIN && truncate_retry--) {
+ dout("setattr %p result=%d (%s locally, %d remote), retry it!\n",
+ inode, err, ceph_cap_string(dirtied), mask);
+ ceph_mdsc_put_request(req);
+ ceph_free_cap_flush(prealloc_cf);
+ goto retry;
+ }
}
+out:
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
ceph_cap_string(dirtied), mask);
if (ceph_inode_is_shutdown(inode))
return -ESTALE;
+ err = fscrypt_prepare_setattr(dentry, attr);
+ if (err)
+ return err;
+
err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
if (err != 0)
return err;
ceph_quota_is_max_bytes_exceeded(inode, attr->ia_size))
return -EDQUOT;
- err = __ceph_setattr(inode, attr);
+ err = __ceph_setattr(inode, attr, NULL);
if (err >= 0 && (attr->ia_valid & ATTR_MODE))
err = posix_acl_chmod(&nop_mnt_idmap, dentry, attr->ia_mode);
stat->nlink = 1 + 1 + ci->i_subdirs;
}
- stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
stat->attributes |= STATX_ATTR_CHANGE_MONOTONIC;
+ if (IS_ENCRYPTED(inode))
+ stat->attributes |= STATX_ATTR_ENCRYPTED;
+ stat->attributes_mask |= (STATX_ATTR_CHANGE_MONOTONIC |
+ STATX_ATTR_ENCRYPTED);
+
stat->result_mask = request_mask & valid_mask;
return err;
}
#include "mds_client.h"
#include "ioctl.h"
#include <linux/ceph/striper.h>
+#include <linux/fscrypt.h>
/*
* ioctls
return 0;
}
+static int vet_mds_for_fscrypt(struct file *file)
+{
+ int i, ret = -EOPNOTSUPP;
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(file_inode(file)->i_sb);
+
+ mutex_lock(&mdsc->mutex);
+ for (i = 0; i < mdsc->max_sessions; i++) {
+ struct ceph_mds_session *s = mdsc->sessions[i];
+
+ if (!s)
+ continue;
+ if (test_bit(CEPHFS_FEATURE_ALTERNATE_NAME, &s->s_features))
+ ret = 0;
+ break;
+ }
+ mutex_unlock(&mdsc->mutex);
+ return ret;
+}
+
+static long ceph_set_encryption_policy(struct file *file, unsigned long arg)
+{
+ int ret, got = 0;
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+
+ /* encrypted directories can't have striped layout */
+ if (ci->i_layout.stripe_count > 1)
+ return -EINVAL;
+
+ ret = vet_mds_for_fscrypt(file);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure we hold these caps so that we _know_ that the rstats check
+ * in the empty_dir check is reliable.
+ */
+ ret = ceph_get_caps(file, CEPH_CAP_FILE_SHARED, 0, -1, &got);
+ if (ret)
+ return ret;
+
+ ret = fscrypt_ioctl_set_policy(file, (const void __user *)arg);
+ if (got)
+ ceph_put_cap_refs(ci, got);
+
+ return ret;
+}
+
+static const char *ceph_ioctl_cmd_name(const unsigned int cmd)
+{
+ switch (cmd) {
+ case CEPH_IOC_GET_LAYOUT:
+ return "get_layout";
+ case CEPH_IOC_SET_LAYOUT:
+ return "set_layout";
+ case CEPH_IOC_SET_LAYOUT_POLICY:
+ return "set_layout_policy";
+ case CEPH_IOC_GET_DATALOC:
+ return "get_dataloc";
+ case CEPH_IOC_LAZYIO:
+ return "lazyio";
+ case CEPH_IOC_SYNCIO:
+ return "syncio";
+ case FS_IOC_SET_ENCRYPTION_POLICY:
+ return "set_encryption_policy";
+ case FS_IOC_GET_ENCRYPTION_POLICY:
+ return "get_encryption_policy";
+ case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+ return "get_encryption_policy_ex";
+ case FS_IOC_ADD_ENCRYPTION_KEY:
+ return "add_encryption_key";
+ case FS_IOC_REMOVE_ENCRYPTION_KEY:
+ return "remove_encryption_key";
+ case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+ return "remove_encryption_key_all_users";
+ case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+ return "get_encryption_key_status";
+ case FS_IOC_GET_ENCRYPTION_NONCE:
+ return "get_encryption_nonce";
+ default:
+ return "unknown";
+ }
+}
+
long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
+ int ret;
+
+ dout("ioctl file %p cmd %s arg %lu\n", file,
+ ceph_ioctl_cmd_name(cmd), arg);
switch (cmd) {
case CEPH_IOC_GET_LAYOUT:
return ceph_ioctl_get_layout(file, (void __user *)arg);
case CEPH_IOC_SYNCIO:
return ceph_ioctl_syncio(file);
+
+ case FS_IOC_SET_ENCRYPTION_POLICY:
+ return ceph_set_encryption_policy(file, arg);
+
+ case FS_IOC_GET_ENCRYPTION_POLICY:
+ ret = vet_mds_for_fscrypt(file);
+ if (ret)
+ return ret;
+ return fscrypt_ioctl_get_policy(file, (void __user *)arg);
+
+ case FS_IOC_GET_ENCRYPTION_POLICY_EX:
+ ret = vet_mds_for_fscrypt(file);
+ if (ret)
+ return ret;
+ return fscrypt_ioctl_get_policy_ex(file, (void __user *)arg);
+
+ case FS_IOC_ADD_ENCRYPTION_KEY:
+ ret = vet_mds_for_fscrypt(file);
+ if (ret)
+ return ret;
+ return fscrypt_ioctl_add_key(file, (void __user *)arg);
+
+ case FS_IOC_REMOVE_ENCRYPTION_KEY:
+ return fscrypt_ioctl_remove_key(file, (void __user *)arg);
+
+ case FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS:
+ return fscrypt_ioctl_remove_key_all_users(file,
+ (void __user *)arg);
+
+ case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
+ return fscrypt_ioctl_get_key_status(file, (void __user *)arg);
+
+ case FS_IOC_GET_ENCRYPTION_NONCE:
+ ret = vet_mds_for_fscrypt(file);
+ if (ret)
+ return ret;
+ return fscrypt_ioctl_get_nonce(file, (void __user *)arg);
}
return -ENOTTY;
#include "super.h"
#include "mds_client.h"
+#include "crypto.h"
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/messenger.h>
info->rsnaps = 0;
}
+ if (struct_v >= 5) {
+ u32 alen;
+
+ ceph_decode_32_safe(p, end, alen, bad);
+
+ while (alen--) {
+ u32 len;
+
+ /* key */
+ ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_skip_n(p, end, len, bad);
+ /* value */
+ ceph_decode_32_safe(p, end, len, bad);
+ ceph_decode_skip_n(p, end, len, bad);
+ }
+ }
+
+ /* fscrypt flag -- ignore */
+ if (struct_v >= 6)
+ ceph_decode_skip_8(p, end, bad);
+
+ info->fscrypt_auth = NULL;
+ info->fscrypt_auth_len = 0;
+ info->fscrypt_file = NULL;
+ info->fscrypt_file_len = 0;
+ if (struct_v >= 7) {
+ ceph_decode_32_safe(p, end, info->fscrypt_auth_len, bad);
+ if (info->fscrypt_auth_len) {
+ info->fscrypt_auth = kmalloc(info->fscrypt_auth_len,
+ GFP_KERNEL);
+ if (!info->fscrypt_auth)
+ return -ENOMEM;
+ ceph_decode_copy_safe(p, end, info->fscrypt_auth,
+ info->fscrypt_auth_len, bad);
+ }
+ ceph_decode_32_safe(p, end, info->fscrypt_file_len, bad);
+ if (info->fscrypt_file_len) {
+ info->fscrypt_file = kmalloc(info->fscrypt_file_len,
+ GFP_KERNEL);
+ if (!info->fscrypt_file)
+ return -ENOMEM;
+ ceph_decode_copy_safe(p, end, info->fscrypt_file,
+ info->fscrypt_file_len, bad);
+ }
+ }
*p = end;
} else {
+ /* legacy (unversioned) struct */
if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
ceph_decode_64_safe(p, end, info->inline_version, bad);
ceph_decode_32_safe(p, end, info->inline_len, bad);
static int parse_reply_info_lease(void **p, void *end,
struct ceph_mds_reply_lease **lease,
- u64 features)
+ u64 features, u32 *altname_len, u8 **altname)
{
+ u8 struct_v;
+ u32 struct_len;
+ void *lend;
+
if (features == (u64)-1) {
- u8 struct_v, struct_compat;
- u32 struct_len;
+ u8 struct_compat;
+
ceph_decode_8_safe(p, end, struct_v, bad);
ceph_decode_8_safe(p, end, struct_compat, bad);
+
/* struct_v is expected to be >= 1. we only understand
* encoding whose struct_compat == 1. */
if (!struct_v || struct_compat != 1)
goto bad;
+
ceph_decode_32_safe(p, end, struct_len, bad);
- ceph_decode_need(p, end, struct_len, bad);
- end = *p + struct_len;
+ } else {
+ struct_len = sizeof(**lease);
+ *altname_len = 0;
+ *altname = NULL;
}
- ceph_decode_need(p, end, sizeof(**lease), bad);
+ lend = *p + struct_len;
+ ceph_decode_need(p, end, struct_len, bad);
*lease = *p;
*p += sizeof(**lease);
- if (features == (u64)-1)
- *p = end;
+
+ if (features == (u64)-1) {
+ if (struct_v >= 2) {
+ ceph_decode_32_safe(p, end, *altname_len, bad);
+ ceph_decode_need(p, end, *altname_len, bad);
+ *altname = *p;
+ *p += *altname_len;
+ } else {
+ *altname = NULL;
+ *altname_len = 0;
+ }
+ }
+ *p = lend;
return 0;
bad:
return -EIO;
info->dname = *p;
*p += info->dname_len;
- err = parse_reply_info_lease(p, end, &info->dlease, features);
+ err = parse_reply_info_lease(p, end, &info->dlease, features,
+ &info->altname_len, &info->altname);
if (err < 0)
goto out_bad;
}
* parse readdir results
*/
static int parse_reply_info_readdir(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info,
- u64 features)
+ struct ceph_mds_request *req,
+ u64 features)
{
+ struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
u32 num, i = 0;
int err;
info->dir_nr = num;
while (num) {
+ struct inode *inode = d_inode(req->r_dentry);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+ struct fscrypt_str tname = FSTR_INIT(NULL, 0);
+ struct fscrypt_str oname = FSTR_INIT(NULL, 0);
+ struct ceph_fname fname;
+ u32 altname_len, _name_len;
+ u8 *altname, *_name;
+
/* dentry */
- ceph_decode_32_safe(p, end, rde->name_len, bad);
- ceph_decode_need(p, end, rde->name_len, bad);
- rde->name = *p;
- *p += rde->name_len;
- dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);
+ ceph_decode_32_safe(p, end, _name_len, bad);
+ ceph_decode_need(p, end, _name_len, bad);
+ _name = *p;
+ *p += _name_len;
+ dout("parsed dir dname '%.*s'\n", _name_len, _name);
+
+ if (info->hash_order)
+ rde->raw_hash = ceph_str_hash(ci->i_dir_layout.dl_dir_hash,
+ _name, _name_len);
/* dentry lease */
- err = parse_reply_info_lease(p, end, &rde->lease, features);
+ err = parse_reply_info_lease(p, end, &rde->lease, features,
+ &altname_len, &altname);
if (err)
goto out_bad;
+
+ /*
+ * Try to dencrypt the dentry names and update them
+ * in the ceph_mds_reply_dir_entry struct.
+ */
+ fname.dir = inode;
+ fname.name = _name;
+ fname.name_len = _name_len;
+ fname.ctext = altname;
+ fname.ctext_len = altname_len;
+ /*
+ * The _name_len maybe larger than altname_len, such as
+ * when the human readable name length is in range of
+ * (CEPH_NOHASH_NAME_MAX, CEPH_NOHASH_NAME_MAX + SHA256_DIGEST_SIZE),
+ * then the copy in ceph_fname_to_usr will corrupt the
+ * data if there has no encryption key.
+ *
+ * Just set the no_copy flag and then if there has no
+ * encryption key the oname.name will be assigned to
+ * _name always.
+ */
+ fname.no_copy = true;
+ if (altname_len == 0) {
+ /*
+ * Set tname to _name, and this will be used
+ * to do the base64_decode in-place. It's
+ * safe because the decoded string should
+ * always be shorter, which is 3/4 of origin
+ * string.
+ */
+ tname.name = _name;
+
+ /*
+ * Set oname to _name too, and this will be
+ * used to do the dencryption in-place.
+ */
+ oname.name = _name;
+ oname.len = _name_len;
+ } else {
+ /*
+ * This will do the decryption only in-place
+ * from altname cryptext directly.
+ */
+ oname.name = altname;
+ oname.len = altname_len;
+ }
+ rde->is_nokey = false;
+ err = ceph_fname_to_usr(&fname, &tname, &oname, &rde->is_nokey);
+ if (err) {
+ pr_err("%s unable to decode %.*s, got %d\n", __func__,
+ _name_len, _name, err);
+ goto out_bad;
+ }
+ rde->name = oname.name;
+ rde->name_len = oname.len;
+
/* inode */
err = parse_reply_info_in(p, end, &rde->inode, features);
if (err < 0)
* parse extra results
*/
static int parse_reply_info_extra(void **p, void *end,
- struct ceph_mds_reply_info_parsed *info,
+ struct ceph_mds_request *req,
u64 features, struct ceph_mds_session *s)
{
+ struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
u32 op = le32_to_cpu(info->head->op);
if (op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
- return parse_reply_info_readdir(p, end, info, features);
+ return parse_reply_info_readdir(p, end, req, features);
else if (op == CEPH_MDS_OP_CREATE)
return parse_reply_info_create(p, end, info, features, s);
else if (op == CEPH_MDS_OP_GETVXATTR)
* parse entire mds reply
*/
static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
- struct ceph_mds_reply_info_parsed *info,
- u64 features)
+ struct ceph_mds_request *req, u64 features)
{
+ struct ceph_mds_reply_info_parsed *info = &req->r_reply_info;
void *p, *end;
u32 len;
int err;
ceph_decode_32_safe(&p, end, len, bad);
if (len > 0) {
ceph_decode_need(&p, end, len, bad);
- err = parse_reply_info_extra(&p, p+len, info, features, s);
+ err = parse_reply_info_extra(&p, p+len, req, features, s);
if (err < 0)
goto out_bad;
}
static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
{
+ int i;
+
+ kfree(info->diri.fscrypt_auth);
+ kfree(info->diri.fscrypt_file);
+ kfree(info->targeti.fscrypt_auth);
+ kfree(info->targeti.fscrypt_file);
if (!info->dir_entries)
return;
+
+ for (i = 0; i < info->dir_nr; i++) {
+ struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
+
+ kfree(rde->inode.fscrypt_auth);
+ kfree(rde->inode.fscrypt_file);
+ }
free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
}
iput(req->r_parent);
}
iput(req->r_target_inode);
+ iput(req->r_new_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
put_cred(req->r_cred);
if (req->r_pagelist)
ceph_pagelist_release(req->r_pagelist);
+ kfree(req->r_fscrypt_auth);
+ kfree(req->r_altname);
put_request_session(req);
ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
WARN_ON_ONCE(!list_empty(&req->r_wait));
return mdsc->oldest_tid;
}
-/*
- * Build a dentry's path. Allocate on heap; caller must kfree. Based
- * on build_path_from_dentry in fs/cifs/dir.c.
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+ struct inode *dir = req->r_parent;
+ struct dentry *dentry = req->r_dentry;
+ u8 *cryptbuf = NULL;
+ u32 len = 0;
+ int ret = 0;
+
+ /* only encode if we have parent and dentry */
+ if (!dir || !dentry)
+ goto success;
+
+ /* No-op unless this is encrypted */
+ if (!IS_ENCRYPTED(dir))
+ goto success;
+
+ ret = ceph_fscrypt_prepare_readdir(dir);
+ if (ret < 0)
+ return ERR_PTR(ret);
+
+ /* No key? Just ignore it. */
+ if (!fscrypt_has_encryption_key(dir))
+ goto success;
+
+ if (!fscrypt_fname_encrypted_size(dir, dentry->d_name.len, NAME_MAX,
+ &len)) {
+ WARN_ON_ONCE(1);
+ return ERR_PTR(-ENAMETOOLONG);
+ }
+
+ /* No need to append altname if name is short enough */
+ if (len <= CEPH_NOHASH_NAME_MAX) {
+ len = 0;
+ goto success;
+ }
+
+ cryptbuf = kmalloc(len, GFP_KERNEL);
+ if (!cryptbuf)
+ return ERR_PTR(-ENOMEM);
+
+ ret = fscrypt_fname_encrypt(dir, &dentry->d_name, cryptbuf, len);
+ if (ret) {
+ kfree(cryptbuf);
+ return ERR_PTR(ret);
+ }
+success:
+ *plen = len;
+ return cryptbuf;
+}
+#else
+static u8 *get_fscrypt_altname(const struct ceph_mds_request *req, u32 *plen)
+{
+ *plen = 0;
+ return NULL;
+}
+#endif
+
+/**
+ * ceph_mdsc_build_path - build a path string to a given dentry
+ * @dentry: dentry to which path should be built
+ * @plen: returned length of string
+ * @pbase: returned base inode number
+ * @for_wire: is this path going to be sent to the MDS?
+ *
+ * Build a string that represents the path to the dentry. This is mostly called
+ * for two different purposes:
+ *
+ * 1) we need to build a path string to send to the MDS (for_wire == true)
+ * 2) we need a path string for local presentation (e.g. debugfs)
+ * (for_wire == false)
*
- * If @stop_on_nosnap, generate path relative to the first non-snapped
- * inode.
+ * The path is built in reverse, starting with the dentry. Walk back up toward
+ * the root, building the path until the first non-snapped inode is reached
+ * (for_wire) or the root inode is reached (!for_wire).
*
* Encode hidden .snap dirs as a double /, i.e.
* foo/.snap/bar -> foo//bar
*/
char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
- int stop_on_nosnap)
+ int for_wire)
{
- struct dentry *temp;
+ struct dentry *cur;
+ struct inode *inode;
char *path;
int pos;
unsigned seq;
path[pos] = '\0';
seq = read_seqbegin(&rename_lock);
- rcu_read_lock();
- temp = dentry;
+ cur = dget(dentry);
for (;;) {
- struct inode *inode;
+ struct dentry *parent;
- spin_lock(&temp->d_lock);
- inode = d_inode(temp);
+ spin_lock(&cur->d_lock);
+ inode = d_inode(cur);
if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
dout("build_path path+%d: %p SNAPDIR\n",
- pos, temp);
- } else if (stop_on_nosnap && inode && dentry != temp &&
+ pos, cur);
+ spin_unlock(&cur->d_lock);
+ parent = dget_parent(cur);
+ } else if (for_wire && inode && dentry != cur &&
ceph_snap(inode) == CEPH_NOSNAP) {
- spin_unlock(&temp->d_lock);
+ spin_unlock(&cur->d_lock);
pos++; /* get rid of any prepended '/' */
break;
+ } else if (!for_wire || !IS_ENCRYPTED(d_inode(cur->d_parent))) {
+ pos -= cur->d_name.len;
+ if (pos < 0) {
+ spin_unlock(&cur->d_lock);
+ break;
+ }
+ memcpy(path + pos, cur->d_name.name, cur->d_name.len);
+ spin_unlock(&cur->d_lock);
+ parent = dget_parent(cur);
} else {
- pos -= temp->d_name.len;
+ int len, ret;
+ char buf[NAME_MAX];
+
+ /*
+ * Proactively copy name into buf, in case we need to
+ * present it as-is.
+ */
+ memcpy(buf, cur->d_name.name, cur->d_name.len);
+ len = cur->d_name.len;
+ spin_unlock(&cur->d_lock);
+ parent = dget_parent(cur);
+
+ ret = ceph_fscrypt_prepare_readdir(d_inode(parent));
+ if (ret < 0) {
+ dput(parent);
+ dput(cur);
+ return ERR_PTR(ret);
+ }
+
+ if (fscrypt_has_encryption_key(d_inode(parent))) {
+ len = ceph_encode_encrypted_fname(d_inode(parent),
+ cur, buf);
+ if (len < 0) {
+ dput(parent);
+ dput(cur);
+ return ERR_PTR(len);
+ }
+ }
+ pos -= len;
if (pos < 0) {
- spin_unlock(&temp->d_lock);
+ dput(parent);
break;
}
- memcpy(path + pos, temp->d_name.name, temp->d_name.len);
+ memcpy(path + pos, buf, len);
}
- spin_unlock(&temp->d_lock);
- temp = READ_ONCE(temp->d_parent);
+ dput(cur);
+ cur = parent;
/* Are we at the root? */
- if (IS_ROOT(temp))
+ if (IS_ROOT(cur))
break;
/* Are we out of buffer? */
path[pos] = '/';
}
- base = ceph_ino(d_inode(temp));
- rcu_read_unlock();
+ inode = d_inode(cur);
+ base = inode ? ceph_ino(inode) : 0;
+ dput(cur);
if (read_seqretry(&rename_lock, seq))
goto retry;
* A rename didn't occur, but somehow we didn't end up where
* we thought we would. Throw a warning and try again.
*/
- pr_warn("build_path did not end path lookup where "
- "expected, pos is %d\n", pos);
+ pr_warn("build_path did not end path lookup where expected (pos = %d)\n",
+ pos);
goto retry;
}
rcu_read_lock();
if (!dir)
dir = d_inode_rcu(dentry->d_parent);
- if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
+ if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP &&
+ !IS_ENCRYPTED(dir)) {
*pino = ceph_ino(dir);
rcu_read_unlock();
*ppath = dentry->d_name.name;
return r;
}
-static void encode_timestamp_and_gids(void **p,
- const struct ceph_mds_request *req)
+static void encode_mclientrequest_tail(void **p,
+ const struct ceph_mds_request *req)
{
struct ceph_timespec ts;
int i;
ceph_encode_timespec64(&ts, &req->r_stamp);
ceph_encode_copy(p, &ts, sizeof(ts));
- /* gid_list */
+ /* v4: gid_list */
ceph_encode_32(p, req->r_cred->group_info->ngroups);
for (i = 0; i < req->r_cred->group_info->ngroups; i++)
ceph_encode_64(p, from_kgid(&init_user_ns,
req->r_cred->group_info->gid[i]));
+
+ /* v5: altname */
+ ceph_encode_32(p, req->r_altname_len);
+ ceph_encode_copy(p, req->r_altname, req->r_altname_len);
+
+ /* v6: fscrypt_auth and fscrypt_file */
+ if (req->r_fscrypt_auth) {
+ u32 authlen = ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+ ceph_encode_32(p, authlen);
+ ceph_encode_copy(p, req->r_fscrypt_auth, authlen);
+ } else {
+ ceph_encode_32(p, 0);
+ }
+ if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags)) {
+ ceph_encode_32(p, sizeof(__le64));
+ ceph_encode_64(p, req->r_fscrypt_file);
+ } else {
+ ceph_encode_32(p, 0);
+ }
+}
+
+static struct ceph_mds_request_head_legacy *
+find_legacy_request_head(void *p, u64 features)
+{
+ bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+ struct ceph_mds_request_head_old *ohead;
+
+ if (legacy)
+ return (struct ceph_mds_request_head_legacy *)p;
+ ohead = (struct ceph_mds_request_head_old *)p;
+ return (struct ceph_mds_request_head_legacy *)&ohead->oldest_client_tid;
}
/*
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_msg *msg;
- struct ceph_mds_request_head_old *head;
+ struct ceph_mds_request_head_legacy *lhead;
const char *path1 = NULL;
const char *path2 = NULL;
u64 ino1 = 0, ino2 = 0;
void *p, *end;
int ret;
bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
+ bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+ &session->s_features);
ret = set_request_path_attr(req->r_inode, req->r_dentry,
req->r_parent, req->r_path1, req->r_ino1.ino,
goto out_free1;
}
- len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
- len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
- sizeof(struct ceph_timespec);
- len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+ req->r_altname = get_fscrypt_altname(req, &req->r_altname_len);
+ if (IS_ERR(req->r_altname)) {
+ msg = ERR_CAST(req->r_altname);
+ req->r_altname = NULL;
+ goto out_free2;
+ }
+
+ /*
+ * For old cephs without supporting the 32bit retry/fwd feature
+ * it will copy the raw memories directly when decoding the
+ * requests. While new cephs will decode the head depending the
+ * version member, so we need to make sure it will be compatible
+ * with them both.
+ */
+ if (legacy)
+ len = sizeof(struct ceph_mds_request_head_legacy);
+ else if (old_version)
+ len = sizeof(struct ceph_mds_request_head_old);
+ else
+ len = sizeof(struct ceph_mds_request_head);
- /* calculate (max) length for cap releases */
+ /* filepaths */
+ len += 2 * (1 + sizeof(u32) + sizeof(u64));
+ len += pathlen1 + pathlen2;
+
+ /* cap releases */
len += sizeof(struct ceph_mds_request_release) *
(!!req->r_inode_drop + !!req->r_dentry_drop +
!!req->r_old_inode_drop + !!req->r_old_dentry_drop);
if (req->r_old_dentry_drop)
len += pathlen2;
+ /* MClientRequest tail */
+
+ /* req->r_stamp */
+ len += sizeof(struct ceph_timespec);
+
+ /* gid list */
+ len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
+
+ /* alternate name */
+ len += sizeof(u32) + req->r_altname_len;
+
+ /* fscrypt_auth */
+ len += sizeof(u32); // fscrypt_auth
+ if (req->r_fscrypt_auth)
+ len += ceph_fscrypt_auth_len(req->r_fscrypt_auth);
+
+ /* fscrypt_file */
+ len += sizeof(u32);
+ if (test_bit(CEPH_MDS_R_FSCRYPT_FILE, &req->r_req_flags))
+ len += sizeof(__le64);
+
msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
if (!msg) {
msg = ERR_PTR(-ENOMEM);
msg->hdr.tid = cpu_to_le64(req->r_tid);
+ lhead = find_legacy_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
+
/*
- * The old ceph_mds_request_head didn't contain a version field, and
+ * The ceph_mds_request_head_legacy didn't contain a version field, and
* one was added when we moved the message version from 3->4.
*/
if (legacy) {
msg->hdr.version = cpu_to_le16(3);
- head = msg->front.iov_base;
- p = msg->front.iov_base + sizeof(*head);
- } else {
- struct ceph_mds_request_head *new_head = msg->front.iov_base;
+ p = msg->front.iov_base + sizeof(*lhead);
+ } else if (old_version) {
+ struct ceph_mds_request_head_old *ohead = msg->front.iov_base;
msg->hdr.version = cpu_to_le16(4);
- new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
- head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
- p = msg->front.iov_base + sizeof(*new_head);
+ ohead->version = cpu_to_le16(1);
+ p = msg->front.iov_base + sizeof(*ohead);
+ } else {
+ struct ceph_mds_request_head *nhead = msg->front.iov_base;
+
+ msg->hdr.version = cpu_to_le16(6);
+ nhead->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+ p = msg->front.iov_base + sizeof(*nhead);
}
end = msg->front.iov_base + msg->front.iov_len;
- head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
- head->op = cpu_to_le32(req->r_op);
- head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
- req->r_cred->fsuid));
- head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
- req->r_cred->fsgid));
- head->ino = cpu_to_le64(req->r_deleg_ino);
- head->args = req->r_args;
+ lhead->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
+ lhead->op = cpu_to_le32(req->r_op);
+ lhead->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+ req->r_cred->fsuid));
+ lhead->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+ req->r_cred->fsgid));
+ lhead->ino = cpu_to_le64(req->r_deleg_ino);
+ lhead->args = req->r_args;
ceph_encode_filepath(&p, end, ino1, path1);
ceph_encode_filepath(&p, end, ino2, path2);
req->r_inode ? req->r_inode : d_inode(req->r_dentry),
mds, req->r_inode_drop, req->r_inode_unless,
req->r_op == CEPH_MDS_OP_READDIR);
- if (req->r_dentry_drop)
- releases += ceph_encode_dentry_release(&p, req->r_dentry,
+ if (req->r_dentry_drop) {
+ ret = ceph_encode_dentry_release(&p, req->r_dentry,
req->r_parent, mds, req->r_dentry_drop,
req->r_dentry_unless);
- if (req->r_old_dentry_drop)
- releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
+ if (ret < 0)
+ goto out_err;
+ releases += ret;
+ }
+ if (req->r_old_dentry_drop) {
+ ret = ceph_encode_dentry_release(&p, req->r_old_dentry,
req->r_old_dentry_dir, mds,
req->r_old_dentry_drop,
req->r_old_dentry_unless);
+ if (ret < 0)
+ goto out_err;
+ releases += ret;
+ }
if (req->r_old_inode_drop)
releases += ceph_encode_inode_release(&p,
d_inode(req->r_old_dentry),
p = msg->front.iov_base + req->r_request_release_offset;
}
- head->num_releases = cpu_to_le16(releases);
+ lhead->num_releases = cpu_to_le16(releases);
- encode_timestamp_and_gids(&p, req);
+ encode_mclientrequest_tail(&p, req);
if (WARN_ON_ONCE(p > end)) {
ceph_msg_put(msg);
ceph_mdsc_free_path((char *)path1, pathlen1);
out:
return msg;
+out_err:
+ ceph_msg_put(msg);
+ msg = ERR_PTR(ret);
+ goto out_free2;
}
/*
complete_all(&req->r_completion);
}
-static struct ceph_mds_request_head_old *
-find_old_request_head(void *p, u64 features)
-{
- bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
- struct ceph_mds_request_head *new_head;
-
- if (legacy)
- return (struct ceph_mds_request_head_old *)p;
- new_head = (struct ceph_mds_request_head *)p;
- return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
-}
-
/*
* called under mdsc->mutex
*/
{
int mds = session->s_mds;
struct ceph_mds_client *mdsc = session->s_mdsc;
- struct ceph_mds_request_head_old *rhead;
+ struct ceph_mds_request_head_legacy *lhead;
+ struct ceph_mds_request_head *nhead;
struct ceph_msg *msg;
- int flags = 0, max_retry;
+ int flags = 0, old_max_retry;
+ bool old_version = !test_bit(CEPHFS_FEATURE_32BITS_RETRY_FWD,
+ &session->s_features);
/*
- * The type of 'r_attempts' in kernel 'ceph_mds_request'
- * is 'int', while in 'ceph_mds_request_head' the type of
- * 'num_retry' is '__u8'. So in case the request retries
- * exceeding 256 times, the MDS will receive a incorrect
- * retry seq.
- *
- * In this case it's ususally a bug in MDS and continue
- * retrying the request makes no sense.
- *
- * In future this could be fixed in ceph code, so avoid
- * using the hardcode here.
+ * Avoid inifinite retrying after overflow. The client will
+ * increase the retry count and if the MDS is old version,
+ * so we limit to retry at most 256 times.
*/
- max_retry = sizeof_field(struct ceph_mds_request_head, num_retry);
- max_retry = 1 << (max_retry * BITS_PER_BYTE);
- if (req->r_attempts >= max_retry) {
- pr_warn_ratelimited("%s request tid %llu seq overflow\n",
- __func__, req->r_tid);
- return -EMULTIHOP;
+ if (req->r_attempts) {
+ old_max_retry = sizeof_field(struct ceph_mds_request_head_old,
+ num_retry);
+ old_max_retry = 1 << (old_max_retry * BITS_PER_BYTE);
+ if ((old_version && req->r_attempts >= old_max_retry) ||
+ ((uint32_t)req->r_attempts >= U32_MAX)) {
+ pr_warn_ratelimited("%s request tid %llu seq overflow\n",
+ __func__, req->r_tid);
+ return -EMULTIHOP;
+ }
}
req->r_attempts++;
* d_move mangles the src name.
*/
msg = req->r_request;
- rhead = find_old_request_head(msg->front.iov_base,
- session->s_con.peer_features);
+ lhead = find_legacy_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
- flags = le32_to_cpu(rhead->flags);
+ flags = le32_to_cpu(lhead->flags);
flags |= CEPH_MDS_FLAG_REPLAY;
- rhead->flags = cpu_to_le32(flags);
+ lhead->flags = cpu_to_le32(flags);
if (req->r_target_inode)
- rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
+ lhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
- rhead->num_retry = req->r_attempts - 1;
+ lhead->num_retry = req->r_attempts - 1;
+ if (!old_version) {
+ nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+ nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+ }
/* remove cap/dentry releases from message */
- rhead->num_releases = 0;
+ lhead->num_releases = 0;
p = msg->front.iov_base + req->r_request_release_offset;
- encode_timestamp_and_gids(&p, req);
+ encode_mclientrequest_tail(&p, req);
msg->front.iov_len = p - msg->front.iov_base;
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
}
req->r_request = msg;
- rhead = find_old_request_head(msg->front.iov_base,
- session->s_con.peer_features);
- rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
+ lhead = find_legacy_request_head(msg->front.iov_base,
+ session->s_con.peer_features);
+ lhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_REPLAY;
if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
flags |= CEPH_MDS_FLAG_ASYNC;
if (req->r_parent)
flags |= CEPH_MDS_FLAG_WANT_DENTRY;
- rhead->flags = cpu_to_le32(flags);
- rhead->num_fwd = req->r_num_fwd;
- rhead->num_retry = req->r_attempts - 1;
+ lhead->flags = cpu_to_le32(flags);
+ lhead->num_fwd = req->r_num_fwd;
+ lhead->num_retry = req->r_attempts - 1;
+ if (!old_version) {
+ nhead = (struct ceph_mds_request_head*)msg->front.iov_base;
+ nhead->ext_num_fwd = cpu_to_le32(req->r_num_fwd);
+ nhead->ext_num_retry = cpu_to_le32(req->r_attempts - 1);
+ }
dout(" r_parent = %p\n", req->r_parent);
return 0;
}
dout("handle_reply tid %lld result %d\n", tid, result);
- rinfo = &req->r_reply_info;
if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
- err = parse_reply_info(session, msg, rinfo, (u64)-1);
+ err = parse_reply_info(session, msg, req, (u64)-1);
else
- err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
+ err = parse_reply_info(session, msg, req,
+ session->s_con.peer_features);
mutex_unlock(&mdsc->mutex);
/* Must find target inode outside of mutexes to avoid deadlocks */
+ rinfo = &req->r_reply_info;
if ((err >= 0) && rinfo->head->is_target) {
- struct inode *in;
+ struct inode *in = xchg(&req->r_new_inode, NULL);
struct ceph_vino tvino = {
.ino = le64_to_cpu(rinfo->targeti.in->ino),
.snap = le64_to_cpu(rinfo->targeti.in->snapid)
};
- in = ceph_get_inode(mdsc->fsc->sb, tvino);
+ /*
+ * If we ended up opening an existing inode, discard
+ * r_new_inode
+ */
+ if (req->r_op == CEPH_MDS_OP_CREATE &&
+ !req->r_reply_info.has_create_ino) {
+ /* This should never happen on an async create */
+ WARN_ON_ONCE(req->r_deleg_ino);
+ iput(in);
+ in = NULL;
+ }
+
+ in = ceph_get_inode(mdsc->fsc->sb, tvino, in);
if (IS_ERR(in)) {
err = PTR_ERR(in);
mutex_lock(&session->s_mutex);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
req->r_op == CEPH_MDS_OP_LSSNAP))
- ceph_readdir_prepopulate(req, req->r_session);
+ err = ceph_readdir_prepopulate(req, req->r_session);
}
current->journal_info = NULL;
mutex_unlock(&req->r_fill_mutex);
if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
dout("forward tid %llu aborted, unregistering\n", tid);
__unregister_request(mdsc, req);
- } else if (fwd_seq <= req->r_num_fwd) {
+ } else if (fwd_seq <= req->r_num_fwd || (uint32_t)fwd_seq >= U32_MAX) {
/*
- * The type of 'num_fwd' in ceph 'MClientRequestForward'
- * is 'int32_t', while in 'ceph_mds_request_head' the
- * type is '__u8'. So in case the request bounces between
- * MDSes exceeding 256 times, the client will get stuck.
- *
- * In this case it's ususally a bug in MDS and continue
- * bouncing the request makes no sense.
+ * Avoid inifinite retrying after overflow.
*
- * In future this could be fixed in ceph code, so avoid
- * using the hardcode here.
+ * The MDS will increase the fwd count and in client side
+ * if the num_fwd is less than the one saved in request
+ * that means the MDS is an old version and overflowed of
+ * 8 bits.
*/
- int max = sizeof_field(struct ceph_mds_request_head, num_fwd);
- max = 1 << (max * BITS_PER_BYTE);
- if (req->r_num_fwd >= max) {
- mutex_lock(&req->r_fill_mutex);
- req->r_err = -EMULTIHOP;
- set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
- mutex_unlock(&req->r_fill_mutex);
- aborted = true;
- pr_warn_ratelimited("forward tid %llu seq overflow\n",
- tid);
- } else {
- dout("forward tid %llu to mds%d - old seq %d <= %d\n",
- tid, next_mds, req->r_num_fwd, fwd_seq);
- }
+ mutex_lock(&req->r_fill_mutex);
+ req->r_err = -EMULTIHOP;
+ set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
+ mutex_unlock(&req->r_fill_mutex);
+ aborted = true;
+ pr_warn_ratelimited("forward tid %llu seq overflow\n", tid);
} else {
/* resend. forward race not possible; mds would drop */
dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
dout("handle_lease from mds%d\n", mds);
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
goto bad;
dname.len, dname.name);
mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
-
if (!inode) {
dout("handle_lease no inode %llx\n", vino.ino);
goto release;
out:
mutex_unlock(&session->s_mutex);
iput(inode);
+
+ ceph_dec_mds_stopping_blocker(mdsc);
return;
bad:
+ ceph_dec_mds_stopping_blocker(mdsc);
+
pr_err("corrupt lease message\n");
ceph_msg_dump(msg);
}
}
init_completion(&mdsc->safe_umount_waiters);
+ spin_lock_init(&mdsc->stopping_lock);
+ atomic_set(&mdsc->stopping_blockers, 0);
+ init_completion(&mdsc->stopping_waiter);
init_waitqueue_head(&mdsc->session_close_wq);
INIT_LIST_HEAD(&mdsc->waiting_for_map);
mdsc->quotarealms_inodes = RB_ROOT;
CEPHFS_FEATURE_ALTERNATE_NAME,
CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
CEPHFS_FEATURE_OP_GETVXATTR,
+ CEPHFS_FEATURE_32BITS_RETRY_FWD,
- CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_OP_GETVXATTR,
+ CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_32BITS_RETRY_FWD,
};
#define CEPHFS_FEATURES_CLIENT_SUPPORTED { \
CEPHFS_FEATURE_MULTI_RECONNECT, \
CEPHFS_FEATURE_DELEG_INO, \
CEPHFS_FEATURE_METRIC_COLLECT, \
+ CEPHFS_FEATURE_ALTERNATE_NAME, \
CEPHFS_FEATURE_NOTIFY_SESSION_STATE, \
CEPHFS_FEATURE_OP_GETVXATTR, \
+ CEPHFS_FEATURE_32BITS_RETRY_FWD, \
}
/*
s32 dir_pin;
struct ceph_timespec btime;
struct ceph_timespec snap_btime;
+ u8 *fscrypt_auth;
+ u8 *fscrypt_file;
+ u32 fscrypt_auth_len;
+ u32 fscrypt_file_len;
u64 rsnaps;
u64 change_attr;
};
struct ceph_mds_reply_dir_entry {
+ bool is_nokey;
char *name;
u32 name_len;
+ u32 raw_hash;
struct ceph_mds_reply_lease *lease;
struct ceph_mds_reply_info_in inode;
loff_t offset;
struct ceph_mds_reply_info_in diri, targeti;
struct ceph_mds_reply_dirfrag *dirfrag;
char *dname;
+ u8 *altname;
u32 dname_len;
+ u32 altname_len;
struct ceph_mds_reply_lease *dlease;
struct ceph_mds_reply_xattr xattr_info;
struct inode *r_parent; /* parent dir inode */
struct inode *r_target_inode; /* resulting inode */
+ struct inode *r_new_inode; /* new inode (for creates) */
#define CEPH_MDS_R_DIRECT_IS_HASH (1) /* r_direct_hash is valid */
#define CEPH_MDS_R_ABORTED (2) /* call was aborted */
#define CEPH_MDS_R_DID_PREPOPULATE (6) /* prepopulated readdir */
#define CEPH_MDS_R_PARENT_LOCKED (7) /* is r_parent->i_rwsem wlocked? */
#define CEPH_MDS_R_ASYNC (8) /* async request */
+#define CEPH_MDS_R_FSCRYPT_FILE (9) /* must marshal fscrypt_file field */
unsigned long r_req_flags;
struct mutex r_fill_mutex;
union ceph_mds_request_args r_args;
+
+ struct ceph_fscrypt_auth *r_fscrypt_auth;
+ u64 r_fscrypt_file;
+
+ u8 *r_altname; /* fscrypt binary crypttext for long filenames */
+ u32 r_altname_len; /* length of r_altname */
+
int r_fmode; /* file mode, if expecting cap */
int r_request_release_offset;
const struct cred *r_cred;
};
enum {
- CEPH_MDSC_STOPPING_BEGIN = 1,
- CEPH_MDSC_STOPPING_FLUSHED = 2,
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHING = 2,
+ CEPH_MDSC_STOPPING_FLUSHED = 3,
};
/*
struct ceph_mds_session **sessions; /* NULL for mds if no session */
atomic_t num_sessions;
int max_sessions; /* len of sessions array */
- int stopping; /* true if shutting down */
+
+ spinlock_t stopping_lock; /* protect snap_empty */
+ int stopping; /* the stage of shutting down */
+ atomic_t stopping_blockers;
+ struct completion stopping_waiter;
atomic64_t quotarealms_count; /* # realms with quota */
/*
}
extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base,
- int stop_on_nosnap);
+ int for_wire);
extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry);
extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
struct inode *inode;
struct ceph_inode_info *ci;
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
if (msg->front.iov_len < sizeof(*h)) {
pr_err("%s corrupt message mds%d len %d\n", __func__,
session->s_mds, (int)msg->front.iov_len);
ceph_msg_dump(msg);
- return;
+ goto out;
}
- /* increment msg sequence number */
- mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
- mutex_unlock(&session->s_mutex);
-
/* lookup inode */
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
inode = ceph_find_inode(sb, vino);
if (!inode) {
pr_warn("Failed to find inode %llu\n", vino.ino);
- return;
+ goto out;
}
ci = ceph_inode(inode);
spin_unlock(&ci->i_ceph_lock);
iput(inode);
+out:
+ ceph_dec_mds_stopping_blocker(mdsc);
}
static struct ceph_quotarealm_inode *
int locked_rwsem = 0;
bool close_sessions = false;
+ if (!ceph_inc_mds_stopping_blocker(mdsc, session))
+ return;
+
/* decode */
if (msg->front.iov_len < sizeof(*h))
goto bad;
dout("%s from mds%d op %s split %llx tracelen %d\n", __func__,
mds, ceph_snap_op_name(op), split, trace_len);
- mutex_lock(&session->s_mutex);
- inc_session_sequence(session);
- mutex_unlock(&session->s_mutex);
-
down_write(&mdsc->snap_rwsem);
locked_rwsem = 1;
up_write(&mdsc->snap_rwsem);
flush_snaps(mdsc);
+ ceph_dec_mds_stopping_blocker(mdsc);
return;
bad:
if (locked_rwsem)
up_write(&mdsc->snap_rwsem);
+ ceph_dec_mds_stopping_blocker(mdsc);
+
if (close_sessions)
ceph_mdsc_close_sessions(mdsc);
return;
#include "super.h"
#include "mds_client.h"
#include "cache.h"
+#include "crypto.h"
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
dout("put_super\n");
+ ceph_fscrypt_free_dummy_policy(fsc);
ceph_mdsc_close_sessions(fsc->mdsc);
}
Opt_recover_session,
Opt_source,
Opt_mon_addr,
+ Opt_test_dummy_encryption,
/* string args above */
Opt_dirstat,
Opt_rbytes,
Opt_copyfrom,
Opt_wsync,
Opt_pagecache,
+ Opt_sparseread,
};
enum ceph_recover_session_mode {
fsparam_string ("fsc", Opt_fscache), // fsc=...
fsparam_flag_no ("ino32", Opt_ino32),
fsparam_string ("mds_namespace", Opt_mds_namespace),
+ fsparam_string ("mon_addr", Opt_mon_addr),
fsparam_flag_no ("poolperm", Opt_poolperm),
fsparam_flag_no ("quotadf", Opt_quotadf),
fsparam_u32 ("rasize", Opt_rasize),
fsparam_u32 ("rsize", Opt_rsize),
fsparam_string ("snapdirname", Opt_snapdirname),
fsparam_string ("source", Opt_source),
- fsparam_string ("mon_addr", Opt_mon_addr),
+ fsparam_flag ("test_dummy_encryption", Opt_test_dummy_encryption),
+ fsparam_string ("test_dummy_encryption", Opt_test_dummy_encryption),
fsparam_u32 ("wsize", Opt_wsize),
fsparam_flag_no ("wsync", Opt_wsync),
fsparam_flag_no ("pagecache", Opt_pagecache),
+ fsparam_flag_no ("sparseread", Opt_sparseread),
{}
};
else
fsopt->flags &= ~CEPH_MOUNT_OPT_NOPAGECACHE;
break;
+ case Opt_sparseread:
+ if (result.negated)
+ fsopt->flags &= ~CEPH_MOUNT_OPT_SPARSEREAD;
+ else
+ fsopt->flags |= CEPH_MOUNT_OPT_SPARSEREAD;
+ break;
+ case Opt_test_dummy_encryption:
+#ifdef CONFIG_FS_ENCRYPTION
+ fscrypt_free_dummy_policy(&fsopt->dummy_enc_policy);
+ ret = fscrypt_parse_test_dummy_encryption(param,
+ &fsopt->dummy_enc_policy);
+ if (ret == -EINVAL) {
+ warnfc(fc, "Value of option \"%s\" is unrecognized",
+ param->key);
+ } else if (ret == -EEXIST) {
+ warnfc(fc, "Conflicting test_dummy_encryption options");
+ ret = -EINVAL;
+ }
+#else
+ warnfc(fc,
+ "FS encryption not supported: test_dummy_encryption mount option ignored");
+#endif
+ break;
default:
BUG();
}
kfree(args->server_path);
kfree(args->fscache_uniq);
kfree(args->mon_addr);
+ fscrypt_free_dummy_policy(&args->dummy_enc_policy);
kfree(args);
}
if (!(fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS))
seq_puts(m, ",wsync");
-
if (fsopt->flags & CEPH_MOUNT_OPT_NOPAGECACHE)
seq_puts(m, ",nopagecache");
+ if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+ seq_puts(m, ",sparseread");
+
+ fscrypt_show_test_dummy_encryption(m, ',', root->d_sb);
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
seq_printf(m, ",wsize=%u", fsopt->wsize);
return root;
}
+#ifdef CONFIG_FS_ENCRYPTION
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+ struct fs_context *fc,
+ struct ceph_mount_options *fsopt)
+{
+ struct ceph_fs_client *fsc = sb->s_fs_info;
+
+ if (!fscrypt_is_dummy_policy_set(&fsopt->dummy_enc_policy))
+ return 0;
+
+ /* No changing encryption context on remount. */
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
+ !fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+ &fsc->fsc_dummy_enc_policy))
+ return 0;
+ errorfc(fc, "Can't set test_dummy_encryption on remount");
+ return -EINVAL;
+ }
+
+ /* Also make sure fsopt doesn't contain a conflicting value. */
+ if (fscrypt_is_dummy_policy_set(&fsc->fsc_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&fsopt->dummy_enc_policy,
+ &fsc->fsc_dummy_enc_policy))
+ return 0;
+ errorfc(fc, "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+
+ fsc->fsc_dummy_enc_policy = fsopt->dummy_enc_policy;
+ memset(&fsopt->dummy_enc_policy, 0, sizeof(fsopt->dummy_enc_policy));
+
+ warnfc(fc, "test_dummy_encryption mode enabled");
+ return 0;
+}
+#else
+static int ceph_apply_test_dummy_encryption(struct super_block *sb,
+ struct fs_context *fc,
+ struct ceph_mount_options *fsopt)
+{
+ return 0;
+}
+#endif
+
/*
* mount: join the ceph cluster, and open root directory.
*/
goto out;
}
+ err = ceph_apply_test_dummy_encryption(fsc->sb, fc,
+ fsc->mount_options);
+ if (err)
+ goto out;
+
dout("mount opening path '%s'\n", path);
ceph_fs_debugfs_init(fsc);
out:
mutex_unlock(&fsc->client->mount_mutex);
+ ceph_fscrypt_free_dummy_policy(fsc);
return ERR_PTR(err);
}
s->s_time_max = U32_MAX;
s->s_flags |= SB_NODIRATIME | SB_NOATIME;
+ ceph_fscrypt_set_ops(s);
+
ret = set_anon_super_fc(s, fc);
if (ret != 0)
fsc->sb = NULL;
static int ceph_reconfigure_fc(struct fs_context *fc)
{
+ int err;
struct ceph_parse_opts_ctx *pctx = fc->fs_private;
struct ceph_mount_options *fsopt = pctx->opts;
- struct ceph_fs_client *fsc = ceph_sb_to_client(fc->root->d_sb);
+ struct super_block *sb = fc->root->d_sb;
+ struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
+
+ err = ceph_apply_test_dummy_encryption(sb, fc, fsopt);
+ if (err)
+ return err;
if (fsopt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
ceph_set_mount_opt(fsc, ASYNC_DIROPS);
else
ceph_clear_mount_opt(fsc, ASYNC_DIROPS);
+ if (fsopt->flags & CEPH_MOUNT_OPT_SPARSEREAD)
+ ceph_set_mount_opt(fsc, SPARSEREAD);
+ else
+ ceph_clear_mount_opt(fsc, SPARSEREAD);
+
if (strcmp_null(fsc->mount_options->mon_addr, fsopt->mon_addr)) {
kfree(fsc->mount_options->mon_addr);
fsc->mount_options->mon_addr = fsopt->mon_addr;
pr_notice("ceph: monitor addresses recorded, but not used for reconnection");
}
- sync_filesystem(fc->root->d_sb);
+ sync_filesystem(sb);
return 0;
}
return -ENOMEM;
}
+/*
+ * Return true if it successfully increases the blocker counter,
+ * or false if the mdsc is in stopping and flushed state.
+ */
+static bool __inc_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ spin_lock(&mdsc->stopping_lock);
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING) {
+ spin_unlock(&mdsc->stopping_lock);
+ return false;
+ }
+ atomic_inc(&mdsc->stopping_blockers);
+ spin_unlock(&mdsc->stopping_lock);
+ return true;
+}
+
+static void __dec_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ spin_lock(&mdsc->stopping_lock);
+ if (!atomic_dec_return(&mdsc->stopping_blockers) &&
+ mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHING)
+ complete_all(&mdsc->stopping_waiter);
+ spin_unlock(&mdsc->stopping_lock);
+}
+
+/* For metadata IO requests */
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session)
+{
+ mutex_lock(&session->s_mutex);
+ inc_session_sequence(session);
+ mutex_unlock(&session->s_mutex);
+
+ return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ __dec_stopping_blocker(mdsc);
+}
+
+/* For data IO requests */
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ return __inc_stopping_blocker(mdsc);
+}
+
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc)
+{
+ __dec_stopping_blocker(mdsc);
+}
+
static void ceph_kill_sb(struct super_block *s)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(s);
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ bool wait;
dout("kill_sb %p\n", s);
- ceph_mdsc_pre_umount(fsc->mdsc);
+ ceph_mdsc_pre_umount(mdsc);
flush_fs_workqueues(fsc);
/*
* Though the kill_anon_super() will finally trigger the
- * sync_filesystem() anyway, we still need to do it here
- * and then bump the stage of shutdown to stop the work
- * queue as earlier as possible.
+ * sync_filesystem() anyway, we still need to do it here and
+ * then bump the stage of shutdown. This will allow us to
+ * drop any further message, which will increase the inodes'
+ * i_count reference counters but makes no sense any more,
+ * from MDSs.
+ *
+ * Without this when evicting the inodes it may fail in the
+ * kill_anon_super(), which will trigger a warning when
+ * destroying the fscrypt keyring and then possibly trigger
+ * a further crash in ceph module when the iput() tries to
+ * evict the inodes later.
*/
sync_filesystem(s);
- fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+ spin_lock(&mdsc->stopping_lock);
+ mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHING;
+ wait = !!atomic_read(&mdsc->stopping_blockers);
+ spin_unlock(&mdsc->stopping_lock);
+
+ if (wait && atomic_read(&mdsc->stopping_blockers)) {
+ long timeleft = wait_for_completion_killable_timeout(
+ &mdsc->stopping_waiter,
+ fsc->client->options->mount_timeout);
+ if (!timeleft) /* timed out */
+ pr_warn("umount timed out, %ld\n", timeleft);
+ else if (timeleft < 0) /* killed */
+ pr_warn("umount was killed, %ld\n", timeleft);
+ }
+ mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
#include <linux/hashtable.h>
#include <linux/ceph/libceph.h>
+#include "crypto.h"
/* large granularity for statfs utilization stats to facilitate
* large volume sizes on 32-bit machines. */
#define CEPH_MOUNT_OPT_NOCOPYFROM (1<<14) /* don't use RADOS 'copy-from' op */
#define CEPH_MOUNT_OPT_ASYNC_DIROPS (1<<15) /* allow async directory ops */
#define CEPH_MOUNT_OPT_NOPAGECACHE (1<<16) /* bypass pagecache altogether */
+#define CEPH_MOUNT_OPT_SPARSEREAD (1<<17) /* always do sparse reads */
#define CEPH_MOUNT_OPT_DEFAULT \
(CEPH_MOUNT_OPT_DCACHE | \
char *server_path; /* default NULL (means "/") */
char *fscache_uniq; /* default NULL */
char *mon_addr;
+ struct fscrypt_dummy_policy dummy_enc_policy;
};
/* mount state */
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_volume *fscache;
#endif
+#ifdef CONFIG_FS_ENCRYPTION
+ struct fscrypt_dummy_policy fsc_dummy_enc_policy;
+#endif
};
-
/*
* File i/o capability. This tracks shared state with the metadata
* server that allows us to cache or writeback attributes or to read
u32 i_truncate_seq; /* last truncate to smaller size */
u64 i_truncate_size; /* and the size we last truncated down to */
int i_truncate_pending; /* still need to call vmtruncate */
+ /*
+ * For none fscrypt case it equals to i_truncate_size or it will
+ * equals to fscrypt_file_size
+ */
+ u64 i_truncate_pagecache_size;
u64 i_max_size; /* max file size authorized by mds */
u64 i_reported_size; /* (max_)size reported to or requested of mds */
struct work_struct i_work;
unsigned long i_work_mask;
+
+#ifdef CONFIG_FS_ENCRYPTION
+ u32 fscrypt_auth_len;
+ u32 fscrypt_file_len;
+ u8 *fscrypt_auth;
+ u8 *fscrypt_file;
+#endif
};
struct ceph_netfs_request_data {
/* inode.c */
struct ceph_mds_reply_info_in;
struct ceph_mds_reply_dirfrag;
+struct ceph_acl_sec_ctx;
extern const struct inode_operations ceph_file_iops;
extern void ceph_evict_inode(struct inode *inode);
extern void ceph_free_inode(struct inode *inode);
+struct inode *ceph_new_inode(struct inode *dir, struct dentry *dentry,
+ umode_t *mode, struct ceph_acl_sec_ctx *as_ctx);
+void ceph_as_ctx_to_req(struct ceph_mds_request *req,
+ struct ceph_acl_sec_ctx *as_ctx);
+
extern struct inode *ceph_get_inode(struct super_block *sb,
- struct ceph_vino vino);
+ struct ceph_vino vino,
+ struct inode *newino);
extern struct inode *ceph_get_snapdir(struct inode *parent);
extern int ceph_fill_file_size(struct inode *inode, int issued,
u32 truncate_seq, u64 truncate_size, u64 size);
}
extern int ceph_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
-extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
+
+struct ceph_iattr {
+ struct ceph_fscrypt_auth *fscrypt_auth;
+};
+
+extern int __ceph_setattr(struct inode *inode, struct iattr *attr,
+ struct ceph_iattr *cia);
extern int ceph_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct mnt_idmap *idmap,
void *sec_ctx;
u32 sec_ctxlen;
#endif
+#ifdef CONFIG_FS_ENCRYPTION
+ struct ceph_fscrypt_auth *fscrypt_auth;
+#endif
struct ceph_pagelist *pagelist;
};
struct inode *dir,
int mds, int drop, int unless);
+extern int __ceph_get_caps(struct inode *inode, struct ceph_file_info *fi,
+ int need, int want, loff_t endoff, int *got);
extern int ceph_get_caps(struct file *filp, int need, int want,
loff_t endoff, int *got);
extern int ceph_try_get_caps(struct inode *inode,
extern int ceph_open(struct inode *inode, struct file *file);
extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode);
+extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
+ struct iov_iter *to, int *retry_op,
+ u64 *last_objver);
extern int ceph_release(struct inode *inode, struct file *filp);
extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
char *data, size_t len);
struct kstatfs *buf);
extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
+bool ceph_inc_mds_stopping_blocker(struct ceph_mds_client *mdsc,
+ struct ceph_mds_session *session);
+void ceph_dec_mds_stopping_blocker(struct ceph_mds_client *mdsc);
+bool ceph_inc_osd_stopping_blocker(struct ceph_mds_client *mdsc);
+void ceph_dec_osd_stopping_blocker(struct ceph_mds_client *mdsc);
#endif /* _FS_CEPH_SUPER_H */
return ret;
}
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static bool ceph_vxattrcb_fscrypt_auth_exists(struct ceph_inode_info *ci)
+{
+ return ci->fscrypt_auth_len;
+}
+
+static ssize_t ceph_vxattrcb_fscrypt_auth(struct ceph_inode_info *ci,
+ char *val, size_t size)
+{
+ if (size) {
+ if (size < ci->fscrypt_auth_len)
+ return -ERANGE;
+ memcpy(val, ci->fscrypt_auth, ci->fscrypt_auth_len);
+ }
+ return ci->fscrypt_auth_len;
+}
+#endif /* CONFIG_FS_ENCRYPTION */
+
#define CEPH_XATTR_NAME(_type, _name) XATTR_CEPH_PREFIX #_type "." #_name
#define CEPH_XATTR_NAME2(_type, _name, _name2) \
XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
.exists_cb = NULL,
.flags = VXATTR_FLAG_READONLY,
},
+#if IS_ENABLED(CONFIG_FS_ENCRYPTION)
+ {
+ .name = "ceph.fscrypt.auth",
+ .name_size = sizeof("ceph.fscrypt.auth"),
+ .getxattr_cb = ceph_vxattrcb_fscrypt_auth,
+ .exists_cb = ceph_vxattrcb_fscrypt_auth_exists,
+ .flags = VXATTR_FLAG_READONLY,
+ },
+#endif /* CONFIG_FS_ENCRYPTION */
{ .name = NULL, 0 } /* Required table terminator */
};
#ifdef CONFIG_CEPH_FS_SECURITY_LABEL
security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
#endif
+#ifdef CONFIG_FS_ENCRYPTION
+ kfree(as_ctx->fscrypt_auth);
+#endif
if (as_ctx->pagelist)
ceph_pagelist_release(as_ctx->pagelist);
}
/*
* Calculate the time in jiffies until a dentry/attributes are valid
*/
-static u64 time_to_jiffies(u64 sec, u32 nsec)
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec)
{
if (sec || nsec) {
struct timespec64 ts = {
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
{
fuse_dentry_settime(entry,
- time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
-}
-
-static u64 attr_timeout(struct fuse_attr_out *o)
-{
- return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
-}
-
-u64 entry_attr_timeout(struct fuse_entry_out *o)
-{
- return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
+ fuse_time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
}
void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
goto invalid;
forget_all_cached_acls(inode);
- fuse_change_attributes(inode, &outarg.attr,
- entry_attr_timeout(&outarg),
+ fuse_change_attributes(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
attr_version);
fuse_change_entry_timeout(entry, &outarg);
} else if (inode) {
S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}
+static bool fuse_valid_size(u64 size)
+{
+ return size <= LLONG_MAX;
+}
+
bool fuse_invalid_attr(struct fuse_attr *attr)
{
- return !fuse_valid_type(attr->mode) ||
- attr->size > LLONG_MAX;
+ return !fuse_valid_type(attr->mode) || !fuse_valid_size(attr->size);
}
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
goto out_put_forget;
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
- &outarg->attr, entry_attr_timeout(outarg),
+ &outarg->attr, ATTR_TIMEOUT(outarg),
attr_version);
err = -ENOMEM;
if (!*inode) {
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
- &outentry.attr, entry_attr_timeout(&outentry), 0);
+ &outentry.attr, ATTR_TIMEOUT(&outentry), 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
fuse_sync_release(NULL, ff, flags);
if (err == -ENOSYS) {
fc->no_create = 1;
goto mknod;
- }
+ } else if (err == -EEXIST)
+ fuse_invalidate_entry(entry);
out_dput:
dput(res);
return err;
goto out_put_forget_req;
inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
- &outarg.attr, entry_attr_timeout(&outarg), 0);
+ &outarg.attr, ATTR_TIMEOUT(&outarg), 0);
if (!inode) {
fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1);
return -ENOMEM;
return 0;
out_put_forget_req:
+ if (err == -EEXIST)
+ fuse_invalidate_entry(entry);
kfree(forget);
return err;
}
if (!err) {
fuse_dir_changed(dir);
fuse_entry_unlinked(entry);
- } else if (err == -EINTR)
+ } else if (err == -EINTR || err == -ENOENT)
fuse_invalidate_entry(entry);
return err;
}
if (!err) {
fuse_dir_changed(dir);
fuse_entry_unlinked(entry);
- } else if (err == -EINTR)
+ } else if (err == -EINTR || err == -ENOENT)
fuse_invalidate_entry(entry);
return err;
}
/* newent will end up negative */
if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent))
fuse_entry_unlinked(newent);
- } else if (err == -EINTR) {
+ } else if (err == -EINTR || err == -ENOENT) {
/* If request was interrupted, DEITY only knows if the
rename actually took place. If the invalidation
fails (e.g. some process has CWD under the renamed
stat->blksize = 1 << blkbits;
}
+static void fuse_statx_to_attr(struct fuse_statx *sx, struct fuse_attr *attr)
+{
+ memset(attr, 0, sizeof(*attr));
+ attr->ino = sx->ino;
+ attr->size = sx->size;
+ attr->blocks = sx->blocks;
+ attr->atime = sx->atime.tv_sec;
+ attr->mtime = sx->mtime.tv_sec;
+ attr->ctime = sx->ctime.tv_sec;
+ attr->atimensec = sx->atime.tv_nsec;
+ attr->mtimensec = sx->mtime.tv_nsec;
+ attr->ctimensec = sx->ctime.tv_nsec;
+ attr->mode = sx->mode;
+ attr->nlink = sx->nlink;
+ attr->uid = sx->uid;
+ attr->gid = sx->gid;
+ attr->rdev = new_encode_dev(MKDEV(sx->rdev_major, sx->rdev_minor));
+ attr->blksize = sx->blksize;
+}
+
+static int fuse_do_statx(struct inode *inode, struct file *file,
+ struct kstat *stat)
+{
+ int err;
+ struct fuse_attr attr;
+ struct fuse_statx *sx;
+ struct fuse_statx_in inarg;
+ struct fuse_statx_out outarg;
+ struct fuse_mount *fm = get_fuse_mount(inode);
+ u64 attr_version = fuse_get_attr_version(fm->fc);
+ FUSE_ARGS(args);
+
+ memset(&inarg, 0, sizeof(inarg));
+ memset(&outarg, 0, sizeof(outarg));
+ /* Directories have separate file-handle space */
+ if (file && S_ISREG(inode->i_mode)) {
+ struct fuse_file *ff = file->private_data;
+
+ inarg.getattr_flags |= FUSE_GETATTR_FH;
+ inarg.fh = ff->fh;
+ }
+ /* For now leave sync hints as the default, request all stats. */
+ inarg.sx_flags = 0;
+ inarg.sx_mask = STATX_BASIC_STATS | STATX_BTIME;
+ args.opcode = FUSE_STATX;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.out_numargs = 1;
+ args.out_args[0].size = sizeof(outarg);
+ args.out_args[0].value = &outarg;
+ err = fuse_simple_request(fm, &args);
+ if (err)
+ return err;
+
+ sx = &outarg.stat;
+ if (((sx->mask & STATX_SIZE) && !fuse_valid_size(sx->size)) ||
+ ((sx->mask & STATX_TYPE) && (!fuse_valid_type(sx->mode) ||
+ inode_wrong_type(inode, sx->mode)))) {
+ make_bad_inode(inode);
+ return -EIO;
+ }
+
+ fuse_statx_to_attr(&outarg.stat, &attr);
+ if ((sx->mask & STATX_BASIC_STATS) == STATX_BASIC_STATS) {
+ fuse_change_attributes(inode, &attr, &outarg.stat,
+ ATTR_TIMEOUT(&outarg), attr_version);
+ }
+
+ if (stat) {
+ stat->result_mask = sx->mask & (STATX_BASIC_STATS | STATX_BTIME);
+ stat->btime.tv_sec = sx->btime.tv_sec;
+ stat->btime.tv_nsec = min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+ fuse_fillattr(inode, &attr, stat);
+ stat->result_mask |= STATX_TYPE;
+ }
+
+ return 0;
+}
+
static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct file *file)
{
fuse_make_bad(inode);
err = -EIO;
} else {
- fuse_change_attributes(inode, &outarg.attr,
- attr_timeout(&outarg),
+ fuse_change_attributes(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
attr_version);
if (stat)
fuse_fillattr(inode, &outarg.attr, stat);
unsigned int flags)
{
struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_conn *fc = get_fuse_conn(inode);
int err = 0;
bool sync;
u32 inval_mask = READ_ONCE(fi->inval_mask);
u32 cache_mask = fuse_get_cache_mask(inode);
- if (flags & AT_STATX_FORCE_SYNC)
+
+ /* FUSE only supports basic stats and possibly btime */
+ request_mask &= STATX_BASIC_STATS | STATX_BTIME;
+retry:
+ if (fc->no_statx)
+ request_mask &= STATX_BASIC_STATS;
+
+ if (!request_mask)
+ sync = false;
+ else if (flags & AT_STATX_FORCE_SYNC)
sync = true;
else if (flags & AT_STATX_DONT_SYNC)
sync = false;
if (sync) {
forget_all_cached_acls(inode);
- err = fuse_do_getattr(inode, stat, file);
+ /* Try statx if BTIME is requested */
+ if (!fc->no_statx && (request_mask & ~STATX_BASIC_STATS)) {
+ err = fuse_do_statx(inode, file, stat);
+ if (err == -ENOSYS) {
+ fc->no_statx = 1;
+ goto retry;
+ }
+ } else {
+ err = fuse_do_getattr(inode, stat, file);
+ }
} else if (stat) {
generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
+ if (test_bit(FUSE_I_BTIME, &fi->state)) {
+ stat->btime = fi->i_btime;
+ stat->result_mask |= STATX_BTIME;
+ }
}
return err;
/* FIXME: clear I_DIRTY_SYNC? */
}
- fuse_change_attributes_common(inode, &outarg.attr,
- attr_timeout(&outarg),
+ fuse_change_attributes_common(inode, &outarg.attr, NULL,
+ ATTR_TIMEOUT(&outarg),
fuse_get_cache_mask(inode));
oldsize = inode->i_size;
/* see the comment in fuse_change_attributes() */
#include <linux/uio.h>
#include <linux/fs.h>
#include <linux/filelock.h>
-#include <linux/file.h>
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
unsigned int open_flags, int opcode,
fuse_release_nowrite(inode);
}
-struct fuse_flush_args {
- struct fuse_args args;
- struct fuse_flush_in inarg;
- struct work_struct work;
- struct file *file;
-};
-
-static int fuse_do_flush(struct fuse_flush_args *fa)
+static int fuse_flush(struct file *file, fl_owner_t id)
{
- int err;
- struct inode *inode = file_inode(fa->file);
+ struct inode *inode = file_inode(file);
struct fuse_mount *fm = get_fuse_mount(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_flush_in inarg;
+ FUSE_ARGS(args);
+ int err;
+
+ if (fuse_is_bad(inode))
+ return -EIO;
+
+ if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
+ return 0;
err = write_inode_now(inode, 1);
if (err)
- goto out;
+ return err;
inode_lock(inode);
fuse_sync_writes(inode);
inode_unlock(inode);
- err = filemap_check_errors(fa->file->f_mapping);
+ err = filemap_check_errors(file->f_mapping);
if (err)
- goto out;
+ return err;
err = 0;
if (fm->fc->no_flush)
goto inval_attr_out;
- err = fuse_simple_request(fm, &fa->args);
+ memset(&inarg, 0, sizeof(inarg));
+ inarg.fh = ff->fh;
+ inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
+ args.opcode = FUSE_FLUSH;
+ args.nodeid = get_node_id(inode);
+ args.in_numargs = 1;
+ args.in_args[0].size = sizeof(inarg);
+ args.in_args[0].value = &inarg;
+ args.force = true;
+
+ err = fuse_simple_request(fm, &args);
if (err == -ENOSYS) {
fm->fc->no_flush = 1;
err = 0;
*/
if (!err && fm->fc->writeback_cache)
fuse_invalidate_attr_mask(inode, STATX_BLOCKS);
-
-out:
- fput(fa->file);
- kfree(fa);
return err;
}
-static void fuse_flush_async(struct work_struct *work)
-{
- struct fuse_flush_args *fa = container_of(work, typeof(*fa), work);
-
- fuse_do_flush(fa);
-}
-
-static int fuse_flush(struct file *file, fl_owner_t id)
-{
- struct fuse_flush_args *fa;
- struct inode *inode = file_inode(file);
- struct fuse_mount *fm = get_fuse_mount(inode);
- struct fuse_file *ff = file->private_data;
-
- if (fuse_is_bad(inode))
- return -EIO;
-
- if (ff->open_flags & FOPEN_NOFLUSH && !fm->fc->writeback_cache)
- return 0;
-
- fa = kzalloc(sizeof(*fa), GFP_KERNEL);
- if (!fa)
- return -ENOMEM;
-
- fa->inarg.fh = ff->fh;
- fa->inarg.lock_owner = fuse_lock_owner_id(fm->fc, id);
- fa->args.opcode = FUSE_FLUSH;
- fa->args.nodeid = get_node_id(inode);
- fa->args.in_numargs = 1;
- fa->args.in_args[0].size = sizeof(fa->inarg);
- fa->args.in_args[0].value = &fa->inarg;
- fa->args.force = true;
- fa->file = get_file(file);
-
- /* Don't wait if the task is exiting */
- if (current->flags & PF_EXITING) {
- INIT_WORK(&fa->work, fuse_flush_async);
- schedule_work(&fa->work);
- return 0;
- }
-
- return fuse_do_flush(fa);
-}
-
int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
int datasync, int opcode)
{
int write = flags & FUSE_DIO_WRITE;
int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fm->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
int err = 0;
struct fuse_io_args *ia;
unsigned int max_pages;
+ bool fopen_direct_io = ff->open_flags & FOPEN_DIRECT_IO;
max_pages = iov_iter_npages(iter, fc->max_pages);
ia = fuse_io_alloc(io, max_pages);
if (!ia)
return -ENOMEM;
+ if (fopen_direct_io && fc->direct_io_relax) {
+ res = filemap_write_and_wait_range(mapping, pos, pos + count - 1);
+ if (res) {
+ fuse_io_free(ia);
+ return res;
+ }
+ }
if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) {
if (!write)
inode_lock(inode);
inode_unlock(inode);
}
+ if (fopen_direct_io && write) {
+ res = invalidate_inode_pages2_range(mapping, idx_from, idx_to);
+ if (res) {
+ fuse_io_free(ia);
+ return res;
+ }
+ }
+
io->should_dirty = !write && user_backed_iter(iter);
while (count) {
ssize_t nres;
static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fm->fc;
/* DAX mmap is superior to direct_io mmap */
if (FUSE_IS_DAX(file_inode(file)))
return fuse_dax_mmap(file, vma);
if (ff->open_flags & FOPEN_DIRECT_IO) {
- /* Can't provide the coherency needed for MAP_SHARED */
- if (vma->vm_flags & VM_MAYSHARE)
+ /* Can't provide the coherency needed for MAP_SHARED
+ * if FUSE_DIRECT_IO_RELAX isn't set.
+ */
+ if ((vma->vm_flags & VM_MAYSHARE) && !fc->direct_io_relax)
return -ENODEV;
invalidate_inode_pages2(file->f_mapping);
preserve the original mode */
umode_t orig_i_mode;
+ /* Cache birthtime */
+ struct timespec64 i_btime;
+
/** 64 bit inode number */
u64 orig_ino;
FUSE_I_SIZE_UNSTABLE,
/* Bad inode */
FUSE_I_BAD,
+ /* Has btime */
+ FUSE_I_BTIME,
};
struct fuse_conn;
/* Is tmpfile not implemented by fs? */
unsigned int no_tmpfile:1;
+ /* relax restrictions in FOPEN_DIRECT_IO mode */
+ unsigned int direct_io_relax:1;
+
+ /* Is statx not implemented by fs? */
+ unsigned int no_statx:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
* Change attributes of an inode
*/
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx,
u64 attr_valid, u64 attr_version);
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask);
u32 fuse_get_cache_mask(struct inode *inode);
void fuse_invalidate_atime(struct inode *inode);
-u64 entry_attr_timeout(struct fuse_entry_out *o);
+u64 fuse_time_to_jiffies(u64 sec, u32 nsec);
+#define ATTR_TIMEOUT(o) \
+ fuse_time_to_jiffies((o)->attr_valid, (o)->attr_valid_nsec)
+
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o);
/**
return NULL;
fi->i_time = 0;
- fi->inval_mask = 0;
+ fi->inval_mask = ~0;
fi->nodeid = 0;
fi->nlookup = 0;
fi->attr_version = 0;
}
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx,
u64 attr_valid, u32 cache_mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
fi->attr_version = atomic64_inc_return(&fc->attr_version);
fi->i_time = attr_valid;
- WRITE_ONCE(fi->inval_mask, 0);
+ /* Clear basic stats from invalid mask */
+ set_mask_bits(&fi->inval_mask, STATX_BASIC_STATS, 0);
inode->i_ino = fuse_squash_ino(attr->ino);
inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
if (!(cache_mask & STATX_CTIME)) {
inode_set_ctime(inode, attr->ctime, attr->ctimensec);
}
+ if (sx) {
+ /* Sanitize nsecs */
+ sx->btime.tv_nsec =
+ min_t(u32, sx->btime.tv_nsec, NSEC_PER_SEC - 1);
+
+ /*
+ * Btime has been queried, cache is valid (whether or not btime
+ * is available or not) so clear STATX_BTIME from inval_mask.
+ *
+ * Availability of the btime attribute is indicated in
+ * FUSE_I_BTIME
+ */
+ set_mask_bits(&fi->inval_mask, STATX_BTIME, 0);
+ if (sx->mask & STATX_BTIME) {
+ set_bit(FUSE_I_BTIME, &fi->state);
+ fi->i_btime.tv_sec = sx->btime.tv_sec;
+ fi->i_btime.tv_nsec = sx->btime.tv_nsec;
+ }
+ }
if (attr->blksize != 0)
inode->i_blkbits = ilog2(attr->blksize);
}
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ struct fuse_statx *sx,
u64 attr_valid, u64 attr_version)
{
struct fuse_conn *fc = get_fuse_conn(inode);
}
old_mtime = inode->i_mtime;
- fuse_change_attributes_common(inode, attr, attr_valid, cache_mask);
+ fuse_change_attributes_common(inode, attr, sx, attr_valid, cache_mask);
oldsize = inode->i_size;
/*
spin_lock(&fi->lock);
fi->nlookup++;
spin_unlock(&fi->lock);
- fuse_change_attributes(inode, attr, attr_valid, attr_version);
+ fuse_change_attributes(inode, attr, NULL, attr_valid, attr_version);
return inode;
}
fc->init_security = 1;
if (flags & FUSE_CREATE_SUPP_GROUP)
fc->create_supp_group = 1;
+ if (flags & FUSE_DIRECT_IO_RELAX)
+ fc->direct_io_relax = 1;
} else {
ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
- FUSE_HAS_EXPIRE_ONLY;
+ FUSE_HAS_EXPIRE_ONLY | FUSE_DIRECT_IO_RELAX;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
flags |= FUSE_MAP_ALIGNMENT;
spin_unlock(&fi->lock);
forget_all_cached_acls(inode);
- fuse_change_attributes(inode, &o->attr,
- entry_attr_timeout(o),
+ fuse_change_attributes(inode, &o->attr, NULL,
+ ATTR_TIMEOUT(o),
attr_version);
/*
* The other branch comes via fuse_iget()
*/
} else {
inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
- &o->attr, entry_attr_timeout(o),
+ &o->attr, ATTR_TIMEOUT(o),
attr_version);
if (!inode)
inode = ERR_PTR(-ENOMEM);
dput(dentry);
dentry = alias;
}
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ if (!IS_ERR(inode)) {
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fi->lock);
+ fi->nlookup--;
+ spin_unlock(&fi->lock);
+ }
return PTR_ERR(dentry);
+ }
}
if (fc->readdirplus_auto)
set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
int ret;
/*
- * Even if we didn't write any pages here, we might still be holding
+ * Even if we didn't write enough pages here, we might still be holding
* dirty pages in the ail. We forcibly flush the ail because we don't
* want balance_dirty_pages() to loop indefinitely trying to write out
* pages held in the ail that it can't find.
*/
ret = iomap_writepages(mapping, wbc, &wpc, &gfs2_writeback_ops);
- if (ret == 0)
+ if (ret == 0 && wbc->nr_to_write > 0)
set_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
return ret;
}
* not be suitable for data integrity
* writeout).
*/
- *done_index = folio->index +
- folio_nr_pages(folio);
+ *done_index = folio_next_index(folio);
ret = 1;
break;
}
int error;
down_write(&ip->i_rw_mutex);
- page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
+ page = grab_cache_page(inode->i_mapping, 0);
error = -ENOMEM;
if (!page)
goto out;
wake_up_glock(gl);
call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ wake_up(&sdp->sd_kill_wait);
}
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
*
- * Returns: 1 if there is a blocked holder at the head of the list
+ * Returns true on success (i.e., progress was made or there are no waiters).
*/
-static int do_promote(struct gfs2_glock *gl)
+static bool do_promote(struct gfs2_glock *gl)
{
struct gfs2_holder *gh, *current_gh;
* If we get here, it means we may not grant this
* holder for some reason. If this holder is at the
* head of the list, it means we have a blocked holder
- * at the head, so return 1.
+ * at the head, so return false.
*/
if (list_is_first(&gh->gh_list, &gl->gl_holders))
- return 1;
+ return false;
do_error(gl, 0);
break;
}
if (!current_gh)
current_gh = gh;
}
- return 0;
+ return true;
}
/**
if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
/* move to back of queue and try next entry */
if (ret & LM_OUT_CANCELED) {
- if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
- list_move_tail(&gh->gh_list, &gl->gl_holders);
+ list_move_tail(&gh->gh_list, &gl->gl_holders);
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
+ if (do_promote(gl))
+ goto out;
goto retry;
}
/* Some error or failed "try lock" - report it */
gh && !(gh->gh_flags & LM_FLAG_NOEXP))
goto skip_inval;
- lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
- LM_FLAG_PRIORITY);
+ lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
GLOCK_BUG_ON(gl, gl->gl_state == target);
GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
} else {
if (test_bit(GLF_DEMOTE, &gl->gl_flags))
gfs2_demote_wake(gl);
- if (do_promote(gl) == 0)
+ if (do_promote(gl))
goto out_unlock;
gh = find_first_waiter(gl);
gl->gl_target = gh->gh_state;
* step entirely.
*/
if (gfs2_try_evict(gl)) {
- if (test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+ if (test_bit(SDF_KILL, &sdp->sd_flags))
goto out;
if (gfs2_queue_verify_evict(gl))
return;
GFS2_BLKST_UNLINKED);
if (IS_ERR(inode)) {
if (PTR_ERR(inode) == -EAGAIN &&
- !test_bit(SDF_DEACTIVATING, &sdp->sd_flags) &&
+ !test_bit(SDF_KILL, &sdp->sd_flags) &&
gfs2_queue_verify_evict(gl))
return;
} else {
out_free:
gfs2_glock_dealloc(&gl->gl_rcu);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
- wake_up(&sdp->sd_glock_wait);
+ wake_up(&sdp->sd_kill_wait);
out:
return ret;
}
if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
continue;
- if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
- insert_pt = &gh2->gh_list;
}
trace_gfs2_glock_queue(gh, 1);
gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
if (likely(insert_pt == NULL)) {
list_add_tail(&gh->gh_list, &gl->gl_holders);
- if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
- goto do_cancel;
return;
}
list_add_tail(&gh->gh_list, insert_pt);
-do_cancel:
gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
- if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
- spin_unlock(&gl->gl_lockref.lock);
- if (sdp->sd_lockstruct.ls_ops->lm_cancel)
- sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
- spin_lock(&gl->gl_lockref.lock);
- }
+ spin_unlock(&gl->gl_lockref.lock);
+ if (sdp->sd_lockstruct.ls_ops->lm_cancel)
+ sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
+ spin_lock(&gl->gl_lockref.lock);
return;
trap_recursive:
flush_workqueue(glock_workqueue);
glock_hash_walk(clear_glock, sdp);
flush_workqueue(glock_workqueue);
- wait_event_timeout(sdp->sd_glock_wait,
+ wait_event_timeout(sdp->sd_kill_wait,
atomic_read(&sdp->sd_glock_disposal) == 0,
HZ * 600);
glock_hash_walk(dump_glock_func, sdp);
*p++ = 'e';
if (flags & LM_FLAG_ANY)
*p++ = 'A';
- if (flags & LM_FLAG_PRIORITY)
- *p++ = 'p';
if (flags & LM_FLAG_NODE_SCOPE)
*p++ = 'n';
if (flags & GL_ASYNC)
* also be granted in SHARED. The preferred state is whichever is compatible
* with other granted locks, or the specified state if no other locks exist.
*
- * LM_FLAG_PRIORITY
- * Override fairness considerations. Suppose a lock is held in a shared state
- * and there is a pending request for the deferred state. A shared lock
- * request with the priority flag would be allowed to bypass the deferred
- * request and directly join the other shared lock. A shared lock request
- * without the priority flag might be forced to wait until the deferred
- * requested had acquired and released the lock.
- *
* LM_FLAG_NODE_SCOPE
* This holder agrees to share the lock within this node. In other words,
* the glock is held in EX mode according to DLM, but local holders on the
#define LM_FLAG_TRY_1CB 0x0002
#define LM_FLAG_NOEXP 0x0004
#define LM_FLAG_ANY 0x0008
-#define LM_FLAG_PRIORITY 0x0010
#define LM_FLAG_NODE_SCOPE 0x0020
#define GL_ASYNC 0x0040
#define GL_EXACT 0x0080
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
if (!remote || sb_rdonly(sdp->sd_vfs) ||
- test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+ test_bit(SDF_KILL, &sdp->sd_flags))
return;
if (gl->gl_demote_state == LM_ST_UNLOCKED &&
s64 qd_change_sync;
unsigned int qd_slot;
- unsigned int qd_slot_count;
+ unsigned int qd_slot_ref;
struct buffer_head *qd_bh;
struct gfs2_quota_change *qd_bh_qc;
#define GFS2_QUOTA_OFF 0
#define GFS2_QUOTA_ACCOUNT 1
#define GFS2_QUOTA_ON 2
+#define GFS2_QUOTA_QUIET 3 /* on but not complaining */
#define GFS2_DATA_DEFAULT GFS2_DATA_ORDERED
#define GFS2_DATA_WRITEBACK 1
SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are
withdrawing */
- SDF_DEACTIVATING = 15,
+ SDF_KILL = 15,
SDF_EVICTING = 16,
SDF_FROZEN = 17,
};
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_freeze_gl;
struct work_struct sd_freeze_work;
- wait_queue_head_t sd_glock_wait;
+ wait_queue_head_t sd_kill_wait;
wait_queue_head_t sd_async_glock_wait;
atomic_t sd_glock_disposal;
struct completion sd_locking_init;
* gfs2_lookup_simple callers expect ENOENT
* and do not check for NULL.
*/
- if (inode == NULL)
- return ERR_PTR(-ENOENT);
- else
- return inode;
+ if (IS_ERR_OR_NULL(inode))
+ return inode ? inode : ERR_PTR(-ENOENT);
+
+ /*
+ * Must not call back into the filesystem when allocating
+ * pages in the metadata inode's address space.
+ */
+ mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+ return inode;
}
lkf |= DLM_LKF_NOQUEUEBAST;
}
- if (gfs_flags & LM_FLAG_PRIORITY) {
- lkf |= DLM_LKF_NOORDER;
- lkf |= DLM_LKF_HEADQUE;
- }
-
if (gfs_flags & LM_FLAG_ANY) {
if (req == DLM_LOCK_PR)
lkf |= DLM_LKF_ALTCW;
gfs2_log_unlock(sdp);
}
+static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
+{
+ return atomic_read(&sdp->sd_log_pinned) +
+ atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh1);
+}
+
+static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
+{
+ return sdp->sd_jdesc->jd_blocks -
+ atomic_read(&sdp->sd_log_blks_free) +
+ atomic_read(&sdp->sd_log_blks_needed) >=
+ atomic_read(&sdp->sd_log_thresh2);
+}
+
/**
* gfs2_log_commit - Commit a transaction to the log
* @sdp: the filesystem
{
log_refund(sdp, tr);
- if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
- ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
- atomic_read(&sdp->sd_log_thresh2)))
+ if (gfs2_ail_flush_reqd(sdp) || gfs2_jrnl_flush_reqd(sdp))
wake_up(&sdp->sd_logd_waitq);
}
gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
}
-static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
-{
- return (atomic_read(&sdp->sd_log_pinned) +
- atomic_read(&sdp->sd_log_blks_needed) >=
- atomic_read(&sdp->sd_log_thresh1));
-}
-
-static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
-{
- unsigned int used_blocks = sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free);
-
- if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
- return 1;
-
- return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
- atomic_read(&sdp->sd_log_thresh2);
-}
-
/**
* gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
* @data: Pointer to GFS2 superblock
{
struct gfs2_sbd *sdp = data;
unsigned long t = 1;
- DEFINE_WAIT(wait);
while (!kthread_should_stop()) {
+ if (gfs2_withdrawn(sdp))
+ break;
- if (gfs2_withdrawn(sdp)) {
- msleep_interruptible(HZ);
- continue;
- }
/* Check for errors writing to the journal */
if (sdp->sd_log_error) {
gfs2_lm(sdp,
"prevent further damage.\n",
sdp->sd_fsname, sdp->sd_log_error);
gfs2_withdraw(sdp);
- continue;
+ break;
}
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
GFS2_LFC_LOGD_JFLUSH_REQD);
}
- if (gfs2_ail_flush_reqd(sdp)) {
+ if (test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp)) {
+ clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags);
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
gfs2_ail1_empty(sdp, 0);
try_to_freeze();
- do {
- prepare_to_wait(&sdp->sd_logd_waitq, &wait,
- TASK_INTERRUPTIBLE);
- if (!gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop())
- t = schedule_timeout(t);
- } while(t && !gfs2_ail_flush_reqd(sdp) &&
- !gfs2_jrnl_flush_reqd(sdp) &&
- !kthread_should_stop());
- finish_wait(&sdp->sd_logd_waitq, &wait);
+ t = wait_event_interruptible_timeout(sdp->sd_logd_waitq,
+ test_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags) ||
+ gfs2_ail_flush_reqd(sdp) ||
+ gfs2_jrnl_flush_reqd(sdp) ||
+ sdp->sd_log_error ||
+ gfs2_withdrawn(sdp) ||
+ kthread_should_stop(),
+ t);
}
return 0;
* Find the folio with 'index' in the journal's mapping. Search the folio for
* the journal head if requested (cleanup == false). Release refs on the
* folio so the page cache can reclaim it. We grabbed a
- * reference on this folio twice, first when we did a find_or_create_page()
+ * reference on this folio twice, first when we did a grab_cache_page()
* to obtain the folio to add it to the bio and second when we do a
* filemap_get_folio() here to get the folio to wait on while I/O on it is being
* completed.
if (!*done)
*done = gfs2_jhead_pg_srch(jd, head, &folio->page);
- /* filemap_get_folio() and the earlier find_or_create_page() */
+ /* filemap_get_folio() and the earlier grab_cache_page() */
folio_put_refs(folio, 2);
}
for (; block < je->lblock + je->blocks; block++, dblock++) {
if (!page) {
- page = find_or_create_page(mapping,
- block >> shift, GFP_NOFS);
+ page = grab_cache_page(mapping, block >> shift);
if (!page) {
ret = -ENOMEM;
done = true;
goto fail_shrinker;
error = -ENOMEM;
- gfs_recovery_wq = alloc_workqueue("gfs_recovery",
+ gfs2_recovery_wq = alloc_workqueue("gfs2_recovery",
WQ_MEM_RECLAIM | WQ_FREEZABLE, 0);
- if (!gfs_recovery_wq)
+ if (!gfs2_recovery_wq)
goto fail_wq1;
gfs2_control_wq = alloc_workqueue("gfs2_control",
if (!gfs2_control_wq)
goto fail_wq2;
- gfs2_freeze_wq = alloc_workqueue("freeze_workqueue", 0, 0);
+ gfs2_freeze_wq = alloc_workqueue("gfs2_freeze", 0, 0);
if (!gfs2_freeze_wq)
goto fail_wq3;
fail_wq3:
destroy_workqueue(gfs2_control_wq);
fail_wq2:
- destroy_workqueue(gfs_recovery_wq);
+ destroy_workqueue(gfs2_recovery_wq);
fail_wq1:
unregister_shrinker(&gfs2_qd_shrinker);
fail_shrinker:
gfs2_unregister_debugfs();
unregister_filesystem(&gfs2_fs_type);
unregister_filesystem(&gfs2meta_fs_type);
- destroy_workqueue(gfs_recovery_wq);
+ destroy_workqueue(gfs2_recovery_wq);
destroy_workqueue(gfs2_control_wq);
destroy_workqueue(gfs2_freeze_wq);
list_lru_destroy(&gfs2_qd_lru);
set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
gfs2_tune_init(&sdp->sd_tune);
- init_waitqueue_head(&sdp->sd_glock_wait);
+ init_waitqueue_head(&sdp->sd_kill_wait);
init_waitqueue_head(&sdp->sd_async_glock_wait);
atomic_set(&sdp->sd_glock_disposal, 0);
init_completion(&sdp->sd_locking_init);
struct task_struct *p;
int error = 0;
- p = kthread_run(gfs2_logd, sdp, "gfs2_logd");
+ p = kthread_create(gfs2_logd, sdp, "gfs2_logd/%s", sdp->sd_fsname);
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start logd thread: %d\n", error);
+ fs_err(sdp, "can't create logd thread: %d\n", error);
return error;
}
+ get_task_struct(p);
sdp->sd_logd_process = p;
- p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad");
+ p = kthread_create(gfs2_quotad, sdp, "gfs2_quotad/%s", sdp->sd_fsname);
if (IS_ERR(p)) {
error = PTR_ERR(p);
- fs_err(sdp, "can't start quotad thread: %d\n", error);
+ fs_err(sdp, "can't create quotad thread: %d\n", error);
goto fail;
}
+ get_task_struct(p);
sdp->sd_quotad_process = p;
+
+ wake_up_process(sdp->sd_logd_process);
+ wake_up_process(sdp->sd_quotad_process);
return 0;
fail:
kthread_stop(sdp->sd_logd_process);
+ put_task_struct(sdp->sd_logd_process);
sdp->sd_logd_process = NULL;
return error;
}
+void gfs2_destroy_threads(struct gfs2_sbd *sdp)
+{
+ if (sdp->sd_logd_process) {
+ kthread_stop(sdp->sd_logd_process);
+ put_task_struct(sdp->sd_logd_process);
+ sdp->sd_logd_process = NULL;
+ }
+ if (sdp->sd_quotad_process) {
+ kthread_stop(sdp->sd_quotad_process);
+ put_task_struct(sdp->sd_quotad_process);
+ sdp->sd_quotad_process = NULL;
+ }
+}
+
/**
* gfs2_fill_super - Read in superblock
* @sb: The VFS superblock
if (error) {
gfs2_freeze_unlock(&sdp->sd_freeze_gh);
- if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_destroy_threads(sdp);
fs_err(sdp, "can't make FS RW: %d\n", error);
goto fail_per_node;
}
{"off", GFS2_QUOTA_OFF},
{"account", GFS2_QUOTA_ACCOUNT},
{"on", GFS2_QUOTA_ON},
+ {"quiet", GFS2_QUOTA_QUIET},
{}
};
/*
* Flush and then drain the delete workqueue here (via
* destroy_workqueue()) to ensure that any delete work that
- * may be running will also see the SDF_DEACTIVATING flag.
+ * may be running will also see the SDF_KILL flag.
*/
- set_bit(SDF_DEACTIVATING, &sdp->sd_flags);
+ set_bit(SDF_KILL, &sdp->sd_flags);
gfs2_flush_delete_work(sdp);
destroy_workqueue(sdp->sd_delete_wq);
static void gfs2_qd_dealloc(struct rcu_head *rcu)
{
struct gfs2_quota_data *qd = container_of(rcu, struct gfs2_quota_data, qd_rcu);
+ struct gfs2_sbd *sdp = qd->qd_sbd;
+
kmem_cache_free(gfs2_quotad_cachep, qd);
+ if (atomic_dec_and_test(&sdp->sd_quota_count))
+ wake_up(&sdp->sd_kill_wait);
}
-static void gfs2_qd_dispose(struct list_head *list)
+static void gfs2_qd_dispose(struct gfs2_quota_data *qd)
{
- struct gfs2_quota_data *qd;
- struct gfs2_sbd *sdp;
-
- while (!list_empty(list)) {
- qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
- sdp = qd->qd_gl->gl_name.ln_sbd;
-
- list_del(&qd->qd_lru);
+ struct gfs2_sbd *sdp = qd->qd_sbd;
- /* Free from the filesystem-specific list */
- spin_lock(&qd_lock);
- list_del(&qd->qd_list);
- spin_unlock(&qd_lock);
+ spin_lock(&qd_lock);
+ list_del(&qd->qd_list);
+ spin_unlock(&qd_lock);
- spin_lock_bucket(qd->qd_hash);
- hlist_bl_del_rcu(&qd->qd_hlist);
- spin_unlock_bucket(qd->qd_hash);
+ spin_lock_bucket(qd->qd_hash);
+ hlist_bl_del_rcu(&qd->qd_hlist);
+ spin_unlock_bucket(qd->qd_hash);
+ if (!gfs2_withdrawn(sdp)) {
gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
+ gfs2_assert_warn(sdp, !qd->qd_slot_ref);
gfs2_assert_warn(sdp, !qd->qd_bh_count);
+ }
- gfs2_glock_put(qd->qd_gl);
- atomic_dec(&sdp->sd_quota_count);
+ gfs2_glock_put(qd->qd_gl);
+ call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+}
- /* Delete it from the common reclaim list */
- call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
+static void gfs2_qd_list_dispose(struct list_head *list)
+{
+ struct gfs2_quota_data *qd;
+
+ while (!list_empty(list)) {
+ qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
+ list_del(&qd->qd_lru);
+
+ gfs2_qd_dispose(qd);
}
}
struct list_lru_one *lru, spinlock_t *lru_lock, void *arg)
{
struct list_head *dispose = arg;
- struct gfs2_quota_data *qd = list_entry(item, struct gfs2_quota_data, qd_lru);
+ struct gfs2_quota_data *qd =
+ list_entry(item, struct gfs2_quota_data, qd_lru);
+ enum lru_status status;
if (!spin_trylock(&qd->qd_lockref.lock))
return LRU_SKIP;
+ status = LRU_SKIP;
if (qd->qd_lockref.count == 0) {
lockref_mark_dead(&qd->qd_lockref);
list_lru_isolate_move(lru, &qd->qd_lru, dispose);
+ status = LRU_REMOVED;
}
spin_unlock(&qd->qd_lockref.lock);
- return LRU_REMOVED;
+ return status;
}
static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
freed = list_lru_shrink_walk(&gfs2_qd_lru, sc,
gfs2_qd_isolate, &dispose);
- gfs2_qd_dispose(&dispose);
+ gfs2_qd_list_dispose(&dispose);
return freed;
}
static u64 qd2offset(struct gfs2_quota_data *qd)
{
- u64 offset;
-
- offset = qd2index(qd);
- offset *= sizeof(struct gfs2_quota);
-
- return offset;
+ return qd2index(qd) * sizeof(struct gfs2_quota);
}
static struct gfs2_quota_data *qd_alloc(unsigned hash, struct gfs2_sbd *sdp, struct kqid qid)
return NULL;
qd->qd_sbd = sdp;
- qd->qd_lockref.count = 1;
+ qd->qd_lockref.count = 0;
spin_lock_init(&qd->qd_lockref.lock);
qd->qd_id = qid;
qd->qd_slot = -1;
spin_lock_bucket(hash);
*qdp = qd = gfs2_qd_search_bucket(hash, sdp, qid);
if (qd == NULL) {
+ new_qd->qd_lockref.count++;
*qdp = new_qd;
list_add(&new_qd->qd_list, &sdp->sd_quota_list);
hlist_bl_add_head_rcu(&new_qd->qd_hlist, &qd_hash_table[hash]);
static void qd_hold(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
gfs2_assert(sdp, !__lockref_is_dead(&qd->qd_lockref));
lockref_get(&qd->qd_lockref);
}
static void qd_put(struct gfs2_quota_data *qd)
{
+ struct gfs2_sbd *sdp;
+
if (lockref_put_or_lock(&qd->qd_lockref))
return;
+ BUG_ON(__lockref_is_dead(&qd->qd_lockref));
+ sdp = qd->qd_sbd;
+ if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+ lockref_mark_dead(&qd->qd_lockref);
+ spin_unlock(&qd->qd_lockref.lock);
+
+ gfs2_qd_dispose(qd);
+ return;
+ }
+
qd->qd_lockref.count = 0;
list_lru_add(&gfs2_qd_lru, &qd->qd_lru);
spin_unlock(&qd->qd_lockref.lock);
-
}
static int slot_get(struct gfs2_quota_data *qd)
int error = 0;
spin_lock(&sdp->sd_bitmap_lock);
- if (qd->qd_slot_count != 0)
- goto out;
-
- error = -ENOSPC;
- bit = find_first_zero_bit(sdp->sd_quota_bitmap, sdp->sd_quota_slots);
- if (bit < sdp->sd_quota_slots) {
+ if (qd->qd_slot_ref == 0) {
+ bit = find_first_zero_bit(sdp->sd_quota_bitmap,
+ sdp->sd_quota_slots);
+ if (bit >= sdp->sd_quota_slots) {
+ error = -ENOSPC;
+ goto out;
+ }
set_bit(bit, sdp->sd_quota_bitmap);
qd->qd_slot = bit;
- error = 0;
-out:
- qd->qd_slot_count++;
}
+ qd->qd_slot_ref++;
+out:
spin_unlock(&sdp->sd_bitmap_lock);
-
return error;
}
struct gfs2_sbd *sdp = qd->qd_sbd;
spin_lock(&sdp->sd_bitmap_lock);
- gfs2_assert(sdp, qd->qd_slot_count);
- qd->qd_slot_count++;
+ gfs2_assert(sdp, qd->qd_slot_ref);
+ qd->qd_slot_ref++;
spin_unlock(&sdp->sd_bitmap_lock);
}
struct gfs2_sbd *sdp = qd->qd_sbd;
spin_lock(&sdp->sd_bitmap_lock);
- gfs2_assert(sdp, qd->qd_slot_count);
- if (!--qd->qd_slot_count) {
+ gfs2_assert(sdp, qd->qd_slot_ref);
+ if (!--qd->qd_slot_ref) {
BUG_ON(!test_and_clear_bit(qd->qd_slot, sdp->sd_quota_bitmap));
qd->qd_slot = -1;
}
static int bh_get(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct inode *inode = sdp->sd_qc_inode;
struct gfs2_inode *ip = GFS2_I(inode);
unsigned int block, offset;
static void bh_put(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
mutex_lock(&sdp->sd_quota_mutex);
gfs2_assert(sdp, qd->qd_bh_count);
return 1;
}
+static int qd_bh_get_or_undo(struct gfs2_sbd *sdp, struct gfs2_quota_data *qd)
+{
+ int error;
+
+ error = bh_get(qd);
+ if (!error)
+ return 0;
+
+ clear_bit(QDF_LOCKED, &qd->qd_flags);
+ slot_put(qd);
+ qd_put(qd);
+ return error;
+}
+
static int qd_fish(struct gfs2_sbd *sdp, struct gfs2_quota_data **qdp)
{
struct gfs2_quota_data *qd = NULL, *iter;
spin_unlock(&qd_lock);
if (qd) {
- error = bh_get(qd);
- if (error) {
- clear_bit(QDF_LOCKED, &qd->qd_flags);
- slot_put(qd);
- qd_put(qd);
+ error = qd_bh_get_or_undo(sdp, qd);
+ if (error)
return error;
- }
+ *qdp = qd;
}
- *qdp = qd;
-
return 0;
}
-static void qd_unlock(struct gfs2_quota_data *qd)
+static void qdsb_put(struct gfs2_quota_data *qd)
{
- gfs2_assert_warn(qd->qd_gl->gl_name.ln_sbd,
- test_bit(QDF_LOCKED, &qd->qd_flags));
- clear_bit(QDF_LOCKED, &qd->qd_flags);
bh_put(qd);
slot_put(qd);
qd_put(qd);
}
+static void qd_unlock(struct gfs2_quota_data *qd)
+{
+ gfs2_assert_warn(qd->qd_sbd, test_bit(QDF_LOCKED, &qd->qd_flags));
+ clear_bit(QDF_LOCKED, &qd->qd_flags);
+ qdsb_put(qd);
+}
+
static int qdsb_get(struct gfs2_sbd *sdp, struct kqid qid,
struct gfs2_quota_data **qdp)
{
return error;
}
-static void qdsb_put(struct gfs2_quota_data *qd)
-{
- bh_put(qd);
- slot_put(qd);
- qd_put(qd);
-}
-
/**
* gfs2_qa_get - make sure we have a quota allocations data structure,
* if necessary
static void do_qc(struct gfs2_quota_data *qd, s64 change, int qc_type)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode);
struct gfs2_quota_change *qc = qd->qd_bh_qc;
s64 x;
mutex_unlock(&sdp->sd_quota_mutex);
}
-static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
+static int gfs2_write_buf_to_page(struct gfs2_sbd *sdp, unsigned long index,
unsigned off, void *buf, unsigned bytes)
{
+ struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct address_space *mapping = inode->i_mapping;
struct page *page;
struct buffer_head *bh;
u64 blk;
unsigned bsize = sdp->sd_sb.sb_bsize, bnum = 0, boff = 0;
unsigned to_write = bytes, pg_off = off;
- int done = 0;
blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
boff = off % bsize;
- page = find_or_create_page(mapping, index, GFP_NOFS);
+ page = grab_cache_page(mapping, index);
if (!page)
return -ENOMEM;
if (!page_has_buffers(page))
create_empty_buffers(page, bsize, 0);
bh = page_buffers(page);
- while (!done) {
+ for(;;) {
/* Find the beginning block within the page */
if (pg_off >= ((bnum * bsize) + bsize)) {
bh = bh->b_this_page;
set_buffer_uptodate(bh);
if (bh_read(bh, REQ_META | REQ_PRIO) < 0)
goto unlock_out;
- if (gfs2_is_jdata(ip))
- gfs2_trans_add_data(ip->i_gl, bh);
- else
- gfs2_ordered_add_inode(ip);
+ gfs2_trans_add_data(ip->i_gl, bh);
/* If we need to write to the next block as well */
if (to_write > (bsize - boff)) {
boff = pg_off % bsize;
continue;
}
- done = 1;
+ break;
}
/* Write to the page, now that we have setup the buffer(s) */
return -EIO;
}
-static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
+static int gfs2_write_disk_quota(struct gfs2_sbd *sdp, struct gfs2_quota *qp,
loff_t loc)
{
unsigned long pg_beg;
unsigned pg_off, nbytes, overflow = 0;
- int pg_oflow = 0, error;
+ int error;
void *ptr;
nbytes = sizeof(struct gfs2_quota);
pg_off = offset_in_page(loc);
/* If the quota straddles a page boundary, split the write in two */
- if ((pg_off + nbytes) > PAGE_SIZE) {
- pg_oflow = 1;
+ if ((pg_off + nbytes) > PAGE_SIZE)
overflow = (pg_off + nbytes) - PAGE_SIZE;
- }
ptr = qp;
- error = gfs2_write_buf_to_page(ip, pg_beg, pg_off, ptr,
+ error = gfs2_write_buf_to_page(sdp, pg_beg, pg_off, ptr,
nbytes - overflow);
/* If there's an overflow, write the remaining bytes to the next page */
- if (!error && pg_oflow)
- error = gfs2_write_buf_to_page(ip, pg_beg + 1, 0,
+ if (!error && overflow)
+ error = gfs2_write_buf_to_page(sdp, pg_beg + 1, 0,
ptr + nbytes - overflow,
overflow);
return error;
/**
* gfs2_adjust_quota - adjust record of current block usage
- * @ip: The quota inode
+ * @sdp: The superblock
* @loc: Offset of the entry in the quota file
* @change: The amount of usage change to record
* @qd: The quota data
* Returns: 0 or -ve on error
*/
-static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
+static int gfs2_adjust_quota(struct gfs2_sbd *sdp, loff_t loc,
s64 change, struct gfs2_quota_data *qd,
struct qc_dqblk *fdq)
{
+ struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct inode *inode = &ip->i_inode;
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_quota q;
int err;
u64 size;
return err;
loc -= sizeof(q); /* gfs2_internal_read would've advanced the loc ptr */
- err = -EIO;
be64_add_cpu(&q.qu_value, change);
if (((s64)be64_to_cpu(q.qu_value)) < 0)
q.qu_value = 0; /* Never go negative on quota usage */
}
}
- err = gfs2_write_disk_quota(ip, &q, loc);
+ err = gfs2_write_disk_quota(sdp, &q, loc);
if (!err) {
size = loc + sizeof(struct gfs2_quota);
if (size > inode->i_size)
static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
{
- struct gfs2_sbd *sdp = (*qda)->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = (*qda)->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned int data_blocks, ind_blocks;
unsigned int nalloc = 0, blocks;
int error;
- error = gfs2_qa_get(ip);
- if (error)
- return error;
-
gfs2_write_calc_reserv(ip, sizeof(struct gfs2_quota),
&data_blocks, &ind_blocks);
ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
- if (!ghs) {
- error = -ENOMEM;
- goto out;
- }
+ if (!ghs)
+ return -ENOMEM;
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
inode_lock(&ip->i_inode);
for (x = 0; x < num_qd; x++) {
qd = qda[x];
offset = qd2offset(qd);
- error = gfs2_adjust_quota(ip, offset, qd->qd_change_sync, qd, NULL);
+ error = gfs2_adjust_quota(sdp, offset, qd->qd_change_sync, qd,
+ NULL);
if (error)
goto out_end_trans;
set_bit(QDF_REFRESH, &qd->qd_flags);
}
- error = 0;
-
out_end_trans:
gfs2_trans_end(sdp);
out_ipres:
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
-out:
- gfs2_qa_put(ip);
+ if (!error) {
+ for (x = 0; x < num_qd; x++)
+ qda[x]->qd_sync_gen = sdp->sd_quota_sync_gen;
+ }
return error;
}
static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
struct gfs2_holder *q_gh)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
struct gfs2_holder i_gh;
int error;
+ gfs2_assert_warn(sdp, sdp == qd->qd_gl->gl_name.ln_sbd);
restart:
error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
if (error)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_quota_data *qd;
u32 x;
- int error = 0;
+ int error;
- if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
+ if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+ sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
return 0;
error = gfs2_quota_hold(ip, uid, gid);
return error;
}
-static int need_sync(struct gfs2_quota_data *qd)
+static bool need_sync(struct gfs2_quota_data *qd)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
struct gfs2_tune *gt = &sdp->sd_tune;
s64 value;
unsigned int num, den;
- int do_sync = 1;
if (!qd->qd_qb.qb_limit)
- return 0;
+ return false;
spin_lock(&qd_lock);
value = qd->qd_change;
den = gt->gt_quota_scale_den;
spin_unlock(>->gt_spin);
- if (value < 0)
- do_sync = 0;
+ if (value <= 0)
+ return false;
else if ((s64)be64_to_cpu(qd->qd_qb.qb_value) >=
(s64)be64_to_cpu(qd->qd_qb.qb_limit))
- do_sync = 0;
+ return false;
else {
value *= gfs2_jindex_size(sdp) * num;
value = div_s64(value, den);
value += (s64)be64_to_cpu(qd->qd_qb.qb_value);
if (value < (s64)be64_to_cpu(qd->qd_qb.qb_limit))
- do_sync = 0;
+ return false;
}
- return do_sync;
+ return true;
}
void gfs2_quota_unlock(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_quota_data *qda[4];
+ struct gfs2_quota_data *qda[2 * GFS2_MAXQUOTAS];
unsigned int count = 0;
u32 x;
int found;
for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
struct gfs2_quota_data *qd;
- int sync;
+ bool sync;
qd = ip->i_qadata->qa_qd[x];
sync = need_sync(qd);
if (!found)
continue;
- gfs2_assert_warn(sdp, qd->qd_change_sync);
- if (bh_get(qd)) {
- clear_bit(QDF_LOCKED, &qd->qd_flags);
- slot_put(qd);
- qd_put(qd);
- continue;
- }
-
- qda[count++] = qd;
+ if (!qd_bh_get_or_undo(sdp, qd))
+ qda[count++] = qd;
}
if (count) {
static int print_message(struct gfs2_quota_data *qd, char *type)
{
- struct gfs2_sbd *sdp = qd->qd_gl->gl_name.ln_sbd;
+ struct gfs2_sbd *sdp = qd->qd_sbd;
- fs_info(sdp, "quota %s for %s %u\n",
- type,
- (qd->qd_id.type == USRQUOTA) ? "user" : "group",
- from_kqid(&init_user_ns, qd->qd_id));
+ if (sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET)
+ fs_info(sdp, "quota %s for %s %u\n",
+ type,
+ (qd->qd_id.type == USRQUOTA) ? "user" : "group",
+ from_kqid(&init_user_ns, qd->qd_id));
return 0;
}
u32 x;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON ||
+ if ((sdp->sd_args.ar_quota != GFS2_QUOTA_ON &&
+ sdp->sd_args.ar_quota != GFS2_QUOTA_QUIET) ||
gfs2_assert_warn(sdp, change))
return;
if (ip->i_diskflags & GFS2_DIF_SYSTEM)
}
}
+static bool qd_changed(struct gfs2_sbd *sdp)
+{
+ struct gfs2_quota_data *qd;
+ bool changed = false;
+
+ spin_lock(&qd_lock);
+ list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+ if (test_bit(QDF_LOCKED, &qd->qd_flags) ||
+ !test_bit(QDF_CHANGE, &qd->qd_flags))
+ continue;
+
+ changed = true;
+ break;
+ }
+ spin_unlock(&qd_lock);
+ return changed;
+}
+
int gfs2_quota_sync(struct super_block *sb, int type)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
unsigned int x;
int error = 0;
+ if (!qd_changed(sdp))
+ return 0;
+
qda = kcalloc(max_qd, sizeof(struct gfs2_quota_data *), GFP_KERNEL);
if (!qda)
return -ENOMEM;
if (num_qd) {
if (!error)
error = do_sync(num_qd, qda);
- if (!error)
- for (x = 0; x < num_qd; x++)
- qda[x]->qd_sync_gen =
- sdp->sd_quota_sync_gen;
for (x = 0; x < num_qd; x++)
qd_unlock(qda[x]);
set_bit(QDF_CHANGE, &qd->qd_flags);
qd->qd_change = qc_change;
qd->qd_slot = slot;
- qd->qd_slot_count = 1;
+ qd->qd_slot_ref = 1;
spin_lock(&qd_lock);
BUG_ON(test_and_set_bit(slot, sdp->sd_quota_bitmap));
void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
{
- struct list_head *head = &sdp->sd_quota_list;
struct gfs2_quota_data *qd;
+ LIST_HEAD(dispose);
+ int count;
- spin_lock(&qd_lock);
- while (!list_empty(head)) {
- qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
+ BUG_ON(test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags));
- list_del(&qd->qd_list);
+ spin_lock(&qd_lock);
+ list_for_each_entry(qd, &sdp->sd_quota_list, qd_list) {
+ spin_lock(&qd->qd_lockref.lock);
+ if (qd->qd_lockref.count != 0) {
+ spin_unlock(&qd->qd_lockref.lock);
+ continue;
+ }
+ lockref_mark_dead(&qd->qd_lockref);
+ spin_unlock(&qd->qd_lockref.lock);
- /* Also remove if this qd exists in the reclaim list */
list_lru_del(&gfs2_qd_lru, &qd->qd_lru);
- atomic_dec(&sdp->sd_quota_count);
- spin_unlock(&qd_lock);
-
- spin_lock_bucket(qd->qd_hash);
- hlist_bl_del_rcu(&qd->qd_hlist);
- spin_unlock_bucket(qd->qd_hash);
-
- gfs2_assert_warn(sdp, !qd->qd_change);
- gfs2_assert_warn(sdp, !qd->qd_slot_count);
- gfs2_assert_warn(sdp, !qd->qd_bh_count);
-
- gfs2_glock_put(qd->qd_gl);
- call_rcu(&qd->qd_rcu, gfs2_qd_dealloc);
-
- spin_lock(&qd_lock);
+ list_add(&qd->qd_lru, &dispose);
}
spin_unlock(&qd_lock);
- gfs2_assert_warn(sdp, !atomic_read(&sdp->sd_quota_count));
+ gfs2_qd_list_dispose(&dispose);
+
+ wait_event_timeout(sdp->sd_kill_wait,
+ (count = atomic_read(&sdp->sd_quota_count)) == 0,
+ HZ * 60);
+
+ if (count != 0)
+ fs_err(sdp, "%d left-over quota data objects\n", count);
kvfree(sdp->sd_quota_bitmap);
sdp->sd_quota_bitmap = NULL;
unsigned long statfs_timeo = 0;
unsigned long quotad_timeo = 0;
unsigned long t = 0;
- DEFINE_WAIT(wait);
while (!kthread_should_stop()) {
-
if (gfs2_withdrawn(sdp))
- goto bypass;
+ break;
+
/* Update the master statfs file */
if (sdp->sd_statfs_force_sync) {
int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
try_to_freeze();
-bypass:
t = min(quotad_timeo, statfs_timeo);
- prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
- if (!sdp->sd_statfs_force_sync)
- t -= schedule_timeout(t);
- else
+ t = wait_event_interruptible_timeout(sdp->sd_quota_wait,
+ sdp->sd_statfs_force_sync ||
+ gfs2_withdrawn(sdp) ||
+ kthread_should_stop(),
+ t);
+
+ if (sdp->sd_statfs_force_sync)
t = 0;
- finish_wait(&sdp->sd_quota_wait, &wait);
}
return 0;
memset(state, 0, sizeof(*state));
switch (sdp->sd_args.ar_quota) {
+ case GFS2_QUOTA_QUIET:
+ fallthrough;
case GFS2_QUOTA_ON:
state->s_state[USRQUOTA].flags |= QCI_LIMITS_ENFORCED;
state->s_state[GRPQUOTA].flags |= QCI_LIMITS_ENFORCED;
goto out_release;
/* Apply changes */
- error = gfs2_adjust_quota(ip, offset, 0, qd, fdq);
+ error = gfs2_adjust_quota(sdp, offset, 0, qd, fdq);
if (!error)
clear_bit(QDF_QMSG_QUIET, &qd->qd_flags);
#include "util.h"
#include "dir.h"
-struct workqueue_struct *gfs_recovery_wq;
+struct workqueue_struct *gfs2_recovery_wq;
int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk,
struct buffer_head **bh)
return -EBUSY;
/* we have JDF_RECOVERY, queue should always succeed */
- rv = queue_work(gfs_recovery_wq, &jd->jd_work);
+ rv = queue_work(gfs2_recovery_wq, &jd->jd_work);
BUG_ON(!rv);
if (wait)
#include "incore.h"
-extern struct workqueue_struct *gfs_recovery_wq;
+extern struct workqueue_struct *gfs2_recovery_wq;
static inline void gfs2_replay_incr_blk(struct gfs2_jdesc *jd, u32 *blk)
{
{
int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
- if (!test_bit(SDF_DEACTIVATING, &sdp->sd_flags))
+ if (!test_bit(SDF_KILL, &sdp->sd_flags))
gfs2_flush_delete_work(sdp);
- if (!log_write_allowed && current == sdp->sd_quotad_process)
- fs_warn(sdp, "The quotad daemon is withdrawing.\n");
- else if (sdp->sd_quotad_process)
- kthread_stop(sdp->sd_quotad_process);
- sdp->sd_quotad_process = NULL;
-
- if (!log_write_allowed && current == sdp->sd_logd_process)
- fs_warn(sdp, "The logd daemon is withdrawing.\n");
- else if (sdp->sd_logd_process)
- kthread_stop(sdp->sd_logd_process);
- sdp->sd_logd_process = NULL;
+ gfs2_destroy_threads(sdp);
if (log_write_allowed) {
gfs2_quota_sync(sdp->sd_vfs, 0);
gfs2_log_is_empty(sdp),
HZ * 5);
gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
- } else {
- wait_event_timeout(sdp->sd_log_waitq,
- gfs2_log_is_empty(sdp),
- HZ * 5);
}
gfs2_quota_cleanup(sdp);
-
- if (!log_write_allowed)
- sdp->sd_vfs->s_flags |= SB_RDONLY;
}
/**
if (!sb_rdonly(sb)) {
gfs2_make_fs_ro(sdp);
}
+ if (gfs2_withdrawn(sdp)) {
+ gfs2_destroy_threads(sdp);
+ gfs2_quota_cleanup(sdp);
+ }
WARN_ON(gfs2_withdrawing(sdp));
/* At this point, we're through modifying the disk */
case GFS2_QUOTA_ON:
state = "on";
break;
+ case GFS2_QUOTA_QUIET:
+ state = "quiet";
+ break;
default:
state = "unknown";
break;
extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
+extern void gfs2_destroy_threads(struct gfs2_sbd *sdp);
extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
s64 dinodes);
"sd_log_flush_head: %d\n"
"sd_log_flush_tail: %d\n"
"sd_log_blks_reserved: %d\n"
- "sd_log_revokes_available: %d\n",
+ "sd_log_revokes_available: %d\n"
+ "sd_log_pinned: %d\n"
+ "sd_log_thresh1: %d\n"
+ "sd_log_thresh2: %d\n",
test_bit(SDF_JOURNAL_CHECKED, &f),
test_bit(SDF_JOURNAL_LIVE, &f),
(sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
test_bit(SDF_WITHDRAW_IN_PROG, &f),
test_bit(SDF_REMOTE_WITHDRAW, &f),
test_bit(SDF_WITHDRAW_RECOVERY, &f),
- test_bit(SDF_DEACTIVATING, &f),
+ test_bit(SDF_KILL, &f),
sdp->sd_log_error,
rwsem_is_locked(&sdp->sd_log_flush_lock),
sdp->sd_log_num_revoke,
sdp->sd_log_flush_head,
sdp->sd_log_flush_tail,
sdp->sd_log_blks_reserved,
- atomic_read(&sdp->sd_log_revokes_available));
+ atomic_read(&sdp->sd_log_revokes_available),
+ atomic_read(&sdp->sd_log_pinned),
+ atomic_read(&sdp->sd_log_thresh1),
+ atomic_read(&sdp->sd_log_thresh2));
return s;
}
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
+#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
#include <linux/delay.h>
if (!sb_rdonly(sdp->sd_vfs)) {
bool locked = mutex_trylock(&sdp->sd_freeze_mutex);
- gfs2_make_fs_ro(sdp);
+ wake_up(&sdp->sd_logd_waitq);
+ wake_up(&sdp->sd_quota_wait);
+
+ wait_event_timeout(sdp->sd_log_waitq,
+ gfs2_log_is_empty(sdp),
+ HZ * 5);
+
+ sdp->sd_vfs->s_flags |= SB_RDONLY;
if (locked)
mutex_unlock(&sdp->sd_freeze_mutex);
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
const struct lm_lockops *lm = ls->ls_ops;
- if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
- if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
- return -1;
-
- wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
- TASK_UNINTERRUPTIBLE);
- return -1;
- }
-
- set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
-
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
+ unsigned long old = READ_ONCE(sdp->sd_flags), new;
+
+ do {
+ if (old & BIT(SDF_WITHDRAWN)) {
+ wait_on_bit(&sdp->sd_flags,
+ SDF_WITHDRAW_IN_PROG,
+ TASK_UNINTERRUPTIBLE);
+ return -1;
+ }
+ new = old | BIT(SDF_WITHDRAWN) | BIT(SDF_WITHDRAW_IN_PROG);
+ } while (unlikely(!try_cmpxchg(&sdp->sd_flags, &old, new)));
+
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
/*
* Noinline to reduce binary size.
*/
-static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi)
{
- kfree(sbi->new_rec);
- kvfree(ntfs_put_shared(sbi->upcase));
- kfree(sbi->def_table);
-
wnd_close(&sbi->mft.bitmap);
wnd_close(&sbi->used.bitmap);
indx_clear(&sbi->security.index_sdh);
indx_clear(&sbi->reparse.index_r);
indx_clear(&sbi->objid.index_o);
+}
+
+static void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
+{
+ kfree(sbi->new_rec);
+ kvfree(ntfs_put_shared(sbi->upcase));
+ kfree(sbi->def_table);
kfree(sbi->compress.lznt);
#ifdef CONFIG_NTFS3_LZX_XPRESS
xpress_free_decompressor(sbi->compress.xpress);
/* Mark rw ntfs as clear, if possible. */
ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
+ ntfs3_put_sbi(sbi);
}
static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct ntfs_mount_options *opts = fc->fs_private;
struct ntfs_sb_info *sbi = fc->s_fs_info;
- if (sbi)
+ if (sbi) {
+ ntfs3_put_sbi(sbi);
ntfs3_free_sbi(sbi);
+ }
if (opts)
put_mount_options(opts);
#include <linux/hugetlb.h>
#include <linux/huge_mm.h>
#include <linux/mount.h>
+#include <linux/ksm.h>
#include <linux/seq_file.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
unsigned long swap;
unsigned long shared_hugetlb;
unsigned long private_hugetlb;
+ unsigned long ksm;
u64 pss;
u64 pss_anon;
u64 pss_file;
mss->lazyfree += size;
}
+ if (PageKsm(page))
+ mss->ksm += size;
+
mss->resident += size;
/* Accumulate the size in pages that have been accessed. */
if (young || page_is_young(page) || PageReferenced(page))
SEQ_PUT_DEC(" kB\nPrivate_Dirty: ", mss->private_dirty);
SEQ_PUT_DEC(" kB\nReferenced: ", mss->referenced);
SEQ_PUT_DEC(" kB\nAnonymous: ", mss->anonymous);
+ SEQ_PUT_DEC(" kB\nKSM: ", mss->ksm);
SEQ_PUT_DEC(" kB\nLazyFree: ", mss->lazyfree);
SEQ_PUT_DEC(" kB\nAnonHugePages: ", mss->anonymous_thp);
SEQ_PUT_DEC(" kB\nShmemPmdMapped: ", mss->shmem_thp);
int statx_flags = flags | AT_NO_AUTOMOUNT;
struct filename *name;
+ /*
+ * Work around glibc turning fstat() into fstatat(AT_EMPTY_PATH)
+ *
+ * If AT_EMPTY_PATH is set, we expect the common case to be that
+ * empty path, and avoid doing all the extra pathname work.
+ */
+ if (dfd >= 0 && flags == AT_EMPTY_PATH) {
+ char c;
+
+ ret = get_user(c, filename);
+ if (unlikely(ret))
+ return ret;
+
+ if (likely(!c))
+ return vfs_fstat(dfd, stat);
+ }
+
name = getname_flags(filename, getname_statx_lookup_flags(statx_flags), NULL);
ret = vfs_statx(dfd, name, statx_flags, stat, STATX_BASIC_STATS);
putname(name);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Generic I/O and MEMIO string operations. */
-
-#define __ide_insw insw
-#define __ide_insl insl
-#define __ide_outsw outsw
-#define __ide_outsl outsl
-
-static __inline__ void __ide_mm_insw(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- *(u16 *)addr = readw(port);
- addr += 2;
- }
-}
-
-static __inline__ void __ide_mm_insl(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- *(u32 *)addr = readl(port);
- addr += 4;
- }
-}
-
-static __inline__ void __ide_mm_outsw(void __iomem *port, void *addr, u32 count)
-{
- while (count--) {
- writew(*(u16 *)addr, port);
- addr += 2;
- }
-}
-
-static __inline__ void __ide_mm_outsl(void __iomem * port, void *addr, u32 count)
-{
- while (count--) {
- writel(*(u32 *)addr, port);
- addr += 4;
- }
-}
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
-#ifdef CONFIG_HW_PERF_EVENTS
+#if IS_ENABLED(CONFIG_HW_PERF_EVENTS) && IS_ENABLED(CONFIG_KVM)
struct kvm_pmc {
u8 idx; /* index into the pmu->pmc array */
struct kvm_pmu_events *kvm_get_pmu_events(void);
void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu);
void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
+void kvm_vcpu_pmu_resync_el0(void);
#define kvm_vcpu_has_pmu(vcpu) \
(test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features))
{
return 0;
}
+static inline void kvm_vcpu_pmu_resync_el0(void) {}
#endif
AUDIT_NFT_OP_OBJ_RESET,
AUDIT_NFT_OP_FLOWTABLE_REGISTER,
AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ AUDIT_NFT_OP_SETELEM_RESET,
+ AUDIT_NFT_OP_RULE_RESET,
AUDIT_NFT_OP_INVALID,
};
size /= sizeof(long);
while (size--)
- *ldst++ = *lsrc++;
+ data_race(*ldst++ = *lsrc++);
}
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
extern const char *ceph_mds_op_name(int op);
-
-#define CEPH_SETATTR_MODE 1
-#define CEPH_SETATTR_UID 2
-#define CEPH_SETATTR_GID 4
-#define CEPH_SETATTR_MTIME 8
-#define CEPH_SETATTR_ATIME 16
-#define CEPH_SETATTR_SIZE 32
-#define CEPH_SETATTR_CTIME 64
+#define CEPH_SETATTR_MODE (1 << 0)
+#define CEPH_SETATTR_UID (1 << 1)
+#define CEPH_SETATTR_GID (1 << 2)
+#define CEPH_SETATTR_MTIME (1 << 3)
+#define CEPH_SETATTR_ATIME (1 << 4)
+#define CEPH_SETATTR_SIZE (1 << 5)
+#define CEPH_SETATTR_CTIME (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW (1 << 8)
+#define CEPH_SETATTR_BTIME (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12)
/*
* Ceph setxattr request flags.
} __attribute__ ((packed));
union ceph_mds_request_args_ext {
- union ceph_mds_request_args old;
- struct {
- __le32 mode;
- __le32 uid;
- __le32 gid;
- struct ceph_timespec mtime;
- struct ceph_timespec atime;
- __le64 size, old_size; /* old_size needed by truncate */
- __le32 mask; /* CEPH_SETATTR_* */
- struct ceph_timespec btime;
- } __attribute__ ((packed)) setattr_ext;
+ union {
+ union ceph_mds_request_args old;
+ struct {
+ __le32 mode;
+ __le32 uid;
+ __le32 gid;
+ struct ceph_timespec mtime;
+ struct ceph_timespec atime;
+ __le64 size, old_size; /* old_size needed by truncate */
+ __le32 mask; /* CEPH_SETATTR_* */
+ struct ceph_timespec btime;
+ } __attribute__ ((packed)) setattr_ext;
+ };
};
#define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */
#define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */
#define CEPH_MDS_FLAG_ASYNC 4 /* request is asynchronous */
-struct ceph_mds_request_head_old {
+struct ceph_mds_request_head_legacy {
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
__le32 flags; /* CEPH_MDS_FLAG_* */
union ceph_mds_request_args args;
} __attribute__ ((packed));
-#define CEPH_MDS_REQUEST_HEAD_VERSION 1
+#define CEPH_MDS_REQUEST_HEAD_VERSION 2
-struct ceph_mds_request_head {
+struct ceph_mds_request_head_old {
__le16 version; /* struct version */
__le64 oldest_client_tid;
__le32 mdsmap_epoch; /* on client */
union ceph_mds_request_args_ext args;
} __attribute__ ((packed));
+struct ceph_mds_request_head {
+ __le16 version; /* struct version */
+ __le64 oldest_client_tid;
+ __le32 mdsmap_epoch; /* on client */
+ __le32 flags; /* CEPH_MDS_FLAG_* */
+ __u8 num_retry, num_fwd; /* legacy count retry and fwd attempts */
+ __le16 num_releases; /* # include cap/lease release records */
+ __le32 op; /* mds op code */
+ __le32 caller_uid, caller_gid;
+ __le64 ino; /* use this ino for openc, mkdir, mknod,
+ etc. (if replaying) */
+ union ceph_mds_request_args_ext args;
+
+ __le32 ext_num_retry; /* new count retry attempts */
+ __le32 ext_num_fwd; /* new count fwd attempts */
+} __attribute__ ((packed));
+
/* cap/lease release record */
struct ceph_mds_request_release {
__le64 ino, cap_id; /* ino and unique cap id */
struct ceph_msg;
struct ceph_connection;
+struct ceph_msg_data_cursor;
/*
* Ceph defines these callbacks for handling connection events.
int used_proto, int result,
const int *allowed_protos, int proto_cnt,
const int *allowed_modes, int mode_cnt);
+
+ /**
+ * sparse_read: read sparse data
+ * @con: connection we're reading from
+ * @cursor: data cursor for reading extents
+ * @buf: optional buffer to read into
+ *
+ * This should be called more than once, each time setting up to
+ * receive an extent into the current cursor position, and zeroing
+ * the holes between them.
+ *
+ * Returns amount of data to be read (in bytes), 0 if reading is
+ * complete, or -errno if there was an error.
+ *
+ * If @buf is set on a >0 return, then the data should be read into
+ * the provided buffer. Otherwise, it should be read into the cursor.
+ *
+ * The sparse read operation is expected to initialize the cursor
+ * with a length covering up to the end of the last extent.
+ */
+ int (*sparse_read)(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **buf);
+
};
/* use format string %s%lld */
CEPH_MSG_DATA_BIO, /* data source/destination is a bio list */
#endif /* CONFIG_BLOCK */
CEPH_MSG_DATA_BVECS, /* data source/destination is a bio_vec array */
+ CEPH_MSG_DATA_ITER, /* data source/destination is an iov_iter */
};
#ifdef CONFIG_BLOCK
bool own_pages;
};
struct ceph_pagelist *pagelist;
+ struct iov_iter iter;
};
};
struct ceph_msg_data *data; /* current data item */
size_t resid; /* bytes not yet consumed */
+ int sr_resid; /* residual sparse_read len */
bool need_crc; /* crc update needed */
union {
#ifdef CONFIG_BLOCK
struct page *page; /* page from list */
size_t offset; /* bytes from list */
};
+ struct {
+ struct iov_iter iov_iter;
+ unsigned int lastlen;
+ };
};
};
struct kref kref;
bool more_to_follow;
bool needs_out_seq;
+ bool sparse_read;
int front_alloc_len;
struct ceph_msgpool *pool;
int in_base_pos; /* bytes read */
+ /* sparse reads */
+ struct kvec in_sr_kvec; /* current location to receive into */
+ u64 in_sr_len; /* amount of data in this extent */
+
/* message in temps */
u8 in_tag; /* protocol control byte */
struct ceph_msg_header in_hdr;
void *conn_bufs[16];
int conn_buf_cnt;
+ int data_len_remain;
struct kvec in_sign_kvecs[8];
struct kvec out_sign_kvecs[8];
#endif /* CONFIG_BLOCK */
void ceph_msg_data_add_bvecs(struct ceph_msg *msg,
struct ceph_bvec_iter *bvec_pos);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter);
struct ceph_msg *ceph_msg_new2(int type, int front_len, int max_data_items,
gfp_t flags, bool can_fail);
#define CEPH_HOMELESS_OSD -1
-/* a given osd we're communicating with */
+/*
+ * A single extent in a SPARSE_READ reply.
+ *
+ * Note that these come from the OSD as little-endian values. On BE arches,
+ * we convert them in-place after receipt.
+ */
+struct ceph_sparse_extent {
+ u64 off;
+ u64 len;
+} __packed;
+
+/* Sparse read state machine state values */
+enum ceph_sparse_read_state {
+ CEPH_SPARSE_READ_HDR = 0,
+ CEPH_SPARSE_READ_EXTENTS,
+ CEPH_SPARSE_READ_DATA_LEN,
+ CEPH_SPARSE_READ_DATA,
+};
+
+/*
+ * A SPARSE_READ reply is a 32-bit count of extents, followed by an array of
+ * 64-bit offset/length pairs, and then all of the actual file data
+ * concatenated after it (sans holes).
+ *
+ * Unfortunately, we don't know how long the extent array is until we've
+ * started reading the data section of the reply. The caller should send down
+ * a destination buffer for the array, but we'll alloc one if it's too small
+ * or if the caller doesn't.
+ */
+struct ceph_sparse_read {
+ enum ceph_sparse_read_state sr_state; /* state machine state */
+ u64 sr_req_off; /* orig request offset */
+ u64 sr_req_len; /* orig request length */
+ u64 sr_pos; /* current pos in buffer */
+ int sr_index; /* current extent index */
+ __le32 sr_datalen; /* length of actual data */
+ u32 sr_count; /* extent count in reply */
+ int sr_ext_len; /* length of extent array */
+ struct ceph_sparse_extent *sr_extent; /* extent array */
+};
+
+/*
+ * A given osd we're communicating with.
+ *
+ * Note that the o_requests tree can be searched while holding the "lock" mutex
+ * or the "o_requests_lock" spinlock. Insertion or removal requires both!
+ */
struct ceph_osd {
refcount_t o_ref;
+ int o_sparse_op_idx;
struct ceph_osd_client *o_osdc;
int o_osd;
int o_incarnation;
struct rb_node o_node;
struct ceph_connection o_con;
+ spinlock_t o_requests_lock;
struct rb_root o_requests;
struct rb_root o_linger_requests;
struct rb_root o_backoff_mappings;
unsigned long lru_ttl;
struct list_head o_keepalive_item;
struct mutex lock;
+ struct ceph_sparse_read o_sparse_read;
};
#define CEPH_OSD_SLAB_OPS 2
CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
CEPH_OSD_DATA_TYPE_BVECS,
+ CEPH_OSD_DATA_TYPE_ITER,
};
struct ceph_osd_data {
struct ceph_bvec_iter bvec_pos;
u32 num_bvecs;
};
+ struct iov_iter iter;
};
};
u64 offset, length;
u64 truncate_size;
u32 truncate_seq;
+ int sparse_ext_cnt;
+ struct ceph_sparse_extent *sparse_ext;
struct ceph_osd_data osd_data;
} extent;
struct {
u32 src_fadvise_flags;
struct ceph_osd_data osd_data;
} copy_from;
+ struct {
+ u64 ver;
+ } assert_ver;
};
};
struct ceph_osd_client *r_osdc;
struct kref r_kref;
bool r_mempool;
+ bool r_linger; /* don't resend on failure */
struct completion r_completion; /* private to osd_client.c */
ceph_osdc_callback_t r_callback;
struct ceph_snap_context *r_snapc; /* for writes */
struct timespec64 r_mtime; /* ditto */
u64 r_data_offset; /* ditto */
- bool r_linger; /* don't resend on failure */
/* internal */
+ u64 r_version; /* data version sent in reply */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
ktime_t r_start_latency; /* ktime_t */
void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
unsigned int which,
struct ceph_bvec_iter *bvec_pos);
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter);
extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
unsigned int which,
u32 truncate_seq, u64 truncate_size,
bool use_mempool);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt);
+
+/*
+ * How big an extent array should we preallocate for a sparse read? This is
+ * just a starting value. If we get more than this back from the OSD, the
+ * receiver will reallocate.
+ */
+#define CEPH_SPARSE_EXT_ARRAY_INITIAL 16
+
+static inline int ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op)
+{
+ return __ceph_alloc_sparse_ext_map(op, CEPH_SPARSE_EXT_ARRAY_INITIAL);
+}
+
extern void ceph_osdc_get_request(struct ceph_osd_request *req);
extern void ceph_osdc_put_request(struct ceph_osd_request *req);
struct ceph_object_locator *oloc,
struct ceph_watch_item **watchers,
u32 *num_watchers);
-#endif
+/* Find offset into the buffer of the end of the extent map */
+static inline u64 ceph_sparse_ext_map_end(struct ceph_osd_req_op *op)
+{
+ struct ceph_sparse_extent *ext;
+
+ /* No extents? No data */
+ if (op->extent.sparse_ext_cnt == 0)
+ return 0;
+
+ ext = &op->extent.sparse_ext[op->extent.sparse_ext_cnt - 1];
+
+ return ext->off + ext->len - op->extent.offset;
+}
+
+#endif
__le64 cookie;
} __attribute__ ((packed)) notify;
struct {
+ __le64 unused;
+ __le64 ver;
+ } __attribute__ ((packed)) assert_ver;
+ struct {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
__EXPORT_SYMBOL_REF(sym) ASM_NL \
.previous
-#if !defined(CONFIG_MODULES) || defined(__DISABLE_EXPORTS)
+#if defined(__DISABLE_EXPORTS)
/*
* Allow symbol exports to be disabled completely so that C code may
__ADDRESSABLE(sym) \
asm(__stringify(___EXPORT_SYMBOL(sym, license, ns)))
-#endif /* CONFIG_MODULES */
+#endif
#ifdef DEFAULT_SYMBOL_NAMESPACE
#define _EXPORT_SYMBOL(sym, license) __EXPORT_SYMBOL(sym, license, __stringify(DEFAULT_SYMBOL_NAMESPACE))
int gameport_open(struct gameport *gameport, struct gameport_driver *drv, int mode);
void gameport_close(struct gameport *gameport);
-#if defined(CONFIG_GAMEPORT) || (defined(MODULE) && defined(CONFIG_GAMEPORT_MODULE))
+#if IS_REACHABLE(CONFIG_GAMEPORT)
void __gameport_register_port(struct gameport *gameport, struct module *owner);
/* use a define to avoid include chaining to get THIS_MODULE */
#define IP6SKB_JUMBOGRAM 128
#define IP6SKB_SEG6 256
#define IP6SKB_FAKEJUMBO 512
+#define IP6SKB_MULTIPATH 1024
};
#if defined(CONFIG_NET_L3_MASTER_DEV)
int kasan_populate_early_shadow(const void *shadow_start,
const void *shadow_end);
+#ifndef __HAVE_ARCH_SHADOW_MAP
static inline void *kasan_mem_to_shadow(const void *addr)
{
return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
+ KASAN_SHADOW_OFFSET;
}
+#endif
int kasan_add_zero_shadow(void *start, unsigned long size);
void kasan_remove_zero_shadow(void *start, unsigned long size);
bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req);
bool kvm_make_all_cpus_request_except(struct kvm *kvm, unsigned int req,
struct kvm_vcpu *except);
-bool kvm_make_cpus_request_mask(struct kvm *kvm, unsigned int req,
- unsigned long *vcpu_bitmap);
#define KVM_USERSPACE_IRQ_SOURCE_ID 0
#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1
#endif
#ifdef KVM_ARCH_WANT_MMU_NOTIFIER
+union kvm_mmu_notifier_arg {
+ pte_t pte;
+};
+
struct kvm_gfn_range {
struct kvm_memory_slot *slot;
gfn_t start;
gfn_t end;
- pte_t pte;
+ union kvm_mmu_notifier_arg arg;
bool may_block;
};
bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
unlikely(__ret); \
})
+/*
+ * Note, "data corruption" refers to corruption of host kernel data structures,
+ * not guest data. Guest data corruption, suspected or confirmed, that is tied
+ * and contained to a single VM should *never* BUG() and potentially panic the
+ * host, i.e. use this variant of KVM_BUG() if and only if a KVM data structure
+ * is corrupted and that corruption can have a cascading effect to other parts
+ * of the hosts and/or to other VMs.
+ */
+#define KVM_BUG_ON_DATA_CORRUPTION(cond, kvm) \
+({ \
+ bool __ret = !!(cond); \
+ \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) \
+ BUG_ON(__ret); \
+ else if (WARN_ON_ONCE(__ret && !(kvm)->vm_bugged)) \
+ kvm_vm_bugged(kvm); \
+ unlikely(__ret); \
+})
+
static inline void kvm_vcpu_srcu_read_lock(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_PROVE_RCU
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu, bool yield_to_kernel_mode);
void kvm_flush_remote_tlbs(struct kvm *kvm);
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min);
unsigned long mask);
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot);
-#ifdef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
-#else /* !CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */
+#ifndef CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log);
int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
int *is_dirty, struct kvm_memory_slot **memslot);
}
#endif
-#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
-static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS
+static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
return -ENOTSUPP;
}
+#else
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm);
+#endif
+
+#ifndef __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE
+static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+ gfn_t gfn, u64 nr_pages)
+{
+ return -EOPNOTSUPP;
+}
+#else
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages);
#endif
#ifdef __KVM_HAVE_ARCH_NONCOHERENT_DMA
int (*mmap)(struct kvm_device *dev, struct vm_area_struct *vma);
};
-void kvm_device_get(struct kvm_device *dev);
-void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
ATA_LINK_RESUME_TRIES = 5,
/* how hard are we gonna try to probe/recover devices */
- ATA_PROBE_MAX_TRIES = 3,
ATA_EH_DEV_TRIES = 3,
ATA_EH_PMP_TRIES = 5,
ATA_EH_PMP_LINK_TRIES = 3,
ssize_t size);
/*
- * Obsolete
- */
- void (*phy_reset)(struct ata_port *ap);
- void (*eng_timeout)(struct ata_port *ap);
-
- /*
* ->inherits must be the last field and all the preceding
* fields must be pointers.
*/
extern int ata_ratelimit(void);
extern void ata_msleep(struct ata_port *ap, unsigned int msecs);
extern u32 ata_wait_register(struct ata_port *ap, void __iomem *reg, u32 mask,
- u32 val, unsigned long interval, unsigned long timeout);
+ u32 val, unsigned int interval, unsigned int timeout);
extern int atapi_cmd_type(u8 opcode);
extern unsigned int ata_pack_xfermask(unsigned int pio_mask,
unsigned int mwdma_mask,
* SATA specific code - drivers/ata/libata-sata.c
*/
#ifdef CONFIG_SATA_HOST
-extern const unsigned long sata_deb_timing_normal[];
-extern const unsigned long sata_deb_timing_hotplug[];
-extern const unsigned long sata_deb_timing_long[];
+extern const unsigned int sata_deb_timing_normal[];
+extern const unsigned int sata_deb_timing_hotplug[];
+extern const unsigned int sata_deb_timing_long[];
-static inline const unsigned long *
+static inline const unsigned int *
sata_ehc_deb_timing(struct ata_eh_context *ehc)
{
if (ehc->i.flags & ATA_EHI_HOTPLUGGED)
extern int sata_scr_write_flush(struct ata_link *link, int reg, u32 val);
extern int sata_set_spd(struct ata_link *link);
extern int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing, unsigned long deadline,
+ const unsigned int *timing, unsigned long deadline,
bool *online, int (*check_ready)(struct ata_link *));
-extern int sata_link_resume(struct ata_link *link, const unsigned long *params,
+extern int sata_link_resume(struct ata_link *link, const unsigned int *params,
unsigned long deadline);
extern int ata_eh_read_sense_success_ncq_log(struct ata_link *link);
extern void ata_eh_analyze_ncq_error(struct ata_link *link);
#else
-static inline const unsigned long *
+static inline const unsigned int *
sata_ehc_deb_timing(struct ata_eh_context *ehc)
{
return NULL;
}
static inline int sata_set_spd(struct ata_link *link) { return -EOPNOTSUPP; }
static inline int sata_link_hardreset(struct ata_link *link,
- const unsigned long *timing,
+ const unsigned int *timing,
unsigned long deadline,
bool *online,
int (*check_ready)(struct ata_link *))
return -EOPNOTSUPP;
}
static inline int sata_link_resume(struct ata_link *link,
- const unsigned long *params,
+ const unsigned int *params,
unsigned long deadline)
{
return -EOPNOTSUPP;
static inline void ata_eh_analyze_ncq_error(struct ata_link *link) { }
#endif
extern int sata_link_debounce(struct ata_link *link,
- const unsigned long *params, unsigned long deadline);
+ const unsigned int *params, unsigned long deadline);
extern int sata_link_scr_lpm(struct ata_link *link, enum ata_lpm_policy policy,
bool spm_wakeup);
extern int ata_slave_link_init(struct ata_port *ap);
-extern void ata_sas_port_destroy(struct ata_port *);
extern struct ata_port *ata_sas_port_alloc(struct ata_host *,
struct ata_port_info *, struct Scsi_Host *);
-extern void ata_sas_async_probe(struct ata_port *ap);
-extern int ata_sas_sync_probe(struct ata_port *ap);
-extern int ata_sas_port_init(struct ata_port *);
-extern int ata_sas_port_start(struct ata_port *ap);
+extern void ata_port_probe(struct ata_port *ap);
extern int ata_sas_tport_add(struct device *parent, struct ata_port *ap);
extern void ata_sas_tport_delete(struct ata_port *ap);
-extern void ata_sas_port_stop(struct ata_port *ap);
extern int ata_sas_slave_configure(struct scsi_device *, struct ata_port *);
extern int ata_sas_queuecmd(struct scsi_cmnd *cmd, struct ata_port *ap);
extern void ata_tf_to_fis(const struct ata_taskfile *tf,
{
struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
- if (unlikely(!qc) || !ap->ops->error_handler)
+ if (unlikely(!qc))
return qc;
if ((qc->flags & (ATA_QCFLAG_ACTIVE |
}
static inline unsigned long ata_deadline(unsigned long from_jiffies,
- unsigned long timeout_msecs)
+ unsigned int timeout_msecs)
{
return from_jiffies + msecs_to_jiffies(timeout_msecs);
}
#define PHY_ID_KSZ9477 0x00221631
/* struct phy_device dev_flags definitions */
-#define MICREL_PHY_50MHZ_CLK 0x00000001
-#define MICREL_PHY_FXEN 0x00000002
-#define MICREL_KSZ8_P1_ERRATA 0x00000003
+#define MICREL_PHY_50MHZ_CLK BIT(0)
+#define MICREL_PHY_FXEN BIT(1)
+#define MICREL_KSZ8_P1_ERRATA BIT(2)
+#define MICREL_NO_EEE BIT(3)
#define MICREL_KSZ9021_EXTREG_CTRL 0xB
#define MICREL_KSZ9021_EXTREG_DATA_WRITE 0xC
#else
-static inline int of_overlay_fdt_apply(void *overlay_fdt, u32 overlay_fdt_size,
- int *ovcs_id)
+static inline int of_overlay_fdt_apply(const void *overlay_fdt, u32 overlay_fdt_size,
+ int *ovcs_id, struct device_node *target_base)
{
return -ENOTSUPP;
}
*
* The %neg_mode argument should be tested via the phylink_mode_*() family of
* functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
*/
int pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, const unsigned long *advertising,
*
* The %mode argument should be tested via the phylink_mode_*() family of
* functions, or for PCS that set pcs->neg_mode true, should be tested
- * against the %PHYLINK_PCS_NEG_* definitions.
+ * against the PHYLINK_PCS_NEG_* definitions.
*/
void pcs_link_up(struct phylink_pcs *pcs, unsigned int neg_mode,
phy_interface_t interface, int speed, int duplex);
+++ /dev/null
-/*
- * ds2404.h - platform data structure for the DS2404 RTC.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- *
- * Copyright (C) 2012 Sven Schnelle <svens@stackframe.org>
- */
-
-#ifndef __LINUX_DS2404_H
-#define __LINUX_DS2404_H
-
-struct ds2404_platform_data {
-
- unsigned int gpio_rst;
- unsigned int gpio_clk;
- unsigned int gpio_dq;
-};
-#endif
int base;
unsigned int npwm;
- struct pwm_device * (*of_xlate)(struct pwm_chip *pc,
+ struct pwm_device * (*of_xlate)(struct pwm_chip *chip,
const struct of_phandle_args *args);
unsigned int of_pwm_n_cells;
unsigned int index,
const char *label);
-struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *pc,
+struct pwm_device *of_pwm_xlate_with_flags(struct pwm_chip *chip,
const struct of_phandle_args *args);
-struct pwm_device *of_pwm_single_xlate(struct pwm_chip *pc,
+struct pwm_device *of_pwm_single_xlate(struct pwm_chip *chip,
const struct of_phandle_args *args);
struct pwm_device *pwm_get(struct device *dev, const char *con_id);
extern const struct raid6_calls raid6_vpermxor2;
extern const struct raid6_calls raid6_vpermxor4;
extern const struct raid6_calls raid6_vpermxor8;
+extern const struct raid6_calls raid6_lsx;
+extern const struct raid6_calls raid6_lasx;
struct raid6_recov_calls {
void (*data2)(int, size_t, int, int, void **);
extern const struct raid6_recov_calls raid6_recov_avx512;
extern const struct raid6_recov_calls raid6_recov_s390xc;
extern const struct raid6_recov_calls raid6_recov_neon;
+extern const struct raid6_recov_calls raid6_recov_lsx;
+extern const struct raid6_recov_calls raid6_recov_lasx;
extern const struct raid6_calls raid6_neonx1;
extern const struct raid6_calls raid6_neonx2;
#define anon_vma_init() do {} while (0)
#define anon_vma_prepare(vma) (0)
-#define anon_vma_link(vma) do {} while (0)
static inline int folio_referenced(struct folio *folio, int is_locked,
struct mem_cgroup *memcg,
time64_t range_min;
timeu64_t range_max;
+ timeu64_t alarm_offset_max;
time64_t start_secs;
time64_t offset_secs;
bool set_start_time;
unsigned int rep:1; /* enable input subsystem auto repeat */
uint16_t pinmask;
uint16_t invert;
- int irq_is_gpio;
int use_polling; /* use polling if Interrupt is not connected*/
};
#endif
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_L3SLAVE BIT(7)
#define IPSKB_NOPOLICY BIT(8)
+#define IPSKB_MULTIPATH BIT(9)
u16 frag_max_size;
};
ipcm_init(ipcm);
ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
- ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+ ipcm->sockc.tsflags = READ_ONCE(inet->sk.sk_tsflags);
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
ipcm->protocol = inet->inet_num;
if (!net->ipv6.fib6_rules_require_fldissect)
return false;
- skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ memset(flkeys, 0, sizeof(*flkeys));
+ __skb_flow_dissect(net, skb, &flow_keys_dissector,
+ flkeys, NULL, 0, 0, 0, flag);
+
fl6->fl6_sport = flkeys->ports.src;
fl6->fl6_dport = flkeys->ports.dst;
fl6->flowi6_proto = flkeys->basic.ip_proto;
if (!net->ipv4.fib_rules_require_fldissect)
return false;
- skb_flow_dissect_flow_keys(skb, flkeys, flag);
+ memset(flkeys, 0, sizeof(*flkeys));
+ __skb_flow_dissect(net, skb, &flow_keys_dissector,
+ flkeys, NULL, 0, 0, 0, flag);
+
fl4->fl4_sport = flkeys->ports.src;
fl4->fl4_dport = flkeys->ports.dst;
fl4->flowi4_proto = flkeys->basic.ip_proto;
u64_stats_inc(&tstats->tx_packets);
u64_stats_update_end(&tstats->syncp);
put_cpu_ptr(tstats);
+ return;
+ }
+
+ if (pkt_len < 0) {
+ DEV_STATS_INC(dev, tx_errors);
+ DEV_STATS_INC(dev, tx_aborted_errors);
} else {
- struct net_device_stats *err_stats = &dev->stats;
-
- if (pkt_len < 0) {
- err_stats->tx_errors++;
- err_stats->tx_aborted_errors++;
- } else {
- err_stats->tx_dropped++;
- }
+ DEV_STATS_INC(dev, tx_dropped);
}
}
#include <linux/pid.h>
#include <linux/nsproxy.h>
#include <linux/sched/signal.h>
+#include <net/compat.h>
/* Well, we should have at least one descriptor open
* to accept passed FDs 8)
static __inline__ void scm_pidfd_recv(struct msghdr *msg, struct scm_cookie *scm)
{
struct file *pidfd_file = NULL;
- int pidfd;
+ int len, pidfd;
- /*
- * put_cmsg() doesn't return an error if CMSG is truncated,
+ /* put_cmsg() doesn't return an error if CMSG is truncated,
* that's why we need to opencode these checks here.
*/
- if ((msg->msg_controllen <= sizeof(struct cmsghdr)) ||
- (msg->msg_controllen - sizeof(struct cmsghdr)) < sizeof(int)) {
+ if (msg->msg_flags & MSG_CMSG_COMPAT)
+ len = sizeof(struct compat_cmsghdr) + sizeof(int);
+ else
+ len = sizeof(struct cmsghdr) + sizeof(int);
+
+ if (msg->msg_controllen < len) {
msg->msg_flags |= MSG_CTRUNC;
return;
}
WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
+static inline void sk_forward_alloc_add(struct sock *sk, int val)
+{
+ /* Paired with lockless reads of sk->sk_forward_alloc */
+ WRITE_ONCE(sk->sk_forward_alloc, sk->sk_forward_alloc + val);
+}
+
void sk_stream_write_space(struct sock *sk);
/* OOB backlog add */
if (sk->sk_prot->forward_alloc_get)
return sk->sk_prot->forward_alloc_get(sk);
#endif
- return sk->sk_forward_alloc;
+ return READ_ONCE(sk->sk_forward_alloc);
}
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (!sk_has_account(sk))
return;
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
}
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
if (!sk_has_account(sk))
return;
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
sk_mem_reclaim(sk);
}
static inline void sockcm_init(struct sockcm_cookie *sockc,
const struct sock *sk)
{
- *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
+ *sockc = (struct sockcm_cookie) {
+ .tsflags = READ_ONCE(sk->sk_tsflags)
+ };
}
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
static inline void
sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
- ktime_t kt = skb->tstamp;
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
-
+ u32 tsflags = READ_ONCE(sk->sk_tsflags);
+ ktime_t kt = skb->tstamp;
/*
* generate control messages if
* - receive time stamping in software requested
* - hardware time stamps available and wanted
*/
if (sock_flag(sk, SOCK_RCVTSTAMP) ||
- (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
- (kt && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
+ (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+ (kt && tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
(hwtstamps->hwtstamp &&
- (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
sock_write_timestamp(sk, kt);
#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_RECV_CMSGS || sk->sk_tsflags & TSFLAGS_ANY)
+ if (sk->sk_flags & FLAGS_RECV_CMSGS ||
+ READ_ONCE(sk->sk_tsflags) & TSFLAGS_ANY)
__sock_recv_cmsgs(msg, sk, skb);
else if (unlikely(sock_flag(sk, SOCK_TIMESTAMP)))
sock_write_timestamp(sk, skb->tstamp);
struct snd_pcm_substream *substream);
int (*process)(struct snd_pcm_substream *substream,
int channel, unsigned long hwoff,
- struct iov_iter *buf, unsigned long bytes);
+ unsigned long bytes);
dma_filter_fn compat_filter_fn;
struct device *dma_dev;
const char *chan_names[SNDRV_PCM_STREAM_LAST + 1];
struct snd_pcm_audio_tstamp_report *audio_tstamp_report);
int (*copy)(struct snd_soc_component *component,
struct snd_pcm_substream *substream, int channel,
- unsigned long pos, struct iov_iter *buf,
+ unsigned long pos, struct iov_iter *iter,
unsigned long bytes);
struct page *(*page)(struct snd_soc_component *component,
struct snd_pcm_substream *substream,
int snd_soc_pcm_component_sync_stop(struct snd_pcm_substream *substream);
int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
int channel, unsigned long pos,
- struct iov_iter *buf, unsigned long bytes);
+ struct iov_iter *iter, unsigned long bytes);
struct page *snd_soc_pcm_component_page(struct snd_pcm_substream *substream,
unsigned long offset);
int snd_soc_pcm_component_mmap(struct snd_pcm_substream *substream,
* - add FUSE_EXT_GROUPS
* - add FUSE_CREATE_SUPP_GROUP
* - add FUSE_HAS_EXPIRE_ONLY
+ *
+ * 7.39
+ * - add FUSE_DIRECT_IO_RELAX
+ * - add FUSE_STATX and related structures
*/
#ifndef _LINUX_FUSE_H
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 38
+#define FUSE_KERNEL_MINOR_VERSION 39
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
uint32_t flags;
};
+/*
+ * The following structures are bit-for-bit compatible with the statx(2) ABI in
+ * Linux.
+ */
+struct fuse_sx_time {
+ int64_t tv_sec;
+ uint32_t tv_nsec;
+ int32_t __reserved;
+};
+
+struct fuse_statx {
+ uint32_t mask;
+ uint32_t blksize;
+ uint64_t attributes;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint16_t mode;
+ uint16_t __spare0[1];
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t attributes_mask;
+ struct fuse_sx_time atime;
+ struct fuse_sx_time btime;
+ struct fuse_sx_time ctime;
+ struct fuse_sx_time mtime;
+ uint32_t rdev_major;
+ uint32_t rdev_minor;
+ uint32_t dev_major;
+ uint32_t dev_minor;
+ uint64_t __spare2[14];
+};
+
struct fuse_kstatfs {
uint64_t blocks;
uint64_t bfree;
* FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
* symlink and mknod (single group that matches parent)
* FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
+ * FUSE_DIRECT_IO_RELAX: relax restrictions in FOPEN_DIRECT_IO mode, for now
+ * allow shared mmap
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
#define FUSE_HAS_INODE_DAX (1ULL << 33)
#define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35)
+#define FUSE_DIRECT_IO_RELAX (1ULL << 36)
/**
* CUSE INIT request/reply flags
FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
FUSE_TMPFILE = 51,
+ FUSE_STATX = 52,
/* CUSE specific operations */
CUSE_INIT = 4096,
struct fuse_attr attr;
};
+struct fuse_statx_in {
+ uint32_t getattr_flags;
+ uint32_t reserved;
+ uint64_t fh;
+ uint32_t sx_flags;
+ uint32_t sx_mask;
+};
+
+struct fuse_statx_out {
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t flags;
+ uint64_t spare[2];
+ struct fuse_statx stat;
+};
+
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
* @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
* @NFTA_RULE_ID: uniquely identifies a rule in a transaction (NLA_U32)
* @NFTA_RULE_POSITION_ID: transaction unique identifier of the previous rule (NLA_U32)
+ * @NFTA_RULE_CHAIN_ID: add the rule to chain by ID, alternative to @NFTA_RULE_CHAIN (NLA_U32)
*/
enum nft_rule_attributes {
NFTA_RULE_UNSPEC,
{ AUDIT_NFT_OP_OBJ_RESET, "nft_reset_obj" },
{ AUDIT_NFT_OP_FLOWTABLE_REGISTER, "nft_register_flowtable" },
{ AUDIT_NFT_OP_FLOWTABLE_UNREGISTER, "nft_unregister_flowtable" },
+ { AUDIT_NFT_OP_SETELEM_RESET, "nft_reset_setelem" },
+ { AUDIT_NFT_OP_RULE_RESET, "nft_reset_rule" },
{ AUDIT_NFT_OP_INVALID, "nft_invalid" },
};
void *value, u64 map_flags, gfp_t gfp_flags)
{
struct bpf_local_storage_data *old_sdata = NULL;
- struct bpf_local_storage_elem *selem = NULL;
+ struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
struct bpf_local_storage *local_storage;
unsigned long flags;
int err;
}
}
- if (gfp_flags == GFP_KERNEL) {
- selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
- if (!selem)
- return ERR_PTR(-ENOMEM);
- }
+ /* A lookup has just been done before and concluded a new selem is
+ * needed. The chance of an unnecessary alloc is unlikely.
+ */
+ alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
+ if (!alloc_selem)
+ return ERR_PTR(-ENOMEM);
raw_spin_lock_irqsave(&local_storage->lock, flags);
* simple.
*/
err = -EAGAIN;
- goto unlock_err;
+ goto unlock;
}
old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
err = check_flags(old_sdata, map_flags);
if (err)
- goto unlock_err;
+ goto unlock;
if (old_sdata && (map_flags & BPF_F_LOCK)) {
copy_map_value_locked(&smap->map, old_sdata->data, value,
goto unlock;
}
- if (gfp_flags != GFP_KERNEL) {
- /* local_storage->lock is held. Hence, we are sure
- * we can unlink and uncharge the old_sdata successfully
- * later. Hence, instead of charging the new selem now
- * and then uncharge the old selem later (which may cause
- * a potential but unnecessary charge failure), avoid taking
- * a charge at all here (the "!old_sdata" check) and the
- * old_sdata will not be uncharged later during
- * bpf_selem_unlink_storage_nolock().
- */
- selem = bpf_selem_alloc(smap, owner, value, !old_sdata, gfp_flags);
- if (!selem) {
- err = -ENOMEM;
- goto unlock_err;
- }
- }
-
+ alloc_selem = NULL;
/* First, link the new selem to the map */
bpf_selem_link_map(smap, selem);
if (old_sdata) {
bpf_selem_unlink_map(SELEM(old_sdata));
bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
- false, false);
+ true, false);
}
unlock:
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- return SDATA(selem);
-
-unlock_err:
- raw_spin_unlock_irqrestore(&local_storage->lock, flags);
- if (selem) {
+ if (alloc_selem) {
mem_uncharge(smap, owner, smap->elem_size);
- bpf_selem_free(selem, smap, true);
+ bpf_selem_free(alloc_selem, smap, true);
}
- return ERR_PTR(err);
+ return err ? ERR_PTR(err) : SDATA(selem);
}
static u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
* of the loop will set the free_cgroup_storage to true.
*/
free_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false, true);
+ local_storage, selem, true, true);
}
raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
run_ctx.bpf_cookie = 0;
- run_ctx.saved_run_ctx = NULL;
if (!__bpf_prog_enter_sleepable_recur(prog, &run_ctx)) {
/* recursion detected */
+ __bpf_prog_exit_sleepable_recur(prog, 0, &run_ctx);
bpf_prog_put(prog);
return -EBUSY;
}
migrate_disable();
might_fault();
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
bpf_prog_inc_misses_counter(prog);
return 0;
}
-
- run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
-
return bpf_prog_start_time();
}
+# Help: Debugging for CI systems and finding regressions
+#
# The config is based on running daily CI for enterprise Linux distros to
# seek regressions on linux-next builds on different bare-metal and virtual
# platforms. It can be used for example,
+# Help: Bootable as a KVM guest
CONFIG_NET=y
CONFIG_NET_CORE=y
CONFIG_NETDEVICES=y
+# Help: Disable Power Management
+
CONFIG_PM=n
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
+# Help: Enable Rust
CONFIG_RUST=y
+# Help: Debugging options for tip tree testing
CONFIG_X86_DEBUG_FPU=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_VM=y
+# Help: Bootable as a Xen guest
+#
# global stuff - these enable us to allow some
# of the not so generic stuff below for xen
CONFIG_PARAVIRT=y
{
return log_buf;
}
-EXPORT_SYMBOL_GPL(log_buf_addr_get);
/* Return log buffer size */
u32 log_buf_len_get(void)
{
return log_buf_len;
}
-EXPORT_SYMBOL_GPL(log_buf_len_get);
/*
* Define how much of the log buffer we could take at maximum. The value
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
+raid6_pq-$(CONFIG_LOONGARCH) += loongarch_simd.o recov_loongarch_simd.o
hostprogs += mktables
&raid6_neonx2,
&raid6_neonx1,
#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+ &raid6_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ &raid6_lsx,
+#endif
+#endif
#if defined(__ia64__)
&raid6_intx32,
&raid6_intx16,
#if defined(CONFIG_KERNEL_MODE_NEON)
&raid6_recov_neon,
#endif
+#ifdef CONFIG_LOONGARCH
+#ifdef CONFIG_CPU_HAS_LASX
+ &raid6_recov_lasx,
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ &raid6_recov_lsx,
+#endif
+#endif
&raid6_recov_intx1,
NULL
};
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * raid6/loongarch.h
+ *
+ * Definitions common to LoongArch RAID-6 code only
+ */
+
+#ifndef _LIB_RAID6_LOONGARCH_H
+#define _LIB_RAID6_LOONGARCH_H
+
+#ifdef __KERNEL__
+
+#include <asm/cpu-features.h>
+#include <asm/fpu.h>
+
+#else /* for user-space testing */
+
+#include <sys/auxv.h>
+
+/* have to supply these defines for glibc 2.37- and musl */
+#ifndef HWCAP_LOONGARCH_LSX
+#define HWCAP_LOONGARCH_LSX (1 << 4)
+#endif
+#ifndef HWCAP_LOONGARCH_LASX
+#define HWCAP_LOONGARCH_LASX (1 << 5)
+#endif
+
+#define kernel_fpu_begin()
+#define kernel_fpu_end()
+
+#define cpu_has_lsx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LSX)
+#define cpu_has_lasx (getauxval(AT_HWCAP) & HWCAP_LOONGARCH_LASX)
+
+#endif /* __KERNEL__ */
+
+#endif /* _LIB_RAID6_LOONGARCH_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * RAID6 syndrome calculations in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Based on the generic RAID-6 code (int.uc):
+ *
+ * Copyright 2002-2004 H. Peter Anvin
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * The vector algorithms are currently priority 0, which means the generic
+ * scalar algorithms are not being disabled if vector support is present.
+ * This is like the similar LoongArch RAID5 XOR code, with the main reason
+ * repeated here: it cannot be ruled out at this point of time, that some
+ * future (maybe reduced) models could run the vector algorithms slower than
+ * the scalar ones, maybe for errata or micro-op reasons. It may be
+ * appropriate to revisit this after one or two more uarch generations.
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+#define NSIZE 16
+
+static int raid6_has_lsx(void)
+{
+ return cpu_has_lsx;
+}
+
+static void raid6_lsx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $vr0, $vr1, $vr2, $vr3: wp
+ * $vr4, $vr5, $vr6, $vr7: wq
+ * $vr8, $vr9, $vr10, $vr11: wd
+ * $vr12, $vr13, $vr14, $vr15: w2
+ * $vr16, $vr17, $vr18, $vr19: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*4) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+ asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+ asm volatile("vori.b $vr4, $vr0, 0");
+ asm volatile("vori.b $vr5, $vr1, 0");
+ asm volatile("vori.b $vr6, $vr2, 0");
+ asm volatile("vori.b $vr7, $vr3, 0");
+ for (z = z0-1; z >= 0; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+ asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("vxor.v $vr16, $vr16, $vr12");
+ asm volatile("vxor.v $vr17, $vr17, $vr13");
+ asm volatile("vxor.v $vr18, $vr18, $vr14");
+ asm volatile("vxor.v $vr19, $vr19, $vr15");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr8");
+ asm volatile("vxor.v $vr5, $vr17, $vr9");
+ asm volatile("vxor.v $vr6, $vr18, $vr10");
+ asm volatile("vxor.v $vr7, $vr19, $vr11");
+ }
+ /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+ asm volatile("vst $vr0, %0" : "=m"(p[d+NSIZE*0]));
+ asm volatile("vst $vr1, %0" : "=m"(p[d+NSIZE*1]));
+ asm volatile("vst $vr2, %0" : "=m"(p[d+NSIZE*2]));
+ asm volatile("vst $vr3, %0" : "=m"(p[d+NSIZE*3]));
+ /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+ asm volatile("vst $vr4, %0" : "=m"(q[d+NSIZE*0]));
+ asm volatile("vst $vr5, %0" : "=m"(q[d+NSIZE*1]));
+ asm volatile("vst $vr6, %0" : "=m"(q[d+NSIZE*2]));
+ asm volatile("vst $vr7, %0" : "=m"(q[d+NSIZE*3]));
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_lsx_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $vr0, $vr1, $vr2, $vr3: wp
+ * $vr4, $vr5, $vr6, $vr7: wq
+ * $vr8, $vr9, $vr10, $vr11: wd
+ * $vr12, $vr13, $vr14, $vr15: w2
+ * $vr16, $vr17, $vr18, $vr19: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*4) {
+ /* P/Q data pages */
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("vld $vr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("vld $vr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("vld $vr2, %0" : : "m"(dptr[z0][d+2*NSIZE]));
+ asm volatile("vld $vr3, %0" : : "m"(dptr[z0][d+3*NSIZE]));
+ asm volatile("vori.b $vr4, $vr0, 0");
+ asm volatile("vori.b $vr5, $vr1, 0");
+ asm volatile("vori.b $vr6, $vr2, 0");
+ asm volatile("vori.b $vr7, $vr3, 0");
+ for (z = z0-1; z >= start; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("vld $vr8, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("vld $vr9, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ asm volatile("vld $vr10, %0" : : "m"(dptr[z][d+2*NSIZE]));
+ asm volatile("vld $vr11, %0" : : "m"(dptr[z][d+3*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("vxor.v $vr16, $vr16, $vr12");
+ asm volatile("vxor.v $vr17, $vr17, $vr13");
+ asm volatile("vxor.v $vr18, $vr18, $vr14");
+ asm volatile("vxor.v $vr19, $vr19, $vr15");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr8");
+ asm volatile("vxor.v $vr5, $vr17, $vr9");
+ asm volatile("vxor.v $vr6, $vr18, $vr10");
+ asm volatile("vxor.v $vr7, $vr19, $vr11");
+ }
+
+ /* P/Q left side optimization */
+ for (z = start-1; z >= 0; z--) {
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("vslti.b $vr12, $vr4, 0");
+ asm volatile("vslti.b $vr13, $vr5, 0");
+ asm volatile("vslti.b $vr14, $vr6, 0");
+ asm volatile("vslti.b $vr15, $vr7, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("vslli.b $vr16, $vr4, 1");
+ asm volatile("vslli.b $vr17, $vr5, 1");
+ asm volatile("vslli.b $vr18, $vr6, 1");
+ asm volatile("vslli.b $vr19, $vr7, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("vandi.b $vr12, $vr12, 0x1d");
+ asm volatile("vandi.b $vr13, $vr13, 0x1d");
+ asm volatile("vandi.b $vr14, $vr14, 0x1d");
+ asm volatile("vandi.b $vr15, $vr15, 0x1d");
+ /* wq$$ = w1$$ ^ w2$$; */
+ asm volatile("vxor.v $vr4, $vr16, $vr12");
+ asm volatile("vxor.v $vr5, $vr17, $vr13");
+ asm volatile("vxor.v $vr6, $vr18, $vr14");
+ asm volatile("vxor.v $vr7, $vr19, $vr15");
+ }
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ */
+ asm volatile(
+ "vld $vr20, %0\n\t"
+ "vld $vr21, %1\n\t"
+ "vld $vr22, %2\n\t"
+ "vld $vr23, %3\n\t"
+ "vld $vr24, %4\n\t"
+ "vld $vr25, %5\n\t"
+ "vld $vr26, %6\n\t"
+ "vld $vr27, %7\n\t"
+ "vxor.v $vr20, $vr20, $vr0\n\t"
+ "vxor.v $vr21, $vr21, $vr1\n\t"
+ "vxor.v $vr22, $vr22, $vr2\n\t"
+ "vxor.v $vr23, $vr23, $vr3\n\t"
+ "vxor.v $vr24, $vr24, $vr4\n\t"
+ "vxor.v $vr25, $vr25, $vr5\n\t"
+ "vxor.v $vr26, $vr26, $vr6\n\t"
+ "vxor.v $vr27, $vr27, $vr7\n\t"
+ "vst $vr20, %0\n\t"
+ "vst $vr21, %1\n\t"
+ "vst $vr22, %2\n\t"
+ "vst $vr23, %3\n\t"
+ "vst $vr24, %4\n\t"
+ "vst $vr25, %5\n\t"
+ "vst $vr26, %6\n\t"
+ "vst $vr27, %7\n\t"
+ : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+ "+m"(p[d+NSIZE*2]), "+m"(p[d+NSIZE*3]),
+ "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1]),
+ "+m"(q[d+NSIZE*2]), "+m"(q[d+NSIZE*3])
+ );
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lsx = {
+ raid6_lsx_gen_syndrome,
+ raid6_lsx_xor_syndrome,
+ raid6_has_lsx,
+ "lsx",
+ .priority = 0 /* see the comment near the top of the file for reason */
+};
+
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+#define NSIZE 32
+
+static int raid6_has_lasx(void)
+{
+ return cpu_has_lasx;
+}
+
+static void raid6_lasx_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $xr0, $xr1: wp
+ * $xr2, $xr3: wq
+ * $xr4, $xr5: wd
+ * $xr6, $xr7: w2
+ * $xr8, $xr9: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*2) {
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("xvori.b $xr2, $xr0, 0");
+ asm volatile("xvori.b $xr3, $xr1, 0");
+ for (z = z0-1; z >= 0; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("xvxor.v $xr8, $xr8, $xr6");
+ asm volatile("xvxor.v $xr9, $xr9, $xr7");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr4");
+ asm volatile("xvxor.v $xr3, $xr9, $xr5");
+ }
+ /* *(unative_t *)&p[d+NSIZE*$$] = wp$$; */
+ asm volatile("xvst $xr0, %0" : "=m"(p[d+NSIZE*0]));
+ asm volatile("xvst $xr1, %0" : "=m"(p[d+NSIZE*1]));
+ /* *(unative_t *)&q[d+NSIZE*$$] = wq$$; */
+ asm volatile("xvst $xr2, %0" : "=m"(q[d+NSIZE*0]));
+ asm volatile("xvst $xr3, %0" : "=m"(q[d+NSIZE*1]));
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_lasx_xor_syndrome(int disks, int start, int stop,
+ size_t bytes, void **ptrs)
+{
+ u8 **dptr = (u8 **)ptrs;
+ u8 *p, *q;
+ int d, z, z0;
+
+ z0 = stop; /* P/Q right side optimization */
+ p = dptr[disks-2]; /* XOR parity */
+ q = dptr[disks-1]; /* RS syndrome */
+
+ kernel_fpu_begin();
+
+ /*
+ * $xr0, $xr1: wp
+ * $xr2, $xr3: wq
+ * $xr4, $xr5: wd
+ * $xr6, $xr7: w2
+ * $xr8, $xr9: w1
+ */
+ for (d = 0; d < bytes; d += NSIZE*2) {
+ /* P/Q data pages */
+ /* wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; */
+ asm volatile("xvld $xr0, %0" : : "m"(dptr[z0][d+0*NSIZE]));
+ asm volatile("xvld $xr1, %0" : : "m"(dptr[z0][d+1*NSIZE]));
+ asm volatile("xvori.b $xr2, $xr0, 0");
+ asm volatile("xvori.b $xr3, $xr1, 0");
+ for (z = z0-1; z >= start; z--) {
+ /* wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; */
+ asm volatile("xvld $xr4, %0" : : "m"(dptr[z][d+0*NSIZE]));
+ asm volatile("xvld $xr5, %0" : : "m"(dptr[z][d+1*NSIZE]));
+ /* wp$$ ^= wd$$; */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* w1$$ ^= w2$$; */
+ asm volatile("xvxor.v $xr8, $xr8, $xr6");
+ asm volatile("xvxor.v $xr9, $xr9, $xr7");
+ /* wq$$ = w1$$ ^ wd$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr4");
+ asm volatile("xvxor.v $xr3, $xr9, $xr5");
+ }
+
+ /* P/Q left side optimization */
+ for (z = start-1; z >= 0; z--) {
+ /* w2$$ = MASK(wq$$); */
+ asm volatile("xvslti.b $xr6, $xr2, 0");
+ asm volatile("xvslti.b $xr7, $xr3, 0");
+ /* w1$$ = SHLBYTE(wq$$); */
+ asm volatile("xvslli.b $xr8, $xr2, 1");
+ asm volatile("xvslli.b $xr9, $xr3, 1");
+ /* w2$$ &= NBYTES(0x1d); */
+ asm volatile("xvandi.b $xr6, $xr6, 0x1d");
+ asm volatile("xvandi.b $xr7, $xr7, 0x1d");
+ /* wq$$ = w1$$ ^ w2$$; */
+ asm volatile("xvxor.v $xr2, $xr8, $xr6");
+ asm volatile("xvxor.v $xr3, $xr9, $xr7");
+ }
+ /*
+ * *(unative_t *)&p[d+NSIZE*$$] ^= wp$$;
+ * *(unative_t *)&q[d+NSIZE*$$] ^= wq$$;
+ */
+ asm volatile(
+ "xvld $xr10, %0\n\t"
+ "xvld $xr11, %1\n\t"
+ "xvld $xr12, %2\n\t"
+ "xvld $xr13, %3\n\t"
+ "xvxor.v $xr10, $xr10, $xr0\n\t"
+ "xvxor.v $xr11, $xr11, $xr1\n\t"
+ "xvxor.v $xr12, $xr12, $xr2\n\t"
+ "xvxor.v $xr13, $xr13, $xr3\n\t"
+ "xvst $xr10, %0\n\t"
+ "xvst $xr11, %1\n\t"
+ "xvst $xr12, %2\n\t"
+ "xvst $xr13, %3\n\t"
+ : "+m"(p[d+NSIZE*0]), "+m"(p[d+NSIZE*1]),
+ "+m"(q[d+NSIZE*0]), "+m"(q[d+NSIZE*1])
+ );
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_calls raid6_lasx = {
+ raid6_lasx_gen_syndrome,
+ raid6_lasx_xor_syndrome,
+ raid6_has_lasx,
+ "lasx",
+ .priority = 0 /* see the comment near the top of the file for reason */
+};
+#undef NSIZE
+#endif /* CONFIG_CPU_HAS_LASX */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * RAID6 recovery algorithms in LoongArch SIMD (LSX & LASX)
+ *
+ * Copyright (C) 2023 WANG Xuerui <git@xen0n.name>
+ *
+ * Originally based on recov_avx2.c and recov_ssse3.c:
+ *
+ * Copyright (C) 2012 Intel Corporation
+ * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
+ */
+
+#include <linux/raid/pq.h>
+#include "loongarch.h"
+
+/*
+ * Unlike with the syndrome calculation algorithms, there's no boot-time
+ * selection of recovery algorithms by benchmarking, so we have to specify
+ * the priorities and hope the future cores will all have decent vector
+ * support (i.e. no LASX slower than LSX, or even scalar code).
+ */
+
+#ifdef CONFIG_CPU_HAS_LSX
+static int raid6_has_lsx(void)
+{
+ return cpu_has_lsx;
+}
+
+static void raid6_2data_recov_lsx(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks - 2] = p;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /*
+ * vr20, vr21: qmul
+ * vr22, vr23: pbmul
+ */
+ asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+ asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+
+ while (bytes) {
+ /* vr4 - vr7: Q */
+ asm volatile("vld $vr4, %0" : : "m" (q[0]));
+ asm volatile("vld $vr5, %0" : : "m" (q[16]));
+ asm volatile("vld $vr6, %0" : : "m" (q[32]));
+ asm volatile("vld $vr7, %0" : : "m" (q[48]));
+ /* vr4 - vr7: Q + Qxy */
+ asm volatile("vld $vr8, %0" : : "m" (dq[0]));
+ asm volatile("vld $vr9, %0" : : "m" (dq[16]));
+ asm volatile("vld $vr10, %0" : : "m" (dq[32]));
+ asm volatile("vld $vr11, %0" : : "m" (dq[48]));
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+ /* vr0 - vr3: P */
+ asm volatile("vld $vr0, %0" : : "m" (p[0]));
+ asm volatile("vld $vr1, %0" : : "m" (p[16]));
+ asm volatile("vld $vr2, %0" : : "m" (p[32]));
+ asm volatile("vld $vr3, %0" : : "m" (p[48]));
+ /* vr0 - vr3: P + Pxy */
+ asm volatile("vld $vr8, %0" : : "m" (dp[0]));
+ asm volatile("vld $vr9, %0" : : "m" (dp[16]));
+ asm volatile("vld $vr10, %0" : : "m" (dp[32]));
+ asm volatile("vld $vr11, %0" : : "m" (dp[48]));
+ asm volatile("vxor.v $vr0, $vr0, $vr8");
+ asm volatile("vxor.v $vr1, $vr1, $vr9");
+ asm volatile("vxor.v $vr2, $vr2, $vr10");
+ asm volatile("vxor.v $vr3, $vr3, $vr11");
+
+ /* vr8 - vr11: higher 4 bits of each byte of (Q + Qxy) */
+ asm volatile("vsrli.b $vr8, $vr4, 4");
+ asm volatile("vsrli.b $vr9, $vr5, 4");
+ asm volatile("vsrli.b $vr10, $vr6, 4");
+ asm volatile("vsrli.b $vr11, $vr7, 4");
+ /* vr4 - vr7: lower 4 bits of each byte of (Q + Qxy) */
+ asm volatile("vandi.b $vr4, $vr4, 0x0f");
+ asm volatile("vandi.b $vr5, $vr5, 0x0f");
+ asm volatile("vandi.b $vr6, $vr6, 0x0f");
+ asm volatile("vandi.b $vr7, $vr7, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("vshuf.b $vr4, $vr20, $vr20, $vr4");
+ asm volatile("vshuf.b $vr5, $vr20, $vr20, $vr5");
+ asm volatile("vshuf.b $vr6, $vr20, $vr20, $vr6");
+ asm volatile("vshuf.b $vr7, $vr20, $vr20, $vr7");
+ /* lookup from qmul[16] */
+ asm volatile("vshuf.b $vr8, $vr21, $vr21, $vr8");
+ asm volatile("vshuf.b $vr9, $vr21, $vr21, $vr9");
+ asm volatile("vshuf.b $vr10, $vr21, $vr21, $vr10");
+ asm volatile("vshuf.b $vr11, $vr21, $vr21, $vr11");
+ /* vr16 - vr19: B(Q + Qxy) */
+ asm volatile("vxor.v $vr16, $vr8, $vr4");
+ asm volatile("vxor.v $vr17, $vr9, $vr5");
+ asm volatile("vxor.v $vr18, $vr10, $vr6");
+ asm volatile("vxor.v $vr19, $vr11, $vr7");
+
+ /* vr4 - vr7: higher 4 bits of each byte of (P + Pxy) */
+ asm volatile("vsrli.b $vr4, $vr0, 4");
+ asm volatile("vsrli.b $vr5, $vr1, 4");
+ asm volatile("vsrli.b $vr6, $vr2, 4");
+ asm volatile("vsrli.b $vr7, $vr3, 4");
+ /* vr12 - vr15: lower 4 bits of each byte of (P + Pxy) */
+ asm volatile("vandi.b $vr12, $vr0, 0x0f");
+ asm volatile("vandi.b $vr13, $vr1, 0x0f");
+ asm volatile("vandi.b $vr14, $vr2, 0x0f");
+ asm volatile("vandi.b $vr15, $vr3, 0x0f");
+ /* lookup from pbmul[0] */
+ asm volatile("vshuf.b $vr12, $vr22, $vr22, $vr12");
+ asm volatile("vshuf.b $vr13, $vr22, $vr22, $vr13");
+ asm volatile("vshuf.b $vr14, $vr22, $vr22, $vr14");
+ asm volatile("vshuf.b $vr15, $vr22, $vr22, $vr15");
+ /* lookup from pbmul[16] */
+ asm volatile("vshuf.b $vr4, $vr23, $vr23, $vr4");
+ asm volatile("vshuf.b $vr5, $vr23, $vr23, $vr5");
+ asm volatile("vshuf.b $vr6, $vr23, $vr23, $vr6");
+ asm volatile("vshuf.b $vr7, $vr23, $vr23, $vr7");
+ /* vr4 - vr7: A(P + Pxy) */
+ asm volatile("vxor.v $vr4, $vr4, $vr12");
+ asm volatile("vxor.v $vr5, $vr5, $vr13");
+ asm volatile("vxor.v $vr6, $vr6, $vr14");
+ asm volatile("vxor.v $vr7, $vr7, $vr15");
+
+ /* vr4 - vr7: A(P + Pxy) + B(Q + Qxy) = Dx */
+ asm volatile("vxor.v $vr4, $vr4, $vr16");
+ asm volatile("vxor.v $vr5, $vr5, $vr17");
+ asm volatile("vxor.v $vr6, $vr6, $vr18");
+ asm volatile("vxor.v $vr7, $vr7, $vr19");
+ asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+ asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+ asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+ asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+ /* vr0 - vr3: P + Pxy + Dx = Dy */
+ asm volatile("vxor.v $vr0, $vr0, $vr4");
+ asm volatile("vxor.v $vr1, $vr1, $vr5");
+ asm volatile("vxor.v $vr2, $vr2, $vr6");
+ asm volatile("vxor.v $vr3, $vr3, $vr7");
+ asm volatile("vst $vr0, %0" : "=m" (dp[0]));
+ asm volatile("vst $vr1, %0" : "=m" (dp[16]));
+ asm volatile("vst $vr2, %0" : "=m" (dp[32]));
+ asm volatile("vst $vr3, %0" : "=m" (dp[48]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lsx(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ /* vr22, vr23: qmul */
+ asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+
+ while (bytes) {
+ /* vr0 - vr3: P + Dx */
+ asm volatile("vld $vr0, %0" : : "m" (p[0]));
+ asm volatile("vld $vr1, %0" : : "m" (p[16]));
+ asm volatile("vld $vr2, %0" : : "m" (p[32]));
+ asm volatile("vld $vr3, %0" : : "m" (p[48]));
+ /* vr4 - vr7: Qx */
+ asm volatile("vld $vr4, %0" : : "m" (dq[0]));
+ asm volatile("vld $vr5, %0" : : "m" (dq[16]));
+ asm volatile("vld $vr6, %0" : : "m" (dq[32]));
+ asm volatile("vld $vr7, %0" : : "m" (dq[48]));
+ /* vr4 - vr7: Q + Qx */
+ asm volatile("vld $vr8, %0" : : "m" (q[0]));
+ asm volatile("vld $vr9, %0" : : "m" (q[16]));
+ asm volatile("vld $vr10, %0" : : "m" (q[32]));
+ asm volatile("vld $vr11, %0" : : "m" (q[48]));
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+
+ /* vr8 - vr11: higher 4 bits of each byte of (Q + Qx) */
+ asm volatile("vsrli.b $vr8, $vr4, 4");
+ asm volatile("vsrli.b $vr9, $vr5, 4");
+ asm volatile("vsrli.b $vr10, $vr6, 4");
+ asm volatile("vsrli.b $vr11, $vr7, 4");
+ /* vr4 - vr7: lower 4 bits of each byte of (Q + Qx) */
+ asm volatile("vandi.b $vr4, $vr4, 0x0f");
+ asm volatile("vandi.b $vr5, $vr5, 0x0f");
+ asm volatile("vandi.b $vr6, $vr6, 0x0f");
+ asm volatile("vandi.b $vr7, $vr7, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("vshuf.b $vr4, $vr22, $vr22, $vr4");
+ asm volatile("vshuf.b $vr5, $vr22, $vr22, $vr5");
+ asm volatile("vshuf.b $vr6, $vr22, $vr22, $vr6");
+ asm volatile("vshuf.b $vr7, $vr22, $vr22, $vr7");
+ /* lookup from qmul[16] */
+ asm volatile("vshuf.b $vr8, $vr23, $vr23, $vr8");
+ asm volatile("vshuf.b $vr9, $vr23, $vr23, $vr9");
+ asm volatile("vshuf.b $vr10, $vr23, $vr23, $vr10");
+ asm volatile("vshuf.b $vr11, $vr23, $vr23, $vr11");
+ /* vr4 - vr7: qmul(Q + Qx) = Dx */
+ asm volatile("vxor.v $vr4, $vr4, $vr8");
+ asm volatile("vxor.v $vr5, $vr5, $vr9");
+ asm volatile("vxor.v $vr6, $vr6, $vr10");
+ asm volatile("vxor.v $vr7, $vr7, $vr11");
+ asm volatile("vst $vr4, %0" : "=m" (dq[0]));
+ asm volatile("vst $vr5, %0" : "=m" (dq[16]));
+ asm volatile("vst $vr6, %0" : "=m" (dq[32]));
+ asm volatile("vst $vr7, %0" : "=m" (dq[48]));
+
+ /* vr0 - vr3: P + Dx + Dx = P */
+ asm volatile("vxor.v $vr0, $vr0, $vr4");
+ asm volatile("vxor.v $vr1, $vr1, $vr5");
+ asm volatile("vxor.v $vr2, $vr2, $vr6");
+ asm volatile("vxor.v $vr3, $vr3, $vr7");
+ asm volatile("vst $vr0, %0" : "=m" (p[0]));
+ asm volatile("vst $vr1, %0" : "=m" (p[16]));
+ asm volatile("vst $vr2, %0" : "=m" (p[32]));
+ asm volatile("vst $vr3, %0" : "=m" (p[48]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lsx = {
+ .data2 = raid6_2data_recov_lsx,
+ .datap = raid6_datap_recov_lsx,
+ .valid = raid6_has_lsx,
+ .name = "lsx",
+ .priority = 1,
+};
+#endif /* CONFIG_CPU_HAS_LSX */
+
+#ifdef CONFIG_CPU_HAS_LASX
+static int raid6_has_lasx(void)
+{
+ return cpu_has_lasx;
+}
+
+static void raid6_2data_recov_lasx(int disks, size_t bytes, int faila,
+ int failb, void **ptrs)
+{
+ u8 *p, *q, *dp, *dq;
+ const u8 *pbmul; /* P multiplier table for B data */
+ const u8 *qmul; /* Q multiplier table (for both) */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data pages
+ * Use the dead data pages as temporary storage for
+ * delta p and delta q
+ */
+ dp = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 2] = dp;
+ dq = (u8 *)ptrs[failb];
+ ptrs[failb] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dp;
+ ptrs[failb] = dq;
+ ptrs[disks - 2] = p;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ pbmul = raid6_vgfmul[raid6_gfexi[failb - faila]];
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ raid6_gfexp[failb]]];
+
+ kernel_fpu_begin();
+
+ /*
+ * xr20, xr21: qmul
+ * xr22, xr23: pbmul
+ */
+ asm volatile("vld $vr20, %0" : : "m" (qmul[0]));
+ asm volatile("vld $vr21, %0" : : "m" (qmul[16]));
+ asm volatile("vld $vr22, %0" : : "m" (pbmul[0]));
+ asm volatile("vld $vr23, %0" : : "m" (pbmul[16]));
+ asm volatile("xvreplve0.q $xr20, $xr20");
+ asm volatile("xvreplve0.q $xr21, $xr21");
+ asm volatile("xvreplve0.q $xr22, $xr22");
+ asm volatile("xvreplve0.q $xr23, $xr23");
+
+ while (bytes) {
+ /* xr0, xr1: Q */
+ asm volatile("xvld $xr0, %0" : : "m" (q[0]));
+ asm volatile("xvld $xr1, %0" : : "m" (q[32]));
+ /* xr0, xr1: Q + Qxy */
+ asm volatile("xvld $xr4, %0" : : "m" (dq[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (dq[32]));
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+ /* xr2, xr3: P */
+ asm volatile("xvld $xr2, %0" : : "m" (p[0]));
+ asm volatile("xvld $xr3, %0" : : "m" (p[32]));
+ /* xr2, xr3: P + Pxy */
+ asm volatile("xvld $xr4, %0" : : "m" (dp[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (dp[32]));
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr4, xr5: higher 4 bits of each byte of (Q + Qxy) */
+ asm volatile("xvsrli.b $xr4, $xr0, 4");
+ asm volatile("xvsrli.b $xr5, $xr1, 4");
+ /* xr0, xr1: lower 4 bits of each byte of (Q + Qxy) */
+ asm volatile("xvandi.b $xr0, $xr0, 0x0f");
+ asm volatile("xvandi.b $xr1, $xr1, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("xvshuf.b $xr0, $xr20, $xr20, $xr0");
+ asm volatile("xvshuf.b $xr1, $xr20, $xr20, $xr1");
+ /* lookup from qmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr21, $xr21, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr21, $xr21, $xr5");
+ /* xr6, xr7: B(Q + Qxy) */
+ asm volatile("xvxor.v $xr6, $xr4, $xr0");
+ asm volatile("xvxor.v $xr7, $xr5, $xr1");
+
+ /* xr4, xr5: higher 4 bits of each byte of (P + Pxy) */
+ asm volatile("xvsrli.b $xr4, $xr2, 4");
+ asm volatile("xvsrli.b $xr5, $xr3, 4");
+ /* xr0, xr1: lower 4 bits of each byte of (P + Pxy) */
+ asm volatile("xvandi.b $xr0, $xr2, 0x0f");
+ asm volatile("xvandi.b $xr1, $xr3, 0x0f");
+ /* lookup from pbmul[0] */
+ asm volatile("xvshuf.b $xr0, $xr22, $xr22, $xr0");
+ asm volatile("xvshuf.b $xr1, $xr22, $xr22, $xr1");
+ /* lookup from pbmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+ /* xr0, xr1: A(P + Pxy) */
+ asm volatile("xvxor.v $xr0, $xr0, $xr4");
+ asm volatile("xvxor.v $xr1, $xr1, $xr5");
+
+ /* xr0, xr1: A(P + Pxy) + B(Q + Qxy) = Dx */
+ asm volatile("xvxor.v $xr0, $xr0, $xr6");
+ asm volatile("xvxor.v $xr1, $xr1, $xr7");
+
+ /* xr2, xr3: P + Pxy + Dx = Dy */
+ asm volatile("xvxor.v $xr2, $xr2, $xr0");
+ asm volatile("xvxor.v $xr3, $xr3, $xr1");
+
+ asm volatile("xvst $xr0, %0" : "=m" (dq[0]));
+ asm volatile("xvst $xr1, %0" : "=m" (dq[32]));
+ asm volatile("xvst $xr2, %0" : "=m" (dp[0]));
+ asm volatile("xvst $xr3, %0" : "=m" (dp[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dp += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+static void raid6_datap_recov_lasx(int disks, size_t bytes, int faila,
+ void **ptrs)
+{
+ u8 *p, *q, *dq;
+ const u8 *qmul; /* Q multiplier table */
+
+ p = (u8 *)ptrs[disks - 2];
+ q = (u8 *)ptrs[disks - 1];
+
+ /*
+ * Compute syndrome with zero for the missing data page
+ * Use the dead data page as temporary storage for delta q
+ */
+ dq = (u8 *)ptrs[faila];
+ ptrs[faila] = (void *)raid6_empty_zero_page;
+ ptrs[disks - 1] = dq;
+
+ raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+ /* Restore pointer table */
+ ptrs[faila] = dq;
+ ptrs[disks - 1] = q;
+
+ /* Now, pick the proper data tables */
+ qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+ kernel_fpu_begin();
+
+ /* xr22, xr23: qmul */
+ asm volatile("vld $vr22, %0" : : "m" (qmul[0]));
+ asm volatile("xvreplve0.q $xr22, $xr22");
+ asm volatile("vld $vr23, %0" : : "m" (qmul[16]));
+ asm volatile("xvreplve0.q $xr23, $xr23");
+
+ while (bytes) {
+ /* xr0, xr1: P + Dx */
+ asm volatile("xvld $xr0, %0" : : "m" (p[0]));
+ asm volatile("xvld $xr1, %0" : : "m" (p[32]));
+ /* xr2, xr3: Qx */
+ asm volatile("xvld $xr2, %0" : : "m" (dq[0]));
+ asm volatile("xvld $xr3, %0" : : "m" (dq[32]));
+ /* xr2, xr3: Q + Qx */
+ asm volatile("xvld $xr4, %0" : : "m" (q[0]));
+ asm volatile("xvld $xr5, %0" : : "m" (q[32]));
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr4, xr5: higher 4 bits of each byte of (Q + Qx) */
+ asm volatile("xvsrli.b $xr4, $xr2, 4");
+ asm volatile("xvsrli.b $xr5, $xr3, 4");
+ /* xr2, xr3: lower 4 bits of each byte of (Q + Qx) */
+ asm volatile("xvandi.b $xr2, $xr2, 0x0f");
+ asm volatile("xvandi.b $xr3, $xr3, 0x0f");
+ /* lookup from qmul[0] */
+ asm volatile("xvshuf.b $xr2, $xr22, $xr22, $xr2");
+ asm volatile("xvshuf.b $xr3, $xr22, $xr22, $xr3");
+ /* lookup from qmul[16] */
+ asm volatile("xvshuf.b $xr4, $xr23, $xr23, $xr4");
+ asm volatile("xvshuf.b $xr5, $xr23, $xr23, $xr5");
+ /* xr2, xr3: qmul(Q + Qx) = Dx */
+ asm volatile("xvxor.v $xr2, $xr2, $xr4");
+ asm volatile("xvxor.v $xr3, $xr3, $xr5");
+
+ /* xr0, xr1: P + Dx + Dx = P */
+ asm volatile("xvxor.v $xr0, $xr0, $xr2");
+ asm volatile("xvxor.v $xr1, $xr1, $xr3");
+
+ asm volatile("xvst $xr2, %0" : "=m" (dq[0]));
+ asm volatile("xvst $xr3, %0" : "=m" (dq[32]));
+ asm volatile("xvst $xr0, %0" : "=m" (p[0]));
+ asm volatile("xvst $xr1, %0" : "=m" (p[32]));
+
+ bytes -= 64;
+ p += 64;
+ q += 64;
+ dq += 64;
+ }
+
+ kernel_fpu_end();
+}
+
+const struct raid6_recov_calls raid6_recov_lasx = {
+ .data2 = raid6_2data_recov_lasx,
+ .datap = raid6_datap_recov_lasx,
+ .valid = raid6_has_lasx,
+ .name = "lasx",
+ .priority = 2,
+};
+#endif /* CONFIG_CPU_HAS_LASX */
gcc -c -x c - >/dev/null && rm ./-.o && echo yes)
endif
+ifeq ($(ARCH),loongarch64)
+ CFLAGS += -I../../../arch/loongarch/include -DCONFIG_LOONGARCH=1
+ CFLAGS += $(shell echo 'vld $$vr0, $$zero, 0' | \
+ gcc -c -x assembler - >/dev/null 2>&1 && \
+ rm ./-.o && echo -DCONFIG_CPU_HAS_LSX=1)
+ CFLAGS += $(shell echo 'xvld $$xr0, $$zero, 0' | \
+ gcc -c -x assembler - >/dev/null 2>&1 && \
+ rm ./-.o && echo -DCONFIG_CPU_HAS_LASX=1)
+endif
+
ifeq ($(IS_X86),yes)
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o avx512.o recov_avx512.o
CFLAGS += -DCONFIG_X86
CFLAGS += -DCONFIG_ALTIVEC
OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \
vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o
+else ifeq ($(ARCH),loongarch64)
+ OBJS += loongarch_simd.o recov_loongarch_simd.o
endif
.c.o:
* bdi.wb->list_lock (zap_pte_range->set_page_dirty)
* ->inode->i_lock (zap_pte_range->set_page_dirty)
* ->private_lock (zap_pte_range->block_dirty_folio)
- *
- * ->i_mmap_rwsem
- * ->tasklist_lock (memory_failure, collect_procs_ao)
*/
static void page_cache_delete(struct address_space *mapping,
return 0;
}
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr,
unsigned long end)
{
if (!p)
return -ENOMEM;
} else {
- pud_populate(&init_mm, pud,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pmd_init(p);
+ pud_populate(&init_mm, pud, p);
}
}
zero_pmd_populate(pud, addr, next);
return 0;
}
+void __weak __meminit pud_init(void *addr)
+{
+}
+
static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr,
unsigned long end)
{
if (!p)
return -ENOMEM;
} else {
- p4d_populate(&init_mm, p4d,
- early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pud_init(p);
+ p4d_populate(&init_mm, p4d, p);
}
}
zero_pud_populate(p4d, addr, next);
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+#ifndef __HAVE_ARCH_SHADOW_MAP
static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
{
return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
<< KASAN_SHADOW_SCALE_SHIFT);
}
+#endif
static __always_inline bool addr_has_metadata(const void *addr)
{
+#ifdef __HAVE_ARCH_SHADOW_MAP
+ return (kasan_mem_to_shadow((void *)addr) != NULL);
+#else
return (kasan_reset_tag(addr) >=
kasan_shadow_to_mem((void *)KASAN_SHADOW_START));
+#endif
}
/**
*/
static unsigned long kfence_init_pool(void)
{
- unsigned long addr = (unsigned long)__kfence_pool;
+ unsigned long addr;
struct page *pages;
int i;
if (!arch_kfence_init_pool())
- return addr;
+ return (unsigned long)__kfence_pool;
+ addr = (unsigned long)__kfence_pool;
pages = virt_to_page(__kfence_pool);
/*
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page = pfn_to_online_page(pfn);
+ if (!(pfn & 63))
+ cond_resched();
+
if (!page)
continue;
if (page_count(page) == 0)
continue;
scan_block(page, page + 1, NULL);
- if (!(pfn & 63))
- cond_resched();
}
}
put_online_mems();
struct anon_vma *av = rmap_item->anon_vma;
anon_vma_lock_read(av);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
unsigned long addr;
}
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
}
INIT_LIST_HEAD(&memcg->deferred_split_queue.split_queue);
memcg->deferred_split_queue.split_queue_len = 0;
#endif
- idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
lru_gen_init_memcg(memcg);
return memcg;
fail:
if (alloc_shrinker_info(memcg))
goto offline_kmem;
- /* Online state pins memcg ID, memcg ID pins CSS */
- refcount_set(&memcg->id.ref, 1);
- css_get(css);
-
if (unlikely(mem_cgroup_is_root(memcg)))
queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
FLUSH_TIME);
lru_gen_online_memcg(memcg);
+
+ /* Online state pins memcg ID, memcg ID pins CSS */
+ refcount_set(&memcg->id.ref, 1);
+ css_get(css);
+
+ /*
+ * Ensure mem_cgroup_from_id() works once we're fully online.
+ *
+ * We could do this earlier and require callers to filter with
+ * css_tryget_online(). But right now there are no users that
+ * need earlier access, and the workingset code relies on the
+ * cgroup tree linkage (mem_cgroup_get_nr_swap_pages()). So
+ * publish it here at the end of onlining. This matches the
+ * regular ID destruction during offlining.
+ */
+ idr_replace(&mem_cgroup_idr, memcg, memcg->id.id);
+
return 0;
offline_kmem:
memcg_offline_kmem(memcg);
return -EINVAL;
if (!(flags & (MFD_EXEC | MFD_NOEXEC_SEAL))) {
- pr_info_ratelimited(
+ pr_warn_once(
"%s[%d]: memfd_create() called without MFD_EXEC or MFD_NOEXEC_SEAL set\n",
current->comm, task_pid_nr(current));
}
* on behalf of the thread group. Return task_struct of the (first found)
* dedicated thread if found, and return NULL otherwise.
*
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
*/
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
return;
pgoff = page_to_pgoff(page);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
pgoff_t pgoff;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
pgoff = page_to_pgoff(page);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, force_early);
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
struct task_struct *tsk;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, true);
add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */
collect_procs_file(page, tokill, force_early);
}
-struct hwp_walk {
+struct hwpoison_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
return 0;
}
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
int ret = 0;
pte_t *ptep, *mapped_pte;
spinlock_t *ptl;
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
#define hwpoison_hugetlb_range NULL
#endif
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
.walk_lock = PGWALK_RDLOCK,
int flags)
{
int ret;
- struct hwp_walk priv = {
+ struct hwpoison_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;
return -EFAULT;
mmap_read_lock(p->mm);
- ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
- if (PageReserved(p) || PageSlab(p) || PageTable(p))
+ if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
return true;
if (!(PageLRU(hpage) || PageHuge(p)))
return true;
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ if (folio_test_slab(folio) || PageTable(&folio->page) ||
+ folio_test_reserved(folio) || PageOffline(&folio->page))
goto unlock_mutex;
/*
do {
page = NULL;
spin_lock_irqsave(&zone->lock, flags);
- /*
- * order-0 request can reach here when the pcplist is skipped
- * due to non-CMA allocation context. HIGHATOMIC area is
- * reserved for high-order atomic allocation, so order-0
- * request should skip it.
- */
if (alloc_flags & ALLOC_HIGHATOMIC)
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
if (likely(pcp_allowed_order(order))) {
- /*
- * MIGRATE_MOVABLE pcplist could have the pages on CMA area and
- * we need to skip it when CMA area isn't allowed.
- */
- if (!IS_ENABLED(CONFIG_CMA) || alloc_flags & ALLOC_CMA ||
- migratetype != MIGRATE_MOVABLE) {
- page = rmqueue_pcplist(preferred_zone, zone, order,
- migratetype, alloc_flags);
- if (likely(page))
- goto out;
- }
+ page = rmqueue_pcplist(preferred_zone, zone, order,
+ migratetype, alloc_flags);
+ if (likely(page))
+ goto out;
}
page = rmqueue_buddy(preferred_zone, zone, order, alloc_flags,
if (vmalloc_dump_obj(object))
return;
- if (virt_addr_valid(object))
+ if (is_vmalloc_addr(object))
+ type = "vmalloc memory";
+ else if (virt_addr_valid(object))
type = "non-slab/vmalloc memory";
else if (object == NULL)
type = "NULL pointer";
#ifdef CONFIG_PRINTK
bool vmalloc_dump_obj(void *object)
{
- struct vm_struct *vm;
void *objp = (void *)PAGE_ALIGN((unsigned long)object);
+ const void *caller;
+ struct vm_struct *vm;
+ struct vmap_area *va;
+ unsigned long addr;
+ unsigned int nr_pages;
- vm = find_vm_area(objp);
- if (!vm)
+ if (!spin_trylock(&vmap_area_lock))
+ return false;
+ va = __find_vmap_area((unsigned long)objp, &vmap_area_root);
+ if (!va) {
+ spin_unlock(&vmap_area_lock);
return false;
+ }
+
+ vm = va->vm;
+ if (!vm) {
+ spin_unlock(&vmap_area_lock);
+ return false;
+ }
+ addr = (unsigned long)vm->addr;
+ caller = vm->caller;
+ nr_pages = vm->nr_pages;
+ spin_unlock(&vmap_area_lock);
pr_cont(" %u-page vmalloc region starting at %#lx allocated at %pS\n",
- vm->nr_pages, (unsigned long)vm->addr, vm->caller);
+ nr_pages, addr, caller);
return true;
}
#endif
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
+ asm volatile ("");
return (long)arg;
}
struct sock_exterr_skb *serr;
struct sk_buff *skb;
char *state = "UNK";
+ u32 tsflags;
int err;
jsk = j1939_sk(sk);
if (!(jsk->state & J1939_SOCK_ERRQUEUE))
return;
+ tsflags = READ_ONCE(sk->sk_tsflags);
switch (type) {
case J1939_ERRQUEUE_TX_ACK:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_ACK))
return;
break;
case J1939_ERRQUEUE_TX_SCHED:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED))
+ if (!(tsflags & SOF_TIMESTAMPING_TX_SCHED))
return;
break;
case J1939_ERRQUEUE_TX_ABORT:
case J1939_ERRQUEUE_RX_DPO:
fallthrough;
case J1939_ERRQUEUE_RX_ABORT:
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
+ if (!(tsflags & SOF_TIMESTAMPING_RX_SOFTWARE))
return;
break;
default:
}
serr->opt_stats = true;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ if (tsflags & SOF_TIMESTAMPING_OPT_ID)
serr->ee.ee_data = session->tskey;
netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n",
return true;
}
+static void ceph_msg_data_iter_cursor_init(struct ceph_msg_data_cursor *cursor,
+ size_t length)
+{
+ struct ceph_msg_data *data = cursor->data;
+
+ cursor->iov_iter = data->iter;
+ cursor->lastlen = 0;
+ iov_iter_truncate(&cursor->iov_iter, length);
+ cursor->resid = iov_iter_count(&cursor->iov_iter);
+}
+
+static struct page *ceph_msg_data_iter_next(struct ceph_msg_data_cursor *cursor,
+ size_t *page_offset, size_t *length)
+{
+ struct page *page;
+ ssize_t len;
+
+ if (cursor->lastlen)
+ iov_iter_revert(&cursor->iov_iter, cursor->lastlen);
+
+ len = iov_iter_get_pages2(&cursor->iov_iter, &page, PAGE_SIZE,
+ 1, page_offset);
+ BUG_ON(len < 0);
+
+ cursor->lastlen = len;
+
+ /*
+ * FIXME: The assumption is that the pages represented by the iov_iter
+ * are pinned, with the references held by the upper-level
+ * callers, or by virtue of being under writeback. Eventually,
+ * we'll get an iov_iter_get_pages2 variant that doesn't take
+ * page refs. Until then, just put the page ref.
+ */
+ VM_BUG_ON_PAGE(!PageWriteback(page) && page_count(page) < 2, page);
+ put_page(page);
+
+ *length = min_t(size_t, len, cursor->resid);
+ return page;
+}
+
+static bool ceph_msg_data_iter_advance(struct ceph_msg_data_cursor *cursor,
+ size_t bytes)
+{
+ BUG_ON(bytes > cursor->resid);
+ cursor->resid -= bytes;
+
+ if (bytes < cursor->lastlen) {
+ cursor->lastlen -= bytes;
+ } else {
+ iov_iter_advance(&cursor->iov_iter, bytes - cursor->lastlen);
+ cursor->lastlen = 0;
+ }
+
+ return cursor->resid;
+}
+
/*
* Message data is handled (sent or received) in pieces, where each
* piece resides on a single page. The network layer might not
case CEPH_MSG_DATA_BVECS:
ceph_msg_data_bvecs_cursor_init(cursor, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ ceph_msg_data_iter_cursor_init(cursor, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
/* BUG(); */
cursor->total_resid = length;
cursor->data = msg->data;
+ cursor->sr_resid = 0;
__ceph_msg_data_cursor_init(cursor);
}
case CEPH_MSG_DATA_BVECS:
page = ceph_msg_data_bvecs_next(cursor, page_offset, length);
break;
+ case CEPH_MSG_DATA_ITER:
+ page = ceph_msg_data_iter_next(cursor, page_offset, length);
+ break;
case CEPH_MSG_DATA_NONE:
default:
page = NULL;
case CEPH_MSG_DATA_BVECS:
new_piece = ceph_msg_data_bvecs_advance(cursor, bytes);
break;
+ case CEPH_MSG_DATA_ITER:
+ new_piece = ceph_msg_data_iter_advance(cursor, bytes);
+ break;
case CEPH_MSG_DATA_NONE:
default:
BUG();
}
EXPORT_SYMBOL(ceph_msg_data_add_bvecs);
+void ceph_msg_data_add_iter(struct ceph_msg *msg,
+ struct iov_iter *iter)
+{
+ struct ceph_msg_data *data;
+
+ data = ceph_msg_data_add(msg);
+ data->type = CEPH_MSG_DATA_ITER;
+ data->iter = *iter;
+
+ msg->data_length += iov_iter_count(&data->iter);
+}
+
/*
* construct a new message with given type, size
* the new msg has a ref count of 1.
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
- /* Initialize data cursor */
-
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ /* Initialize data cursor if it's not a sparse read */
+ if (!msg->sparse_read)
+ ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
}
/*
prepare_read_tag(con);
}
-static int read_partial_message_section(struct ceph_connection *con,
- struct kvec *section,
- unsigned int sec_len, u32 *crc)
+static int read_partial_message_chunk(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
{
int ret, left;
section->iov_len += ret;
}
if (section->iov_len == sec_len)
- *crc = crc32c(0, section->iov_base, section->iov_len);
+ *crc = crc32c(*crc, section->iov_base, section->iov_len);
return 1;
}
+static inline int read_partial_message_section(struct ceph_connection *con,
+ struct kvec *section,
+ unsigned int sec_len, u32 *crc)
+{
+ *crc = 0;
+ return read_partial_message_chunk(con, section, sec_len, crc);
+}
+
+static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
+
+ if (do_bounce && unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ while (cursor->sr_resid > 0) {
+ struct page *page, *rpage;
+ size_t off, len;
+ int ret;
+
+ page = ceph_msg_data_next(cursor, &off, &len);
+ rpage = do_bounce ? con->bounce_page : page;
+
+ /* clamp to what remains in extent */
+ len = min_t(int, len, cursor->sr_resid);
+ ret = ceph_tcp_recvpage(con->sock, rpage, (int)off, len);
+ if (ret <= 0)
+ return ret;
+ *crc = ceph_crc32c_page(*crc, rpage, off, ret);
+ ceph_msg_data_advance(cursor, (size_t)ret);
+ cursor->sr_resid -= ret;
+ if (do_bounce)
+ memcpy_page(page, off, rpage, off, ret);
+ }
+ return 1;
+}
+
+static int read_sparse_msg_data(struct ceph_connection *con)
+{
+ struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
+ bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
+ u32 crc = 0;
+ int ret = 1;
+
+ if (do_datacrc)
+ crc = con->in_data_crc;
+
+ do {
+ if (con->v1.in_sr_kvec.iov_base)
+ ret = read_partial_message_chunk(con,
+ &con->v1.in_sr_kvec,
+ con->v1.in_sr_len,
+ &crc);
+ else if (cursor->sr_resid > 0)
+ ret = read_sparse_msg_extent(con, &crc);
+
+ if (ret <= 0) {
+ if (do_datacrc)
+ con->in_data_crc = crc;
+ return ret;
+ }
+
+ memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
+ ret = con->ops->sparse_read(con, cursor,
+ (char **)&con->v1.in_sr_kvec.iov_base);
+ con->v1.in_sr_len = ret;
+ } while (ret > 0);
+
+ if (do_datacrc)
+ con->in_data_crc = crc;
+
+ return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+}
+
static int read_partial_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
if (!m->num_data_items)
return -EIO;
- if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
+ if (m->sparse_read)
+ ret = read_sparse_msg_data(con);
+ else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
ret = read_partial_msg_data(con);
#include <linux/ceph/ceph_debug.h>
#include <crypto/aead.h>
-#include <crypto/algapi.h> /* for crypto_memneq() */
#include <crypto/hash.h>
#include <crypto/sha2.h>
+#include <crypto/utils.h>
#include <linux/bvec.h>
#include <linux/crc32c.h>
#include <linux/net.h>
#define FRAME_LATE_STATUS_COMPLETE 0xe
#define FRAME_LATE_STATUS_ABORTED_MASK 0xf
-#define IN_S_HANDLE_PREAMBLE 1
-#define IN_S_HANDLE_CONTROL 2
-#define IN_S_HANDLE_CONTROL_REMAINDER 3
-#define IN_S_PREPARE_READ_DATA 4
-#define IN_S_PREPARE_READ_DATA_CONT 5
-#define IN_S_PREPARE_READ_ENC_PAGE 6
-#define IN_S_HANDLE_EPILOGUE 7
-#define IN_S_FINISH_SKIP 8
+#define IN_S_HANDLE_PREAMBLE 1
+#define IN_S_HANDLE_CONTROL 2
+#define IN_S_HANDLE_CONTROL_REMAINDER 3
+#define IN_S_PREPARE_READ_DATA 4
+#define IN_S_PREPARE_READ_DATA_CONT 5
+#define IN_S_PREPARE_READ_ENC_PAGE 6
+#define IN_S_PREPARE_SPARSE_DATA 7
+#define IN_S_PREPARE_SPARSE_DATA_CONT 8
+#define IN_S_HANDLE_EPILOGUE 9
+#define IN_S_FINISH_SKIP 10
#define OUT_S_QUEUE_DATA 1
#define OUT_S_QUEUE_DATA_CONT 2
}
}
+/**
+ * init_sgs_pages: set up scatterlist on an array of page pointers
+ * @sg: scatterlist to populate
+ * @pages: pointer to page array
+ * @dpos: position in the array to start (bytes)
+ * @dlen: len to add to sg (bytes)
+ * @pad: pointer to pad destination (if any)
+ *
+ * Populate the scatterlist from the page array, starting at an arbitrary
+ * byte in the array and running for a specified length.
+ */
+static void init_sgs_pages(struct scatterlist **sg, struct page **pages,
+ int dpos, int dlen, u8 *pad)
+{
+ int idx = dpos >> PAGE_SHIFT;
+ int off = offset_in_page(dpos);
+ int resid = dlen;
+
+ do {
+ int len = min(resid, (int)PAGE_SIZE - off);
+
+ sg_set_page(*sg, pages[idx], len, off);
+ *sg = sg_next(*sg);
+ off = 0;
+ ++idx;
+ resid -= len;
+ } while (resid);
+
+ if (need_padding(dlen)) {
+ sg_set_buf(*sg, pad, padding_len(dlen));
+ *sg = sg_next(*sg);
+ }
+}
+
static int setup_message_sgs(struct sg_table *sgt, struct ceph_msg *msg,
u8 *front_pad, u8 *middle_pad, u8 *data_pad,
- void *epilogue, bool add_tag)
+ void *epilogue, struct page **pages, int dpos,
+ bool add_tag)
{
struct ceph_msg_data_cursor cursor;
struct scatterlist *cur_sg;
+ int dlen = data_len(msg);
int sg_cnt;
int ret;
if (middle_len(msg))
sg_cnt += calc_sg_cnt(msg->middle->vec.iov_base,
middle_len(msg));
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- sg_cnt += calc_sg_cnt_cursor(&cursor);
+ if (dlen) {
+ if (pages) {
+ sg_cnt += calc_pages_for(dpos, dlen);
+ if (need_padding(dlen))
+ sg_cnt++;
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ sg_cnt += calc_sg_cnt_cursor(&cursor);
+ }
}
ret = sg_alloc_table(sgt, sg_cnt, GFP_NOIO);
if (middle_len(msg))
init_sgs(&cur_sg, msg->middle->vec.iov_base, middle_len(msg),
middle_pad);
- if (data_len(msg)) {
- ceph_msg_data_cursor_init(&cursor, msg, data_len(msg));
- init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ if (dlen) {
+ if (pages) {
+ init_sgs_pages(&cur_sg, pages, dpos, dlen, data_pad);
+ } else {
+ ceph_msg_data_cursor_init(&cursor, msg, dlen);
+ init_sgs_cursor(&cur_sg, &cursor, data_pad);
+ }
}
WARN_ON(!sg_is_last(cur_sg));
padded_len(rem_len) + CEPH_GCM_TAG_LEN);
}
+/* Process sparse read data that lives in a buffer */
+static int process_v2_sparse_read(struct ceph_connection *con,
+ struct page **pages, int spos)
+{
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+ int ret;
+
+ for (;;) {
+ char *buf = NULL;
+
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0)
+ return ret;
+
+ dout("%s: sparse_read return %x buf %p\n", __func__, ret, buf);
+
+ do {
+ int idx = spos >> PAGE_SHIFT;
+ int soff = offset_in_page(spos);
+ struct page *spage = con->v2.in_enc_pages[idx];
+ int len = min_t(int, ret, PAGE_SIZE - soff);
+
+ if (buf) {
+ memcpy_from_page(buf, spage, soff, len);
+ buf += len;
+ } else {
+ struct bio_vec bv;
+
+ get_bvec_at(cursor, &bv);
+ len = min_t(int, len, bv.bv_len);
+ memcpy_page(bv.bv_page, bv.bv_offset,
+ spage, soff, len);
+ ceph_msg_data_advance(cursor, len);
+ }
+ spos += len;
+ ret -= len;
+ } while (ret);
+ }
+}
+
static int decrypt_tail(struct ceph_connection *con)
{
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
+ struct page **pages = NULL;
+ bool sparse = con->in_msg->sparse_read;
+ int dpos = 0;
int tail_len;
int ret;
if (ret)
goto out;
+ if (sparse) {
+ dpos = padded_len(front_len(con->in_msg) + padded_len(middle_len(con->in_msg)));
+ pages = con->v2.in_enc_pages;
+ }
+
ret = setup_message_sgs(&sgt, con->in_msg, FRONT_PAD(con->v2.in_buf),
- MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
- con->v2.in_buf, true);
+ MIDDLE_PAD(con->v2.in_buf), DATA_PAD(con->v2.in_buf),
+ con->v2.in_buf, pages, dpos, true);
if (ret)
goto out;
if (ret)
goto out;
+ if (sparse && data_len(con->in_msg)) {
+ ret = process_v2_sparse_read(con, con->v2.in_enc_pages, dpos);
+ if (ret)
+ goto out;
+ }
+
WARN_ON(!con->v2.in_enc_page_cnt);
ceph_release_page_vector(con->v2.in_enc_pages,
con->v2.in_enc_page_cnt);
encode_epilogue_secure(con, false);
ret = setup_message_sgs(&sgt, con->out_msg, zerop, zerop, zerop,
- &con->v2.out_epil, false);
+ &con->v2.out_epil, NULL, 0, false);
if (ret)
goto out;
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
}
+static int prepare_sparse_read_cont(struct ceph_connection *con)
+{
+ int ret;
+ struct bio_vec bv;
+ char *buf = NULL;
+ struct ceph_msg_data_cursor *cursor = &con->v2.in_cursor;
+
+ WARN_ON(con->v2.in_state != IN_S_PREPARE_SPARSE_DATA_CONT);
+
+ if (iov_iter_is_bvec(&con->v2.in_iter)) {
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ con->in_data_crc = crc32c(con->in_data_crc,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ get_bvec_at(cursor, &bv);
+ memcpy_to_page(bv.bv_page, bv.bv_offset,
+ page_address(con->bounce_page),
+ con->v2.in_bvec.bv_len);
+ } else {
+ con->in_data_crc = ceph_crc32c_page(con->in_data_crc,
+ con->v2.in_bvec.bv_page,
+ con->v2.in_bvec.bv_offset,
+ con->v2.in_bvec.bv_len);
+ }
+
+ ceph_msg_data_advance(cursor, con->v2.in_bvec.bv_len);
+ cursor->sr_resid -= con->v2.in_bvec.bv_len;
+ dout("%s: advance by 0x%x sr_resid 0x%x\n", __func__,
+ con->v2.in_bvec.bv_len, cursor->sr_resid);
+ WARN_ON_ONCE(cursor->sr_resid > cursor->total_resid);
+ if (cursor->sr_resid) {
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= bv.bv_len;
+ return 0;
+ }
+ } else if (iov_iter_is_kvec(&con->v2.in_iter)) {
+ /* On first call, we have no kvec so don't compute crc */
+ if (con->v2.in_kvec_cnt) {
+ WARN_ON_ONCE(con->v2.in_kvec_cnt > 1);
+ con->in_data_crc = crc32c(con->in_data_crc,
+ con->v2.in_kvecs[0].iov_base,
+ con->v2.in_kvecs[0].iov_len);
+ }
+ } else {
+ return -EIO;
+ }
+
+ /* get next extent */
+ ret = con->ops->sparse_read(con, cursor, &buf);
+ if (ret <= 0) {
+ if (ret < 0)
+ return ret;
+
+ reset_in_kvecs(con);
+ add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
+ con->v2.in_state = IN_S_HANDLE_EPILOGUE;
+ return 0;
+ }
+
+ if (buf) {
+ /* receive into buffer */
+ reset_in_kvecs(con);
+ add_in_kvec(con, buf, ret);
+ con->v2.data_len_remain -= ret;
+ return 0;
+ }
+
+ if (ret > cursor->total_resid) {
+ pr_warn("%s: ret 0x%x total_resid 0x%zx resid 0x%zx\n",
+ __func__, ret, cursor->total_resid, cursor->resid);
+ return -EIO;
+ }
+ get_bvec_at(cursor, &bv);
+ if (bv.bv_len > cursor->sr_resid)
+ bv.bv_len = cursor->sr_resid;
+ if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) {
+ if (unlikely(!con->bounce_page)) {
+ con->bounce_page = alloc_page(GFP_NOIO);
+ if (!con->bounce_page) {
+ pr_err("failed to allocate bounce page\n");
+ return -ENOMEM;
+ }
+ }
+
+ bv.bv_page = con->bounce_page;
+ bv.bv_offset = 0;
+ }
+ set_in_bvec(con, &bv);
+ con->v2.data_len_remain -= ret;
+ return ret;
+}
+
+static int prepare_sparse_read_data(struct ceph_connection *con)
+{
+ struct ceph_msg *msg = con->in_msg;
+
+ dout("%s: starting sparse read\n", __func__);
+
+ if (WARN_ON_ONCE(!con->ops->sparse_read))
+ return -EOPNOTSUPP;
+
+ if (!con_secure(con))
+ con->in_data_crc = -1;
+
+ reset_in_kvecs(con);
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA_CONT;
+ con->v2.data_len_remain = data_len(msg);
+ return prepare_sparse_read_cont(con);
+}
+
static int prepare_read_tail_plain(struct ceph_connection *con)
{
struct ceph_msg *msg = con->in_msg;
}
if (data_len(msg)) {
- con->v2.in_state = IN_S_PREPARE_READ_DATA;
+ if (msg->sparse_read)
+ con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
+ else
+ con->v2.in_state = IN_S_PREPARE_READ_DATA;
} else {
add_in_kvec(con, con->v2.in_buf, CEPH_EPILOGUE_PLAIN_LEN);
con->v2.in_state = IN_S_HANDLE_EPILOGUE;
prepare_read_enc_page(con);
ret = 0;
break;
+ case IN_S_PREPARE_SPARSE_DATA:
+ ret = prepare_sparse_read_data(con);
+ break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ ret = prepare_sparse_read_cont(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
ret = handle_epilogue(con);
break;
con->v2.in_state = IN_S_FINISH_SKIP;
}
+static void revoke_at_prepare_sparse_data(struct ceph_connection *con)
+{
+ int resid; /* current piece of data */
+ int remaining;
+
+ WARN_ON(con_secure(con));
+ WARN_ON(!data_len(con->in_msg));
+ WARN_ON(!iov_iter_is_bvec(&con->v2.in_iter));
+ resid = iov_iter_count(&con->v2.in_iter);
+ dout("%s con %p resid %d\n", __func__, con, resid);
+
+ remaining = CEPH_EPILOGUE_PLAIN_LEN + con->v2.data_len_remain;
+ con->v2.in_iter.count -= resid;
+ set_in_skip(con, resid + remaining);
+ con->v2.in_state = IN_S_FINISH_SKIP;
+}
+
static void revoke_at_handle_epilogue(struct ceph_connection *con)
{
int resid;
void ceph_con_v2_revoke_incoming(struct ceph_connection *con)
{
switch (con->v2.in_state) {
+ case IN_S_PREPARE_SPARSE_DATA:
case IN_S_PREPARE_READ_DATA:
revoke_at_prepare_read_data(con);
break;
case IN_S_PREPARE_READ_ENC_PAGE:
revoke_at_prepare_read_enc_page(con);
break;
+ case IN_S_PREPARE_SPARSE_DATA_CONT:
+ revoke_at_prepare_sparse_data(con);
+ break;
case IN_S_HANDLE_EPILOGUE:
revoke_at_handle_epilogue(con);
break;
osd_data->num_bvecs = num_bvecs;
}
+static void ceph_osd_iter_init(struct ceph_osd_data *osd_data,
+ struct iov_iter *iter)
+{
+ osd_data->type = CEPH_OSD_DATA_TYPE_ITER;
+ osd_data->iter = *iter;
+}
+
static struct ceph_osd_data *
osd_req_op_raw_data_in(struct ceph_osd_request *osd_req, unsigned int which)
{
}
EXPORT_SYMBOL(osd_req_op_extent_osd_data_bvec_pos);
+/**
+ * osd_req_op_extent_osd_iter - Set up an operation with an iterator buffer
+ * @osd_req: The request to set up
+ * @which: Index of the operation in which to set the iter
+ * @iter: The buffer iterator
+ */
+void osd_req_op_extent_osd_iter(struct ceph_osd_request *osd_req,
+ unsigned int which, struct iov_iter *iter)
+{
+ struct ceph_osd_data *osd_data;
+
+ osd_data = osd_req_op_data(osd_req, which, extent, osd_data);
+ ceph_osd_iter_init(osd_data, iter);
+}
+EXPORT_SYMBOL(osd_req_op_extent_osd_iter);
+
static void osd_req_op_cls_request_info_pagelist(
struct ceph_osd_request *osd_req,
unsigned int which, struct ceph_pagelist *pagelist)
#endif /* CONFIG_BLOCK */
case CEPH_OSD_DATA_TYPE_BVECS:
return osd_data->bvec_pos.iter.bi_size;
+ case CEPH_OSD_DATA_TYPE_ITER:
+ return iov_iter_count(&osd_data->iter);
default:
WARN(true, "unrecognized data type %d\n", (int)osd_data->type);
return 0;
switch (op->op) {
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
+ kfree(op->extent.sparse_ext);
ceph_osd_data_release(&op->extent.osd_data);
break;
case CEPH_OSD_OP_CALL:
/* reply */
case CEPH_OSD_OP_STAT:
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_LIST_WATCHERS:
*num_reply_data_items += 1;
break;
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_WRITEFULL && opcode != CEPH_OSD_OP_ZERO &&
- opcode != CEPH_OSD_OP_TRUNCATE);
+ opcode != CEPH_OSD_OP_TRUNCATE && opcode != CEPH_OSD_OP_SPARSE_READ);
op->extent.offset = offset;
op->extent.length = length;
#endif
} else if (osd_data->type == CEPH_OSD_DATA_TYPE_BVECS) {
ceph_msg_data_add_bvecs(msg, &osd_data->bvec_pos);
+ } else if (osd_data->type == CEPH_OSD_DATA_TYPE_ITER) {
+ ceph_msg_data_add_iter(msg, &osd_data->iter);
} else {
BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_NONE);
}
case CEPH_OSD_OP_STAT:
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
case CEPH_OSD_OP_WRITE:
case CEPH_OSD_OP_WRITEFULL:
case CEPH_OSD_OP_ZERO:
dst->copy_from.src_fadvise_flags =
cpu_to_le32(src->copy_from.src_fadvise_flags);
break;
+ case CEPH_OSD_OP_ASSERT_VER:
+ dst->assert_ver.unused = cpu_to_le64(0);
+ dst->assert_ver.ver = cpu_to_le64(src->assert_ver.ver);
+ break;
default:
pr_err("unsupported osd opcode %s\n",
ceph_osd_op_name(src->op));
BUG_ON(opcode != CEPH_OSD_OP_READ && opcode != CEPH_OSD_OP_WRITE &&
opcode != CEPH_OSD_OP_ZERO && opcode != CEPH_OSD_OP_TRUNCATE &&
- opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE);
+ opcode != CEPH_OSD_OP_CREATE && opcode != CEPH_OSD_OP_DELETE &&
+ opcode != CEPH_OSD_OP_SPARSE_READ);
req = ceph_osdc_alloc_request(osdc, snapc, num_ops, use_mempool,
GFP_NOFS);
if (flags & CEPH_OSD_FLAG_WRITE)
req->r_data_offset = off;
- if (num_ops > 1)
+ if (num_ops > 1) {
+ int num_req_ops, num_rep_ops;
+
/*
- * This is a special case for ceph_writepages_start(), but it
- * also covers ceph_uninline_data(). If more multi-op request
- * use cases emerge, we will need a separate helper.
+ * If this is a multi-op write request, assume that we'll need
+ * request ops. If it's a multi-op read then assume we'll need
+ * reply ops. Anything else and call it -EINVAL.
*/
- r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_ops, 0);
- else
+ if (flags & CEPH_OSD_FLAG_WRITE) {
+ num_req_ops = num_ops;
+ num_rep_ops = 0;
+ } else if (flags & CEPH_OSD_FLAG_READ) {
+ num_req_ops = 0;
+ num_rep_ops = num_ops;
+ } else {
+ r = -EINVAL;
+ goto fail;
+ }
+
+ r = __ceph_osdc_alloc_messages(req, GFP_NOFS, num_req_ops,
+ num_rep_ops);
+ } else {
r = ceph_osdc_alloc_messages(req, GFP_NOFS);
+ }
if (r)
goto fail;
}
EXPORT_SYMBOL(ceph_osdc_new_request);
+int __ceph_alloc_sparse_ext_map(struct ceph_osd_req_op *op, int cnt)
+{
+ op->extent.sparse_ext_cnt = cnt;
+ op->extent.sparse_ext = kmalloc_array(cnt,
+ sizeof(*op->extent.sparse_ext),
+ GFP_NOFS);
+ if (!op->extent.sparse_ext)
+ return -ENOMEM;
+ return 0;
+}
+EXPORT_SYMBOL(__ceph_alloc_sparse_ext_map);
+
/*
* We keep osd requests in an rbtree, sorted by ->r_tid.
*/
{
refcount_set(&osd->o_ref, 1);
RB_CLEAR_NODE(&osd->o_node);
+ spin_lock_init(&osd->o_requests_lock);
osd->o_requests = RB_ROOT;
osd->o_linger_requests = RB_ROOT;
osd->o_backoff_mappings = RB_ROOT;
mutex_init(&osd->lock);
}
+static void ceph_init_sparse_read(struct ceph_sparse_read *sr)
+{
+ kfree(sr->sr_extent);
+ memset(sr, '\0', sizeof(*sr));
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+}
+
static void osd_cleanup(struct ceph_osd *osd)
{
WARN_ON(!RB_EMPTY_NODE(&osd->o_node));
WARN_ON(!list_empty(&osd->o_osd_lru));
WARN_ON(!list_empty(&osd->o_keepalive_item));
+ ceph_init_sparse_read(&osd->o_sparse_read);
+
if (osd->o_auth.authorizer) {
WARN_ON(osd_homeless(osd));
ceph_auth_destroy_authorizer(osd->o_auth.authorizer);
osd_init(osd);
osd->o_osdc = osdc;
osd->o_osd = onum;
+ osd->o_sparse_op_idx = -1;
+
+ ceph_init_sparse_read(&osd->o_sparse_read);
ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
atomic_inc(&osd->o_osdc->num_homeless);
get_osd(osd);
+ spin_lock(&osd->o_requests_lock);
insert_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
req->r_osd = osd;
}
req, req->r_tid);
req->r_osd = NULL;
+ spin_lock(&osd->o_requests_lock);
erase_request(&osd->o_requests, req);
+ spin_unlock(&osd->o_requests_lock);
put_osd(osd);
if (!osd_homeless(osd))
&op->raw_data_in);
break;
case CEPH_OSD_OP_READ:
+ case CEPH_OSD_OP_SPARSE_READ:
ceph_osdc_msg_data_add(reply_msg,
&op->extent.osd_data);
break;
req->r_end_latency = ktime_get();
- if (req->r_osd)
+ if (req->r_osd) {
+ ceph_init_sparse_read(&req->r_osd->o_sparse_read);
unlink_request(req->r_osd, req);
+ }
atomic_dec(&osdc->num_requests);
/*
* one (type of) reply back.
*/
WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+ req->r_version = m.user_version;
req->r_result = m.result ?: data_len;
finish_request(req);
mutex_unlock(&osd->lock);
ceph_msg_put(msg);
}
+/* How much sparse data was requested? */
+static u64 sparse_data_requested(struct ceph_osd_request *req)
+{
+ u64 len = 0;
+
+ if (req->r_flags & CEPH_OSD_FLAG_READ) {
+ int i;
+
+ for (i = 0; i < req->r_num_ops; ++i) {
+ struct ceph_osd_req_op *op = &req->r_ops[i];
+
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ len += op->extent.length;
+ }
+ }
+ return len;
+}
+
/*
* Lookup and return message for incoming reply. Don't try to do
* anything about a larger than preallocated data portion of the
int front_len = le32_to_cpu(hdr->front_len);
int data_len = le32_to_cpu(hdr->data_len);
u64 tid = le64_to_cpu(hdr->tid);
+ u64 srlen;
down_read(&osdc->lock);
if (!osd_registered(osd)) {
req->r_reply = m;
}
- if (data_len > req->r_reply->data_length) {
+ srlen = sparse_data_requested(req);
+ if (!srlen && data_len > req->r_reply->data_length) {
pr_warn("%s osd%d tid %llu data %d > preallocated %zu, skipping\n",
__func__, osd->o_osd, req->r_tid, data_len,
req->r_reply->data_length);
}
m = ceph_msg_get(req->r_reply);
+ m->sparse_read = (bool)srlen;
+
dout("get_reply tid %lld %p\n", tid, m);
out_unlock_session:
return ceph_auth_check_message_signature(auth, msg);
}
+static void advance_cursor(struct ceph_msg_data_cursor *cursor, size_t len,
+ bool zero)
+{
+ while (len) {
+ struct page *page;
+ size_t poff, plen;
+
+ page = ceph_msg_data_next(cursor, &poff, &plen);
+ if (plen > len)
+ plen = len;
+ if (zero)
+ zero_user_segment(page, poff, poff + plen);
+ len -= plen;
+ ceph_msg_data_advance(cursor, plen);
+ }
+}
+
+static int prep_next_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ struct ceph_osd_request *req;
+ struct ceph_osd_req_op *op;
+
+ spin_lock(&o->o_requests_lock);
+ req = lookup_request(&o->o_requests, le64_to_cpu(con->in_msg->hdr.tid));
+ if (!req) {
+ spin_unlock(&o->o_requests_lock);
+ return -EBADR;
+ }
+
+ if (o->o_sparse_op_idx < 0) {
+ u64 srlen = sparse_data_requested(req);
+
+ dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
+ __func__, o->o_osd, srlen);
+ ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ } else {
+ u64 end;
+
+ op = &req->r_ops[o->o_sparse_op_idx];
+
+ WARN_ON_ONCE(op->extent.sparse_ext);
+
+ /* hand back buffer we took earlier */
+ op->extent.sparse_ext = sr->sr_extent;
+ sr->sr_extent = NULL;
+ op->extent.sparse_ext_cnt = sr->sr_count;
+ sr->sr_ext_len = 0;
+ dout("%s: [%d] completed extent array len %d cursor->resid %zd\n",
+ __func__, o->o_osd, op->extent.sparse_ext_cnt, cursor->resid);
+ /* Advance to end of data for this operation */
+ end = ceph_sparse_ext_map_end(op);
+ if (end < sr->sr_req_len)
+ advance_cursor(cursor, sr->sr_req_len - end, false);
+ }
+
+ ceph_init_sparse_read(sr);
+
+ /* find next op in this request (if any) */
+ while (++o->o_sparse_op_idx < req->r_num_ops) {
+ op = &req->r_ops[o->o_sparse_op_idx];
+ if (op->op == CEPH_OSD_OP_SPARSE_READ)
+ goto found;
+ }
+
+ /* reset for next sparse read request */
+ spin_unlock(&o->o_requests_lock);
+ o->o_sparse_op_idx = -1;
+ return 0;
+found:
+ sr->sr_req_off = op->extent.offset;
+ sr->sr_req_len = op->extent.length;
+ sr->sr_pos = sr->sr_req_off;
+ dout("%s: [%d] new sparse read op at idx %d 0x%llx~0x%llx\n", __func__,
+ o->o_osd, o->o_sparse_op_idx, sr->sr_req_off, sr->sr_req_len);
+
+ /* hand off request's sparse extent map buffer */
+ sr->sr_ext_len = op->extent.sparse_ext_cnt;
+ op->extent.sparse_ext_cnt = 0;
+ sr->sr_extent = op->extent.sparse_ext;
+ op->extent.sparse_ext = NULL;
+
+ spin_unlock(&o->o_requests_lock);
+ return 1;
+}
+
+#ifdef __BIG_ENDIAN
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+ int i;
+
+ for (i = 0; i < sr->sr_count; i++) {
+ struct ceph_sparse_extent *ext = &sr->sr_extent[i];
+
+ ext->off = le64_to_cpu((__force __le64)ext->off);
+ ext->len = le64_to_cpu((__force __le64)ext->len);
+ }
+}
+#else
+static inline void convert_extent_map(struct ceph_sparse_read *sr)
+{
+}
+#endif
+
+#define MAX_EXTENTS 4096
+
+static int osd_sparse_read(struct ceph_connection *con,
+ struct ceph_msg_data_cursor *cursor,
+ char **pbuf)
+{
+ struct ceph_osd *o = con->private;
+ struct ceph_sparse_read *sr = &o->o_sparse_read;
+ u32 count = sr->sr_count;
+ u64 eoff, elen;
+ int ret;
+
+ switch (sr->sr_state) {
+ case CEPH_SPARSE_READ_HDR:
+next_op:
+ ret = prep_next_sparse_read(con, cursor);
+ if (ret <= 0)
+ return ret;
+
+ /* number of extents */
+ ret = sizeof(sr->sr_count);
+ *pbuf = (char *)&sr->sr_count;
+ sr->sr_state = CEPH_SPARSE_READ_EXTENTS;
+ break;
+ case CEPH_SPARSE_READ_EXTENTS:
+ /* Convert sr_count to host-endian */
+ count = le32_to_cpu((__force __le32)sr->sr_count);
+ sr->sr_count = count;
+ dout("[%d] got %u extents\n", o->o_osd, count);
+
+ if (count > 0) {
+ if (!sr->sr_extent || count > sr->sr_ext_len) {
+ /*
+ * Apply a hard cap to the number of extents.
+ * If we have more, assume something is wrong.
+ */
+ if (count > MAX_EXTENTS) {
+ dout("%s: OSD returned 0x%x extents in a single reply!\n",
+ __func__, count);
+ return -EREMOTEIO;
+ }
+
+ /* no extent array provided, or too short */
+ kfree(sr->sr_extent);
+ sr->sr_extent = kmalloc_array(count,
+ sizeof(*sr->sr_extent),
+ GFP_NOIO);
+ if (!sr->sr_extent)
+ return -ENOMEM;
+ sr->sr_ext_len = count;
+ }
+ ret = count * sizeof(*sr->sr_extent);
+ *pbuf = (char *)sr->sr_extent;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_LEN;
+ break;
+ }
+ /* No extents? Read data len */
+ fallthrough;
+ case CEPH_SPARSE_READ_DATA_LEN:
+ convert_extent_map(sr);
+ ret = sizeof(sr->sr_datalen);
+ *pbuf = (char *)&sr->sr_datalen;
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ break;
+ case CEPH_SPARSE_READ_DATA:
+ if (sr->sr_index >= count) {
+ sr->sr_state = CEPH_SPARSE_READ_HDR;
+ goto next_op;
+ }
+
+ eoff = sr->sr_extent[sr->sr_index].off;
+ elen = sr->sr_extent[sr->sr_index].len;
+
+ dout("[%d] ext %d off 0x%llx len 0x%llx\n",
+ o->o_osd, sr->sr_index, eoff, elen);
+
+ if (elen > INT_MAX) {
+ dout("Sparse read extent length too long (0x%llx)\n",
+ elen);
+ return -EREMOTEIO;
+ }
+
+ /* zero out anything from sr_pos to start of extent */
+ if (sr->sr_pos < eoff)
+ advance_cursor(cursor, eoff - sr->sr_pos, true);
+
+ /* Set position to end of extent */
+ sr->sr_pos = eoff + elen;
+
+ /* send back the new length and nullify the ptr */
+ cursor->sr_resid = elen;
+ ret = elen;
+ *pbuf = NULL;
+
+ /* Bump the array index */
+ ++sr->sr_index;
+ break;
+ }
+ return ret;
+}
+
static const struct ceph_connection_operations osd_con_ops = {
.get = osd_get_con,
.put = osd_put_con,
+ .sparse_read = osd_sparse_read,
.alloc_msg = osd_alloc_msg,
.dispatch = osd_dispatch,
.fault = osd_fault,
memset(&keys, 0, sizeof(keys));
__skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric,
- &keys, NULL, 0, 0, 0,
- FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+ &keys, NULL, 0, 0, 0, 0);
return __flow_hash_from_keys(&keys, &hashrnd);
}
bool *pfmemalloc)
{
bool ret_pfmemalloc = false;
- unsigned int obj_size;
+ size_t obj_size;
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
- *size = obj_size = kmalloc_size_roundup(obj_size);
+
+ obj_size = kmalloc_size_roundup(obj_size);
+ /* The following cast might truncate high-order bits of obj_size, this
+ * is harmless because kmalloc(obj_size >= 2^32) will fail anyway.
+ */
+ *size = (unsigned int)obj_size;
+
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
- skb_frag_t *frag = skb_shinfo(head_skb)->frags;
unsigned int mss = skb_shinfo(head_skb)->gso_size;
unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
- struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int partial_segs = 0;
unsigned int headroom;
unsigned int len = head_skb->len;
+ struct sk_buff *frag_skb;
+ skb_frag_t *frag;
__be16 proto;
bool csum, sg;
- int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
- int pos;
+ int nfrags, pos;
if ((skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY) &&
mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) {
headroom = skb_headroom(head_skb);
pos = skb_headlen(head_skb);
+ if (skb_orphan_frags(head_skb, GFP_ATOMIC))
+ return ERR_PTR(-ENOMEM);
+
+ nfrags = skb_shinfo(head_skb)->nr_frags;
+ frag = skb_shinfo(head_skb)->frags;
+ frag_skb = head_skb;
+
do {
struct sk_buff *nskb;
skb_frag_t *nskb_frag;
(skb_headlen(list_skb) == len || sg)) {
BUG_ON(skb_headlen(list_skb) > len);
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
frag++;
}
- nskb = skb_clone(list_skb, GFP_ATOMIC);
list_skb = list_skb->next;
- if (unlikely(!nskb))
- goto err;
-
if (unlikely(pskb_trim(nskb, len))) {
kfree_skb(nskb);
goto err;
skb_shinfo(nskb)->flags |= skb_shinfo(head_skb)->flags &
SKBFL_SHARED_FRAG;
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
+ if (skb_zerocopy_clone(nskb, frag_skb, GFP_ATOMIC))
goto err;
while (pos < offset + len) {
if (i >= nfrags) {
+ if (skb_orphan_frags(list_skb, GFP_ATOMIC) ||
+ skb_zerocopy_clone(nskb, list_skb,
+ GFP_ATOMIC))
+ goto err;
+
i = 0;
nfrags = skb_shinfo(list_skb)->nr_frags;
frag = skb_shinfo(list_skb)->frags;
i--;
frag--;
}
- if (skb_orphan_frags(frag_skb, GFP_ATOMIC) ||
- skb_zerocopy_clone(nskb, frag_skb,
- GFP_ATOMIC))
- goto err;
list_skb = list_skb->next;
}
serr->ee.ee_info = tstype;
serr->opt_stats = opt_stats;
serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk_is_tcp(sk))
serr->ee.ee_data -= atomic_read(&sk->sk_tskey);
{
struct sk_buff *skb;
bool tsonly, opt_stats = false;
+ u32 tsflags;
if (!sk)
return;
- if (!hwtstamps && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if (!hwtstamps && !(tsflags & SOF_TIMESTAMPING_OPT_TX_SWHW) &&
skb_shinfo(orig_skb)->tx_flags & SKBTX_IN_PROGRESS)
return;
- tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
+ tsonly = tsflags & SOF_TIMESTAMPING_OPT_TSONLY;
if (!skb_may_tx_timestamp(sk, tsonly))
return;
if (tsonly) {
#ifdef CONFIG_INET
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk_is_tcp(sk)) {
skb = tcp_get_timestamping_opt_stats(sk, orig_skb,
ack_skb);
static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
u32 off, u32 len, bool ingress)
{
+ int err = 0;
+
if (!ingress) {
if (!sock_writeable(psock->sk))
return -EAGAIN;
return skb_send_sock(psock->sk, skb, off, len);
}
- return sk_psock_skb_ingress(psock, skb, off, len);
+ skb_get(skb);
+ err = sk_psock_skb_ingress(psock, skb, off, len);
+ if (err < 0)
+ kfree_skb(skb);
+ return err;
}
static void sk_psock_skb_state(struct sk_psock *psock,
} while (len);
skb = skb_dequeue(&psock->ingress_skb);
- if (!ingress) {
- kfree_skb(skb);
- }
+ kfree_skb(skb);
}
end:
mutex_unlock(&psock->work_mutex);
return false;
if (!sk)
return true;
- switch (sk->sk_family) {
+ /* IPV6_ADDRFORM can change sk->sk_family under us. */
+ switch (READ_ONCE(sk->sk_family)) {
case AF_INET:
return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
if (!match)
return -EINVAL;
- sk->sk_bind_phc = phc_index;
+ WRITE_ONCE(sk->sk_bind_phc, phc_index);
return 0;
}
return ret;
}
- sk->sk_tsflags = val;
+ WRITE_ONCE(sk->sk_tsflags, val);
sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
return -ENOMEM;
}
- sk->sk_forward_alloc += pages << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, pages << PAGE_SHIFT);
WRITE_ONCE(sk->sk_reserved_mem,
sk->sk_reserved_mem + (pages << PAGE_SHIFT));
case SO_TIMESTAMPING_OLD:
lv = sizeof(v.timestamping);
- v.timestamping.flags = sk->sk_tsflags;
- v.timestamping.bind_phc = sk->sk_bind_phc;
+ v.timestamping.flags = READ_ONCE(sk->sk_tsflags);
+ v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc);
break;
case SO_RCVTIMEO_OLD:
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
break;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
break;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
break;
timeo = schedule_timeout(timeo);
}
goto failure;
err = -EPIPE;
- if (sk->sk_shutdown & SEND_SHUTDOWN)
+ if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
goto failure;
if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
{
int ret, amt = sk_mem_pages(size);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
ret = __sk_mem_raise_allocated(sk, size, amt, kind);
if (!ret)
- sk->sk_forward_alloc -= amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT));
return ret;
}
EXPORT_SYMBOL(__sk_mem_schedule);
void __sk_mem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- sk->sk_forward_alloc -= amount << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT));
__sk_mem_reduce_allocated(sk, amount);
}
EXPORT_SYMBOL(__sk_mem_reclaim);
mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
- mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+ mem[SK_MEMINFO_FWD_ALLOC] = sk_forward_alloc_get(sk);
mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
struct bpf_map map;
struct sock **sks;
struct sk_psock_progs progs;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
#define SOCK_CREATE_FLAG_MASK \
return ERR_PTR(-ENOMEM);
bpf_map_init_from_attr(&stab->map, attr);
- raw_spin_lock_init(&stab->lock);
+ spin_lock_init(&stab->lock);
stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries *
sizeof(struct sock *),
struct sock *sk;
int err = 0;
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
sk = *psk;
if (!sk_test || sk_test == sk)
sk = xchg(psk, NULL);
else
err = -EINVAL;
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return err;
}
psock = sk_psock(sk);
WARN_ON_ONCE(!psock);
- raw_spin_lock_bh(&stab->lock);
+ spin_lock_bh(&stab->lock);
osk = stab->sks[idx];
if (osk && flags == BPF_NOEXIST) {
ret = -EEXIST;
stab->sks[idx] = sk;
if (osk)
sock_map_unref(osk, &stab->sks[idx]);
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&stab->lock);
+ spin_unlock_bh(&stab->lock);
if (psock)
sk_psock_put(sk, psock);
out_free:
struct bpf_shtab_bucket {
struct hlist_head head;
- raw_spinlock_t lock;
+ spinlock_t lock;
};
struct bpf_shtab {
* is okay since it's going away only after RCU grace period.
* However, we need to check whether it's still present.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem_probe = sock_hash_lookup_elem_raw(&bucket->head, elem->hash,
elem->key, map->key_size);
if (elem_probe && elem_probe == elem) {
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
}
static long sock_hash_delete_elem(struct bpf_map *map, void *key)
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem) {
hlist_del_rcu(&elem->node);
sock_hash_free_elem(htab, elem);
ret = 0;
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return ret;
}
hash = sock_hash_bucket_hash(key, key_size);
bucket = sock_hash_select_bucket(htab, hash);
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
elem = sock_hash_lookup_elem_raw(&bucket->head, hash, key, key_size);
if (elem && flags == BPF_NOEXIST) {
ret = -EEXIST;
sock_map_unref(elem->sk, elem);
sock_hash_free_elem(htab, elem);
}
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
return 0;
out_unlock:
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
sk_psock_put(sk, psock);
out_free:
sk_psock_free_link(link);
for (i = 0; i < htab->buckets_num; i++) {
INIT_HLIST_HEAD(&htab->buckets[i].head);
- raw_spin_lock_init(&htab->buckets[i].lock);
+ spin_lock_init(&htab->buckets[i].lock);
}
return &htab->map;
* exists, psock exists and holds a ref to socket. That
* lets us to grab a socket ref too.
*/
- raw_spin_lock_bh(&bucket->lock);
+ spin_lock_bh(&bucket->lock);
hlist_for_each_entry(elem, &bucket->head, node)
sock_hold(elem->sk);
hlist_move_list(&bucket->head, &unlink_list);
- raw_spin_unlock_bh(&bucket->lock);
+ spin_unlock_bh(&bucket->lock);
/* Process removed entries out of atomic context to
* block for socket lock before deleting the psock's
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = sock_net(skb->sk);
- struct handshake_req *req = NULL;
- struct socket *sock = NULL;
+ struct handshake_req *req;
+ struct socket *sock;
int fd, status, err;
if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
return -EINVAL;
fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
- err = 0;
sock = sockfd_lookup(fd, &err);
- if (err) {
- err = -EBADF;
- goto out_status;
- }
+ if (!sock)
+ return err;
req = handshake_req_hash_lookup(sock->sk);
if (!req) {
err = -EBUSY;
+ trace_handshake_cmd_done_err(net, req, sock->sk, err);
fput(sock->file);
- goto out_status;
+ return err;
}
trace_handshake_cmd_done(net, req, sock->sk, fd);
handshake_complete(req, status, info);
fput(sock->file);
return 0;
-
-out_status:
- trace_handshake_cmd_done_err(net, req, sock->sk, err);
- return err;
}
static unsigned int handshake_net_id;
hlist_del(&nexthop_nh->nh_hash);
} endfor_nexthops(fi)
}
- fi->fib_dead = 1;
+ /* Paired with READ_ONCE() from fib_table_lookup() */
+ WRITE_ONCE(fi->fib_dead, 1);
fib_info_put(fi);
}
spin_unlock_bh(&fib_info_lock);
link_it:
ofi = fib_find_info(fi);
if (ofi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
refcount_inc(&ofi->fib_treeref);
failure:
if (fi) {
+ /* fib_table_lookup() should not see @fi yet. */
fi->fib_dead = 1;
free_fib_info(fi);
}
if (fa->fa_dscp &&
inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
continue;
- if (fi->fib_dead)
+ /* Paired with WRITE_ONCE() in fib_release_info() */
+ if (READ_ONCE(fi->fib_dead))
continue;
if (fa->fa_info->fib_scope < flp->flowi4_scope)
continue;
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
- unsigned int size = mtu;
+ unsigned int size;
+ size = min(mtu, IP_MAX_MTU);
while (1) {
skb = alloc_skb(size + hlen + tlen,
GFP_ATOMIC | __GFP_NOWARN);
struct ip_options *opt = &(IPCB(skb)->opt);
__IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
} else if (rt->rt_type == RTN_BROADCAST)
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
+ /* OUTOCTETS should be counted after fragment */
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
+
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
skb = skb_expand_head(skb, hh_len);
if (!skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
{
struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
- IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
-
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
paged = !!cork->gso_size;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
* or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
*/
info = PKTINFO_SKB_CB(skb);
- if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
+ if (!(READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_CMSG) ||
!info->ipi_ifindex)
return false;
struct ip_options *opt = &(IPCB(skb)->opt);
IP_INC_STATS(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
}
}
- if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_SOFTWARE)
has_timestamping = true;
else
tss->ts[0] = (struct timespec64) {0};
}
if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) {
- if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)
+ if (READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_RAW_HARDWARE)
has_timestamping = true;
else
tss->ts[2] = (struct timespec64) {0};
if (delta <= 0)
return;
amt = sk_mem_pages(delta);
- sk->sk_forward_alloc += amt << PAGE_SHIFT;
+ sk_forward_alloc_add(sk, amt << PAGE_SHIFT);
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
spin_lock(&sk_queue->lock);
- sk->sk_forward_alloc += size;
+ sk_forward_alloc_add(sk, size);
amt = (sk->sk_forward_alloc - partial) & ~(PAGE_SIZE - 1);
- sk->sk_forward_alloc -= amt;
+ sk_forward_alloc_add(sk, -amt);
if (amt)
__sk_mem_reduce_allocated(sk, amt >> PAGE_SHIFT);
goto uncharge_drop;
}
- sk->sk_forward_alloc -= size;
+ sk_forward_alloc_add(sk, -size);
/* no need to setup a destructor, we will explicitly release the
* forward allocated memory on dequeue
* idev->desync_factor if it's larger
*/
cnf_temp_preferred_lft = READ_ONCE(idev->cnf.temp_prefered_lft);
- max_desync_factor = min_t(__u32,
+ max_desync_factor = min_t(long,
idev->cnf.max_desync_factor,
cnf_temp_preferred_lft - regen_advance);
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
- if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+ IP6CB(skb)->flags & IP6SKB_MULTIPATH)
return NULL;
return skb;
struct dst_entry *dst = skb_dst(skb);
__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
#ifdef CONFIG_NET_SWITCHDEV
if (skb->offload_l3_fwd_mark) {
orig_mtu = mtu;
if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
- sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
+ READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID)
tskey = atomic_inc_return(&sk->sk_tskey) - 1;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
{
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
return -EINVAL;
ipcm6_init_sk(&ipc6, np);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
- ipc6.sockc.tsflags = sk->sk_tsflags;
+ ipc6.sockc.tsflags = READ_ONCE(sk->sk_tsflags);
ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
* that all multiplexors and psocks have been destroyed.
*/
WARN_ON(!list_empty(&knet->mux_list));
+
+ mutex_destroy(&knet->mutex);
}
static struct pernet_operations kcm_net_ops = {
__kfree_skb(skb);
}
+static void mptcp_rmem_fwd_alloc_add(struct sock *sk, int size)
+{
+ WRITE_ONCE(mptcp_sk(sk)->rmem_fwd_alloc,
+ mptcp_sk(sk)->rmem_fwd_alloc + size);
+}
+
static void mptcp_rmem_charge(struct sock *sk, int size)
{
- mptcp_sk(sk)->rmem_fwd_alloc -= size;
+ mptcp_rmem_fwd_alloc_add(sk, -size);
}
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
{
amount >>= PAGE_SHIFT;
- mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT;
+ mptcp_rmem_charge(sk, amount << PAGE_SHIFT);
__sk_mem_reduce_allocated(sk, amount);
}
struct mptcp_sock *msk = mptcp_sk(sk);
int reclaimable;
- msk->rmem_fwd_alloc += size;
+ mptcp_rmem_fwd_alloc_add(sk, size);
reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
/* see sk_mem_uncharge() for the rationale behind the following schema */
if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV))
return false;
- msk->rmem_fwd_alloc += amount;
+ mptcp_rmem_fwd_alloc_add(sk, amount);
return true;
}
}
/* data successfully copied into the write queue */
- sk->sk_forward_alloc -= total_ts;
+ sk_forward_alloc_add(sk, -total_ts);
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
* inet_sock_destruct() will dispose it
*/
- sk->sk_forward_alloc += msk->rmem_fwd_alloc;
- msk->rmem_fwd_alloc = 0;
+ sk_forward_alloc_add(sk, msk->rmem_fwd_alloc);
+ WRITE_ONCE(msk->rmem_fwd_alloc, 0);
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
mptcp_free_local_addr_list(msk);
static int mptcp_forward_alloc_get(const struct sock *sk)
{
- return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+ return READ_ONCE(sk->sk_forward_alloc) +
+ READ_ONCE(mptcp_sk(sk)->rmem_fwd_alloc);
}
static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
#define IP_SET_HASH_WITH_PROTO
#define IP_SET_HASH_WITH_NETS
#define IPSET_NET_COUNT 2
+#define IP_SET_HASH_WITH_NET0
/* IPv4 variant */
[NFT_MSG_NEWFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_REGISTER,
[NFT_MSG_GETFLOWTABLE] = AUDIT_NFT_OP_INVALID,
[NFT_MSG_DELFLOWTABLE] = AUDIT_NFT_OP_FLOWTABLE_UNREGISTER,
+ [NFT_MSG_GETSETELEM_RESET] = AUDIT_NFT_OP_SETELEM_RESET,
};
static void nft_validate_state_update(struct nft_table *table, u8 new_validate_state)
nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
+static void audit_log_rule_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u",
+ table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_rule_dump_ctx {
char *table;
char *chain;
cont_skip:
(*idx)++;
}
+
+ if (reset && *idx)
+ audit_log_rule_reset(table, cb->seq, *idx);
+
return 0;
}
if (err < 0)
goto err_fill_rule_info;
+ if (reset)
+ audit_log_rule_reset(table, nft_pernet(net)->base_seq, 1);
+
return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid);
err_fill_rule_info:
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
+static void audit_log_nft_set_reset(const struct nft_table *table,
+ unsigned int base_seq,
+ unsigned int nentries)
+{
+ char *buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, base_seq);
+
+ audit_log_nfcfg(buf, table->family, nentries,
+ AUDIT_NFT_OP_SETELEM_RESET, GFP_ATOMIC);
+ kfree(buf);
+}
+
struct nft_set_dump_ctx {
const struct nft_set *set;
struct nft_ctx ctx;
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
- const struct nft_set *set, bool reset)
+ const struct nft_set *set, bool reset,
+ unsigned int base_seq)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
elem.priv = catchall->elem;
ret = nf_tables_fill_setelem(skb, set, &elem, reset);
+ if (reset && !ret)
+ audit_log_nft_set_reset(set->table, base_seq, 1);
break;
}
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
- args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
- rcu_read_unlock();
-
+ args.iter.err = nft_set_catchall_dump(net, skb, set,
+ reset, cb->seq);
nla_nest_end(skb, nest);
nlmsg_end(skb, nlh);
+ if (reset && args.iter.count > args.iter.skip)
+ audit_log_nft_set_reset(table, cb->seq,
+ args.iter.count - args.iter.skip);
+
+ rcu_read_unlock();
+
if (args.iter.err && args.iter.err != -EMSGSIZE)
return args.iter.err;
if (args.iter.count == cb->args[0])
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_cur(info->net);
u8 family = info->nfmsg->nfgen_family;
+ int rem, err = 0, nelems = 0;
struct net *net = info->net;
struct nft_table *table;
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
bool reset = false;
- int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
genmask, 0);
NL_SET_BAD_ATTR(extack, attr);
break;
}
+ nelems++;
}
+ if (reset)
+ audit_log_nft_set_reset(table, nft_pernet(net)->base_seq,
+ nelems);
+
return err;
}
f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+ if (f->opt_num > ARRAY_SIZE(f->opt))
+ return -EINVAL;
+
+ if (!memchr(f->genre, 0, MAXGENRELEN) ||
+ !memchr(f->subtype, 0, MAXGENRELEN) ||
+ !memchr(f->version, 0, MAXGENRELEN))
+ return -EINVAL;
+
kf = kmalloc(sizeof(struct nf_osf_finger), GFP_KERNEL);
if (!kf)
return -ENOMEM;
return opt[offset + 1];
}
+static int nft_skb_copy_to_reg(const struct sk_buff *skb, int offset, u32 *dest, unsigned int len)
+{
+ if (len % NFT_REG32_SIZE)
+ dest[len / NFT_REG32_SIZE] = 0;
+
+ return skb_copy_bits(skb, offset, dest, len);
+}
+
static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset, dest, priv->len) < 0)
goto err;
return;
err:
if (priv->flags & NFT_EXTHDR_F_PRESENT) {
*dest = 1;
} else {
- dest[priv->len / NFT_REG32_SIZE] = 0;
+ if (priv->len % NFT_REG32_SIZE)
+ dest[priv->len / NFT_REG32_SIZE] = 0;
memcpy(dest, opt + offset, priv->len);
}
if (!tcph)
goto err;
+ if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
+ goto err;
+
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
union {
__be16 v16;
if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
goto err;
- if (skb_ensure_writable(pkt->skb,
- nft_thoff(pkt) + i + priv->len))
- goto err;
-
- tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
- &tcphdr_len);
- if (!tcph)
- goto err;
-
offset = i + priv->offset;
switch (priv->len) {
if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
goto drop;
- opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
- if (!opt)
- goto err;
+ tcph = (struct tcphdr *)(pkt->skb->data + nft_thoff(pkt));
+ opt = (u8 *)tcph;
+
for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
unsigned int j;
offset + ntohs(sch->length) > pkt->skb->len)
break;
- dest[priv->len / NFT_REG32_SIZE] = 0;
- if (skb_copy_bits(pkt->skb, offset + priv->offset,
- dest, priv->len) < 0)
+ if (nft_skb_copy_to_reg(pkt->skb, offset + priv->offset,
+ dest, priv->len) < 0)
break;
return;
}
struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
struct rb_node *node, *next, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
int d, err;
if (!nft_set_elem_active(&rbe->ext, genmask))
continue;
- /* perform garbage collection to avoid bogus overlap reports. */
- if (nft_set_elem_expired(&rbe->ext)) {
+ /* perform garbage collection to avoid bogus overlap reports
+ * but skip new elements in this transaction.
+ */
+ if (nft_set_elem_expired(&rbe->ext) &&
+ nft_set_elem_active(&rbe->ext, cur_genmask)) {
err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
{
const struct xt_sctp_info *info = par->matchinfo;
+ if (info->flag_count > ARRAY_SIZE(info->flag_info))
+ return -EINVAL;
if (info->flags & ~XT_SCTP_VALID_FLAGS)
return -EINVAL;
if (info->invflags & ~XT_SCTP_VALID_FLAGS)
return ret ^ data->invert;
}
+static int u32_mt_checkentry(const struct xt_mtchk_param *par)
+{
+ const struct xt_u32 *data = par->matchinfo;
+ const struct xt_u32_test *ct;
+ unsigned int i;
+
+ if (data->ntests > ARRAY_SIZE(data->tests))
+ return -EINVAL;
+
+ for (i = 0; i < data->ntests; ++i) {
+ ct = &data->tests[i];
+
+ if (ct->nnums > ARRAY_SIZE(ct->location) ||
+ ct->nvalues > ARRAY_SIZE(ct->value))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct xt_match xt_u32_mt_reg __read_mostly = {
.name = "u32",
.revision = 0,
.family = NFPROTO_UNSPEC,
.match = u32_mt,
+ .checkentry = u32_mt_checkentry,
.matchsize = sizeof(struct xt_u32),
.me = THIS_MODULE,
};
struct pie_params p_params;
u32 ecn_prob;
u32 flows_cnt;
+ u32 flows_cursor;
u32 quantum;
u32 memory_limit;
u32 new_flow_count;
static void fq_pie_timer(struct timer_list *t)
{
struct fq_pie_sched_data *q = from_timer(q, t, adapt_timer);
+ unsigned long next, tupdate;
struct Qdisc *sch = q->sch;
spinlock_t *root_lock; /* to lock qdisc for probability calculations */
- u32 idx;
+ int max_cnt, i;
rcu_read_lock();
root_lock = qdisc_lock(qdisc_root_sleeping(sch));
spin_lock(root_lock);
- for (idx = 0; idx < q->flows_cnt; idx++)
- pie_calculate_probability(&q->p_params, &q->flows[idx].vars,
- q->flows[idx].backlog);
-
- /* reset the timer to fire after 'tupdate' jiffies. */
- if (q->p_params.tupdate)
- mod_timer(&q->adapt_timer, jiffies + q->p_params.tupdate);
+ /* Limit this expensive loop to 2048 flows per round. */
+ max_cnt = min_t(int, q->flows_cnt - q->flows_cursor, 2048);
+ for (i = 0; i < max_cnt; i++) {
+ pie_calculate_probability(&q->p_params,
+ &q->flows[q->flows_cursor].vars,
+ q->flows[q->flows_cursor].backlog);
+ q->flows_cursor++;
+ }
+ tupdate = q->p_params.tupdate;
+ next = 0;
+ if (q->flows_cursor >= q->flows_cnt) {
+ q->flows_cursor = 0;
+ next = tupdate;
+ }
+ if (tupdate)
+ mod_timer(&q->adapt_timer, jiffies + next);
spin_unlock(root_lock);
rcu_read_unlock();
}
.priv_size = sizeof(struct plug_sched_data),
.enqueue = plug_enqueue,
.dequeue = plug_dequeue,
- .peek = qdisc_peek_head,
+ .peek = qdisc_peek_dequeued,
.init = plug_init,
.change = plug_change,
.reset = qdisc_reset_queue,
}
/* Dequeue head packet of the head class in the DRR queue of the aggregate. */
-static void agg_dequeue(struct qfq_aggregate *agg,
- struct qfq_class *cl, unsigned int len)
+static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
+ struct qfq_class *cl, unsigned int len)
{
- qdisc_dequeue_peeked(cl->qdisc);
+ struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
+
+ if (!skb)
+ return NULL;
cl->deficit -= (int) len;
cl->deficit += agg->lmax;
list_move_tail(&cl->alist, &agg->active);
}
+
+ return skb;
}
static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
if (!skb)
return NULL;
- qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
+
+ skb = agg_dequeue(in_serv_agg, cl, len);
+
+ if (!skb) {
+ sch->q.qlen++;
+ return NULL;
+ }
+
+ qdisc_qstats_backlog_dec(sch, skb);
qdisc_bstats_update(sch, skb);
- agg_dequeue(in_serv_agg, cl, len);
/* If lmax is lowered, through qfq_change_class, for a class
* owning pending packets with larger size than the new value
* of lmax, then the following condition may hold.
assoc->init_retries, assoc->shutdown_retries,
assoc->rtx_data_chunks,
refcount_read(&sk->sk_wmem_alloc),
- sk->sk_wmem_queued,
+ READ_ONCE(sk->sk_wmem_queued),
sk->sk_sndbuf,
sk->sk_rcvbuf);
seq_printf(seq, "\n");
#include <net/sctp/stream_sched.h>
/* Forward declarations for internal helper functions. */
-static bool sctp_writeable(struct sock *sk);
+static bool sctp_writeable(const struct sock *sk);
static void sctp_wfree(struct sk_buff *skb);
static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
size_t msg_len);
refcount_add(sizeof(struct sctp_chunk), &sk->sk_wmem_alloc);
asoc->sndbuf_used += chunk->skb->truesize + sizeof(struct sctp_chunk);
- sk->sk_wmem_queued += chunk->skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, chunk->skb->truesize + sizeof(struct sctp_chunk));
sk_mem_charge(sk, chunk->skb->truesize);
}
struct sock *sk = asoc->base.sk;
sk_mem_uncharge(sk, skb->truesize);
- sk->sk_wmem_queued -= skb->truesize + sizeof(struct sctp_chunk);
+ sk_wmem_queued_add(sk, -(skb->truesize + sizeof(struct sctp_chunk)));
asoc->sndbuf_used -= skb->truesize + sizeof(struct sctp_chunk);
WARN_ON(refcount_sub_and_test(sizeof(struct sctp_chunk),
&sk->sk_wmem_alloc));
* UDP-style sockets or TCP-style sockets, this code should work.
* - Daisy
*/
-static bool sctp_writeable(struct sock *sk)
+static bool sctp_writeable(const struct sock *sk)
{
- return sk->sk_sndbuf > sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) > READ_ONCE(sk->sk_wmem_queued);
}
/* Wait for an association to go into ESTABLISHED state. If timeout is 0,
static ktime_t get_timestamp(struct sock *sk, struct sk_buff *skb, int *if_index)
{
- bool cycles = sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC;
+ bool cycles = READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_BIND_PHC;
struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
struct net_device *orig_dev;
ktime_t hwtstamp;
int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW);
struct scm_timestamping_internal tss;
-
int empty = 1, false_tstamp = 0;
struct skb_shared_hwtstamps *shhwtstamps =
skb_hwtstamps(skb);
int if_index;
ktime_t hwtstamp;
+ u32 tsflags;
/* Race occurred between timestamp enabling and packet
receiving. Fill in the current time for now. */
}
memset(&tss, 0, sizeof(tss));
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
+ tsflags = READ_ONCE(sk->sk_tsflags);
+ if ((tsflags & SOF_TIMESTAMPING_SOFTWARE) &&
ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0))
empty = 0;
if (shhwtstamps &&
- (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
+ (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
!skb_is_swtx_tstamp(skb, false_tstamp)) {
if_index = 0;
if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP_NETDEV)
else
hwtstamp = shhwtstamps->hwtstamp;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_BIND_PHC)
+ if (tsflags & SOF_TIMESTAMPING_BIND_PHC)
hwtstamp = ptp_convert_timestamp(&hwtstamp,
- sk->sk_bind_phc);
+ READ_ONCE(sk->sk_bind_phc));
if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) {
empty = 0;
- if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
+ if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) &&
!skb_is_err_queue(skb))
put_ts_pktinfo(msg, skb, if_index);
}
* What the above comment does talk about? --ANK(980817)
*/
- if (unix_tot_inflight)
+ if (READ_ONCE(unix_tot_inflight))
unix_gc(); /* Garbage collect fds */
}
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
}
- user->unix_inflight++;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
spin_unlock(&unix_gc_lock);
}
/* Paired with READ_ONCE() in wait_for_unix_gc() */
WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
}
- user->unix_inflight--;
+ WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
spin_unlock(&unix_gc_lock);
}
{
struct user_struct *user = current_user();
- if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE)))
return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
return false;
}
for (copied = 0, i = skb_shinfo(skb)->nr_frags; copied < len; i++) {
if (unlikely(i >= MAX_SKB_FRAGS))
- return ERR_PTR(-EFAULT);
+ return ERR_PTR(-EOVERFLOW);
page = pool->umem->pgs[addr >> PAGE_SHIFT];
get_page(page);
skb_put(skb, len);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err))
+ if (unlikely(err)) {
+ kfree_skb(skb);
goto free_err;
+ }
} else {
int nr_frags = skb_shinfo(skb)->nr_frags;
struct page *page;
u8 *vaddr;
if (unlikely(nr_frags == (MAX_SKB_FRAGS - 1) && xp_mb_desc(desc))) {
- err = -EFAULT;
+ err = -EOVERFLOW;
goto free_err;
}
return skb;
free_err:
- if (err == -EAGAIN) {
- xsk_cq_cancel_locked(xs, 1);
- } else {
- xsk_set_destructor_arg(skb);
- xsk_drop_skb(skb);
+ if (err == -EOVERFLOW) {
+ /* Drop the packet */
+ xsk_set_destructor_arg(xs->skb);
+ xsk_drop_skb(xs->skb);
xskq_cons_release(xs->tx);
+ } else {
+ /* Let application retry */
+ xsk_cq_cancel_locked(xs, 1);
}
return ERR_PTR(err);
skb = xsk_build_skb(xs, &desc);
if (IS_ERR(skb)) {
err = PTR_ERR(skb);
- if (err == -EAGAIN)
+ if (err != -EOVERFLOW)
goto out;
err = 0;
continue;
sock_diag_save_cookie(sk, msg->xdiag_cookie);
mutex_lock(&xs->mutex);
+ if (READ_ONCE(xs->state) == XSK_UNBOUND)
+ goto out_nlmsg_trim;
+
if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb))
goto out_nlmsg_trim;
# They are independent, and can be combined like W=12 or W=123e.
# ==========================================================================
-KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+# Default set of warnings, always enabled
+KBUILD_CFLAGS += -Wall
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -Werror=implicit-function-declaration
+KBUILD_CFLAGS += -Werror=implicit-int
+KBUILD_CFLAGS += -Werror=return-type
+KBUILD_CFLAGS += -Werror=strict-prototypes
+KBUILD_CFLAGS += -Wno-format-security
+KBUILD_CFLAGS += -Wno-trigraphs
+KBUILD_CFLAGS += $(call cc-disable-warning,frame-address,)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
+
+ifneq ($(CONFIG_FRAME_WARN),0)
+KBUILD_CFLAGS += -Wframe-larger-than=$(CONFIG_FRAME_WARN)
+endif
+
+KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
+KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
+
+ifdef CONFIG_CC_IS_CLANG
+# The kernel builds with '-std=gnu11' so use of GNU extensions is acceptable.
+KBUILD_CFLAGS += -Wno-gnu
+else
+
+# gcc inanely warns about local variables called 'main'
+KBUILD_CFLAGS += -Wno-main
+endif
+
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+
+# These result in bogus false positives
+KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
+
+# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
+KBUILD_CFLAGS += -Wvla
+
+# disable pointer signed / unsigned warnings in gcc 4.0
+KBUILD_CFLAGS += -Wno-pointer-sign
+
+# In order to make sure new function cast mismatches are not introduced
+# in the kernel (to avoid tripping CFI checking), the kernel should be
+# globally built with -Wcast-function-type.
+KBUILD_CFLAGS += $(call cc-option, -Wcast-function-type)
+
+# The allocators already balk at large sizes, so silence the compiler
+# warnings for bounds checks involving those possible values. While
+# -Wno-alloc-size-larger-than would normally be used here, earlier versions
+# of gcc (<9.1) weirdly don't handle the option correctly when _other_
+# warnings are produced (?!). Using -Walloc-size-larger-than=SIZE_MAX
+# doesn't work (as it is documented to), silently resolving to "0" prior to
+# version 9.1 (and producing an error more recently). Numeric values larger
+# than PTRDIFF_MAX also don't work prior to version 9.1, which are silently
+# ignored, continuing to default to PTRDIFF_MAX. So, left with no other
+# choice, we must perform a versioned check to disable this warning.
+# https://lore.kernel.org/lkml/20210824115859.187f272f@canb.auug.org.au
+KBUILD_CFLAGS-$(call gcc-min-version, 90100) += -Wno-alloc-size-larger-than
+KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
+
+# Prohibit date/time macros, which would make the build non-deterministic
+KBUILD_CFLAGS += -Werror=date-time
+
+# enforce correct pointer usage
+KBUILD_CFLAGS += $(call cc-option,-Werror=incompatible-pointer-types)
+
+# Require designated initializers for all marked structures
+KBUILD_CFLAGS += $(call cc-option,-Werror=designated-init)
+
+# Warn if there is an enum types mismatch
+KBUILD_CFLAGS += $(call cc-option,-Wenum-conversion)
# backward compatibility
KBUILD_EXTRA_WARN ?= $(KBUILD_ENABLE_EXTRA_GCC_CHECKS)
KBUILD_CFLAGS += -Wextra -Wunused -Wno-unused-parameter
KBUILD_CFLAGS += -Wmissing-declarations
+KBUILD_CFLAGS += $(call cc-option, -Wrestrict)
KBUILD_CFLAGS += -Wmissing-format-attribute
KBUILD_CFLAGS += -Wmissing-prototypes
KBUILD_CFLAGS += -Wold-style-definition
KBUILD_CFLAGS += $(call cc-option, -Wunused-but-set-variable)
KBUILD_CFLAGS += $(call cc-option, -Wunused-const-variable)
KBUILD_CFLAGS += $(call cc-option, -Wpacked-not-aligned)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-overflow)
+KBUILD_CFLAGS += $(call cc-option, -Wformat-truncation)
+KBUILD_CFLAGS += $(call cc-option, -Wstringop-overflow)
KBUILD_CFLAGS += $(call cc-option, -Wstringop-truncation)
-# The following turn off the warnings enabled by -Wextra
-KBUILD_CFLAGS += -Wno-missing-field-initializers
-KBUILD_CFLAGS += -Wno-sign-compare
-KBUILD_CFLAGS += -Wno-type-limits
-KBUILD_CFLAGS += -Wno-shift-negative-value
KBUILD_CPPFLAGS += -Wundef
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
# Some diagnostics enabled by default are noisy.
# Suppress them by using -Wno... except for W=1.
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+KBUILD_CFLAGS += $(call cc-disable-warning, restrict)
+KBUILD_CFLAGS += $(call cc-disable-warning, packed-not-aligned)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-overflow)
+KBUILD_CFLAGS += $(call cc-disable-warning, stringop-truncation)
ifdef CONFIG_CC_IS_CLANG
-KBUILD_CFLAGS += -Wno-initializer-overrides
# Clang before clang-16 would warn on default argument promotions.
ifneq ($(call clang-min-version, 160000),y)
# Disable -Wformat
KBUILD_CFLAGS += -Wformat-insufficient-args
endif
endif
-KBUILD_CFLAGS += -Wno-sign-compare
KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast)
KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare
KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access)
KBUILD_CFLAGS += $(call cc-option, -Wmaybe-uninitialized)
KBUILD_CFLAGS += $(call cc-option, -Wunused-macros)
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Winitializer-overrides
+endif
+
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-missing-field-initializers
+KBUILD_CFLAGS += -Wno-type-limits
+KBUILD_CFLAGS += -Wno-shift-negative-value
+
+ifdef CONFIG_CC_IS_CLANG
+KBUILD_CFLAGS += -Wno-initializer-overrides
+else
+KBUILD_CFLAGS += -Wno-maybe-uninitialized
+endif
+
endif
#
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN3
+else
+
+# The following turn off the warnings enabled by -Wextra
+KBUILD_CFLAGS += -Wno-sign-compare
+
endif
#
include include/config/auto.conf
include $(srctree)/scripts/Kbuild.include
+install-y :=
+
+ifeq ($(KBUILD_EXTMOD)$(sign-only),)
+
+# remove the old directory and symlink
+$(shell rm -fr $(MODLIB)/kernel $(MODLIB)/build)
+
+install-$(CONFIG_MODULES) += $(addprefix $(MODLIB)/, build modules.order)
+
+$(MODLIB)/build: FORCE
+ $(call cmd,symlink)
+
+quiet_cmd_symlink = SYMLINK $@
+ cmd_symlink = ln -s $(CURDIR) $@
+
+$(MODLIB)/modules.order: modules.order FORCE
+ $(call cmd,install_modorder)
+
+quiet_cmd_install_modorder = INSTALL $@
+ cmd_install_modorder = sed 's:^\(.*\)\.o$$:kernel/\1.ko:' $< > $@
+
+# Install modules.builtin(.modinfo) even when CONFIG_MODULES is disabled.
+install-y += $(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo)
+
+$(addprefix $(MODLIB)/, modules.builtin modules.builtin.modinfo): $(MODLIB)/%: % FORCE
+ $(call cmd,install)
+
+endif
+
modules := $(call read-file, $(MODORDER))
ifeq ($(KBUILD_EXTMOD),)
suffix-$(CONFIG_MODULE_COMPRESS_ZSTD) := .zst
modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules))
+install-$(CONFIG_MODULES) += $(modules)
-__modinst: $(modules)
+__modinst: $(install-y)
@:
#
# Installation
#
quiet_cmd_install = INSTALL $@
- cmd_install = mkdir -p $(dir $@); cp $< $@
+ cmd_install = cp $< $@
# Strip
#
# Signing
# Don't stop modules_install even if we can't sign external modules.
#
-ifeq ($(CONFIG_MODULE_SIG_ALL),y)
ifeq ($(filter pkcs11:%, $(CONFIG_MODULE_SIG_KEY)),)
sig-key := $(if $(wildcard $(CONFIG_MODULE_SIG_KEY)),,$(srctree)/)$(CONFIG_MODULE_SIG_KEY)
else
quiet_cmd_sign = SIGN $@
cmd_sign = scripts/sign-file $(CONFIG_MODULE_SIG_HASH) "$(sig-key)" certs/signing_key.x509 $@ \
$(if $(KBUILD_EXTMOD),|| true)
-else
+
+ifeq ($(sign-only),)
+
+# During modules_install, modules are signed only when CONFIG_MODULE_SIG_ALL=y.
+ifndef CONFIG_MODULE_SIG_ALL
quiet_cmd_sign :=
cmd_sign := :
endif
-ifeq ($(modules_sign_only),)
+# Create necessary directories
+$(shell mkdir -p $(sort $(dir $(install-y))))
$(dst)/%.ko: $(extmod_prefix)%.ko FORCE
$(call cmd,install)
$(call cmd,strip)
$(call cmd,sign)
+ifdef CONFIG_MODULES
+__modinst: depmod
+
+PHONY += depmod
+depmod: $(install-y)
+ $(call cmd,depmod)
+
+quiet_cmd_depmod = DEPMOD $(MODLIB)
+ cmd_depmod = $(srctree)/scripts/depmod.sh $(KERNELRELEASE)
+endif
+
else
$(dst)/%.ko: FORCE
MODPOST = scripts/mod/modpost
modpost-args = \
+ $(if $(CONFIG_MODULES),-M) \
$(if $(CONFIG_MODVERSIONS),-m) \
$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \
$(if $(CONFIG_SECTION_MISMATCH_WARN_ONLY),,-E) \
samples scripts security sound tools usr virt \
.config Makefile \
Kbuild Kconfig COPYING $(wildcard localversion*)
-MKSPEC := $(srctree)/scripts/package/mkspec
quiet_cmd_src_tar = TAR $(2).tar.gz
cmd_src_tar = \
$(linux-tarballs): .tmp_HEAD FORCE
$(call if_changed,archive)
-# rpm-pkg
+# rpm-pkg srcrpm-pkg binrpm-pkg
# ---------------------------------------------------------------------------
-PHONY += rpm-pkg
-rpm-pkg: srpm = $(shell rpmspec --srpm --query --queryformat='%{name}-%{VERSION}-%{RELEASE}.src.rpm' kernel.spec)
-rpm-pkg: srcrpm-pkg
- +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -rb $(srpm) \
- --define='_smp_mflags %{nil}'
-# srcrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += srcrpm-pkg
-srcrpm-pkg: linux.tar.gz
- $(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
- +rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -bs kernel.spec \
- --define='_smp_mflags %{nil}' --define='_sourcedir rpmbuild/SOURCES' --define='_srcrpmdir .'
+quiet_cmd_mkspec = GEN $@
+ cmd_mkspec = $(srctree)/scripts/package/mkspec > $@
-# binrpm-pkg
-# ---------------------------------------------------------------------------
-PHONY += binrpm-pkg
-binrpm-pkg:
- $(MAKE) -f $(srctree)/Makefile
- $(CONFIG_SHELL) $(MKSPEC) prebuilt > $(objtree)/binkernel.spec
- +rpmbuild $(RPMOPTS) --define "_builddir $(objtree)" --target \
- $(UTS_MACHINE)-linux -bb $(objtree)/binkernel.spec
+kernel.spec: FORCE
+ $(call cmd,mkspec)
+
+PHONY += rpm-sources
+rpm-sources: linux.tar.gz
+ $(Q)mkdir -p rpmbuild/SOURCES
+ $(Q)ln -f linux.tar.gz rpmbuild/SOURCES/linux.tar.gz
+ $(Q)cp $(KCONFIG_CONFIG) rpmbuild/SOURCES/config
+ $(Q)$(srctree)/scripts/package/gen-diff-patch rpmbuild/SOURCES/diff.patch
+
+PHONY += rpm-pkg srcrpm-pkg binrpm-pkg
+
+rpm-pkg: private build-type := a
+srcrpm-pkg: private build-type := s
+binrpm-pkg: private build-type := b
+
+rpm-pkg srcrpm-pkg: rpm-sources
+rpm-pkg srcrpm-pkg binrpm-pkg: kernel.spec
+ +$(strip rpmbuild -b$(build-type) kernel.spec \
+ --define='_topdir $(abspath rpmbuild)' \
+ $(if $(filter a b, $(build-type)), \
+ --target $(UTS_MACHINE)-linux --build-in-place --noprep --define='_smp_mflags %{nil}' \
+ $$(rpm -q rpm >/dev/null 2>&1 || echo --nodeps)) \
+ $(if $(filter b, $(build-type)), \
+ --without devel) \
+ $(RPMOPTS))
# deb-pkg srcdeb-pkg bindeb-pkg
# ---------------------------------------------------------------------------
$(if $(findstring source, $(build-type)), \
--unsigned-source --compression=$(KDEB_SOURCE_COMPRESS)) \
$(if $(findstring binary, $(build-type)), \
- -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
+ --rules-file='$(MAKE) -f debian/rules' --jobs=1 -r$(KBUILD_PKG_ROOTCMD) -a$$(cat debian/arch), \
--no-check-builddeps) \
$(DPKG_FLAGS))
-PHONY += intdeb-pkg
-intdeb-pkg:
- +$(CONFIG_SHELL) $(srctree)/scripts/package/builddeb
-
# snap-pkg
# ---------------------------------------------------------------------------
PHONY += snap-pkg
Break down helper function protocol into smaller chunks: return type,
name, distincts arguments.
"""
- arg_re = re.compile('((\w+ )*?(\w+|...))( (\**)(\w+))?$')
+ arg_re = re.compile(r'((\w+ )*?(\w+|...))( (\**)(\w+))?$')
res = {}
- proto_re = re.compile('(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
+ proto_re = re.compile(r'(.+) (\**)(\w+)\(((([^,]+)(, )?){1,5})\)$')
capture = proto_re.match(self.proto)
res['ret_type'] = capture.group(1)
return Helper(proto=proto, desc=desc, ret=ret)
def parse_symbol(self):
- p = re.compile(' \* ?(BPF\w+)$')
+ p = re.compile(r' \* ?(BPF\w+)$')
capture = p.match(self.line)
if not capture:
raise NoSyscallCommandFound
- end_re = re.compile(' \* ?NOTES$')
+ end_re = re.compile(r' \* ?NOTES$')
end = end_re.match(self.line)
if end:
raise NoSyscallCommandFound
# - Same as above, with "const" and/or "struct" in front of type
# - "..." (undefined number of arguments, for bpf_trace_printk())
# There is at least one term ("void"), and at most five arguments.
- p = re.compile(' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
+ p = re.compile(r' \* ?((.+) \**\w+\((((const )?(struct )?(\w+|\.\.\.)( \**\w+)?)(, )?){1,5}\))$')
capture = p.match(self.line)
if not capture:
raise NoHelperFound
return capture.group(1)
def parse_desc(self, proto):
- p = re.compile(' \* ?(?:\t| {5,8})Description$')
+ p = re.compile(r' \* ?(?:\t| {5,8})Description$')
capture = p.match(self.line)
if not capture:
raise Exception("No description section found for " + proto)
if self.line == ' *\n':
desc += '\n'
else:
- p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+ p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
desc_present = True
return desc
def parse_ret(self, proto):
- p = re.compile(' \* ?(?:\t| {5,8})Return$')
+ p = re.compile(r' \* ?(?:\t| {5,8})Return$')
capture = p.match(self.line)
if not capture:
raise Exception("No return section found for " + proto)
if self.line == ' *\n':
ret += '\n'
else:
- p = re.compile(' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
+ p = re.compile(r' \* ?(?:\t| {5,8})(?:\t| {8})(.*)')
capture = p.match(self.line)
if capture:
ret_present = True
self.seek_to('enum bpf_cmd {',
'Could not find start of bpf_cmd enum', 0)
# Searches for either one or more BPF\w+ enums
- bpf_p = re.compile('\s*(BPF\w+)+')
+ bpf_p = re.compile(r'\s*(BPF\w+)+')
# Searches for an enum entry assigned to another entry,
# for e.g. BPF_PROG_RUN = BPF_PROG_TEST_RUN, which is
# not documented hence should be skipped in check to
# determine if the right number of syscalls are documented
- assign_p = re.compile('\s*(BPF\w+)\s*=\s*(BPF\w+)')
+ assign_p = re.compile(r'\s*(BPF\w+)\s*=\s*(BPF\w+)')
bpf_cmd_str = ''
while True:
capture = assign_p.match(self.line)
break
self.line = self.reader.readline()
# Find the number of occurences of BPF\w+
- self.enum_syscalls = re.findall('(BPF\w+)+', bpf_cmd_str)
+ self.enum_syscalls = re.findall(r'(BPF\w+)+', bpf_cmd_str)
def parse_desc_helpers(self):
self.seek_to(helpersDocStart,
self.seek_to('#define ___BPF_FUNC_MAPPER(FN, ctx...)',
'Could not find start of eBPF helper definition list')
# Searches for one FN(\w+) define or a backslash for newline
- p = re.compile('\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
+ p = re.compile(r'\s*FN\((\w+), (\d+), ##ctx\)|\\\\')
fn_defines_str = ''
i = 0
while True:
break
self.line = self.reader.readline()
# Find the number of occurences of FN(\w+)
- self.define_unique_helpers = re.findall('FN\(\w+, \d+, ##ctx\)', fn_defines_str)
+ self.define_unique_helpers = re.findall(r'FN\(\w+, \d+, ##ctx\)', fn_defines_str)
def validate_helpers(self):
last_helper = ''
try:
cmd = ['git', 'log', '-1', '--pretty=format:%cs', '--no-patch',
'-L',
- '/{}/,/\*\//:include/uapi/linux/bpf.h'.format(delimiter)]
+ '/{}/,/\\*\\//:include/uapi/linux/bpf.h'.format(delimiter)]
date = subprocess.run(cmd, cwd=linuxRoot,
capture_output=True, check=True)
return date.stdout.decode().rstrip()
programs that are compatible with the GNU Privacy License (GPL).
In order to use such helpers, the eBPF program must be loaded with the correct
-license string passed (via **attr**) to the **bpf**\ () system call, and this
+license string passed (via **attr**) to the **bpf**\\ () system call, and this
generally translates into the C source code of the program containing a line
similar to the following:
* The bpftool utility can be used to probe the availability of helper functions
on the system (as well as supported program and map types, and a number of
other parameters). To do so, run **bpftool feature probe** (see
- **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+ **bpftool-feature**\\ (8) for details). Add the **unprivileged** keyword to
list features available to unprivileged users.
Compatibility between helper functions and program types can generally be found
requirement for GPL license is also in those **struct bpf_func_proto**.
Compatibility between helper functions and map types can be found in the
-**check_map_func_compatibility**\ () function in file *kernel/bpf/verifier.c*.
+**check_map_func_compatibility**\\ () function in file *kernel/bpf/verifier.c*.
Helper functions that invalidate the checks on **data** and **data_end**
pointers for network processing are listed in function
-**bpf_helper_changes_pkt_data**\ () in file *net/core/filter.c*.
+**bpf_helper_changes_pkt_data**\\ () in file *net/core/filter.c*.
SEE ALSO
========
-**bpf**\ (2),
-**bpftool**\ (8),
-**cgroups**\ (7),
-**ip**\ (8),
-**perf_event_open**\ (2),
-**sendmsg**\ (2),
-**socket**\ (7),
-**tc-bpf**\ (8)'''
+**bpf**\\ (2),
+**bpftool**\\ (8),
+**cgroups**\\ (7),
+**ip**\\ (8),
+**perf_event_open**\\ (2),
+**sendmsg**\\ (2),
+**socket**\\ (7),
+**tc-bpf**\\ (8)'''
print(footer)
def print_proto(self, helper):
one_arg = '{}{}'.format(comma, a['type'])
if a['name']:
if a['star']:
- one_arg += ' {}**\ '.format(a['star'].replace('*', '\\*'))
+ one_arg += ' {}**\\ '.format(a['star'].replace('*', '\\*'))
else:
one_arg += '** '
one_arg += '*{}*\\ **'.format(a['name'])
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
#
-# A depmod wrapper used by the toplevel Makefile
+# A depmod wrapper
-if test $# -ne 2; then
- echo "Usage: $0 /sbin/depmod <kernelrelease>" >&2
+if test $# -ne 1; then
+ echo "Usage: $0 <kernelrelease>" >&2
exit 1
fi
-DEPMOD=$1
-KERNELRELEASE=$2
+
+KERNELRELEASE=$1
+
+: ${DEPMOD:=depmod}
if ! test -r System.map ; then
echo "Warning: modules_install: missing 'System.map' file. Skipping depmod." >&2
exit 0
fi
-# older versions of depmod require the version string to start with three
-# numbers, so we cheat with a symlink here
-depmod_hack_needed=true
-tmp_dir=$(mktemp -d ${TMPDIR:-/tmp}/depmod.XXXXXX)
-mkdir -p "$tmp_dir/lib/modules/$KERNELRELEASE"
-if "$DEPMOD" -b "$tmp_dir" $KERNELRELEASE 2>/dev/null; then
- if test -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep" -o \
- -e "$tmp_dir/lib/modules/$KERNELRELEASE/modules.dep.bin"; then
- depmod_hack_needed=false
- fi
-fi
-rm -rf "$tmp_dir"
-if $depmod_hack_needed; then
- symlink="$INSTALL_MOD_PATH/lib/modules/99.98.$KERNELRELEASE"
- ln -s "$KERNELRELEASE" "$symlink"
- KERNELRELEASE=99.98.$KERNELRELEASE
-fi
-
set -- -ae -F System.map
if test -n "$INSTALL_MOD_PATH"; then
set -- "$@" -b "$INSTALL_MOD_PATH"
fi
-"$DEPMOD" "$@" "$KERNELRELEASE"
-ret=$?
-
-if $depmod_hack_needed; then
- rm -f "$symlink"
-fi
-
-exit $ret
+exec "$DEPMOD" "$@" "$KERNELRELEASE"
fi
# For arch/powerpc/tools/gcc-check-mprofile-kernel.sh
- if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" &&
- arg_contain -mprofile-kernel "$@"; then
+ if arg_contain -m64 "$@" && arg_contain -mprofile-kernel "$@"; then
if ! test -t 0 && ! grep -q notrace; then
echo "_mcount"
fi
%_defconfig: $(obj)/conf
$(Q)$< $(silent) --defconfig=arch/$(SRCARCH)/configs/$@ $(Kconfig)
-configfiles=$(wildcard $(srctree)/kernel/configs/$@ $(srctree)/arch/$(SRCARCH)/configs/$@)
+configfiles = $(wildcard $(srctree)/kernel/configs/$(1) $(srctree)/arch/$(SRCARCH)/configs/$(1))
+all-config-fragments = $(call configfiles,*.config)
+config-fragments = $(call configfiles,$@)
%.config: $(obj)/conf
- $(if $(call configfiles),, $(error No configuration exists for this target on this architecture))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(configfiles)
+ $(if $(config-fragments),, $(error $@ fragment does not exists on this architecture))
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m .config $(config-fragments)
$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
PHONY += tinyconfig
# Help text used by make help
help:
+ @echo 'Configuration targets:'
@echo ' config - Update current config utilising a line-oriented program'
@echo ' nconfig - Update current config utilising a ncurses menu based program'
@echo ' menuconfig - Update current config utilising a menu based program'
@echo ' default value without prompting'
@echo ' tinyconfig - Configure the tiniest possible kernel'
@echo ' testconfig - Run Kconfig unit tests (requires python3 and pytest)'
+ @echo ''
+ @echo 'Configuration topic targets:'
+ @$(foreach f, $(all-config-fragments), \
+ if help=$$(grep -m1 '^# Help: ' $(f)); then \
+ printf ' %-25s - %s\n' '$(notdir $(f))' "$${help#*: }"; \
+ fi;)
# ===========================================================================
# object files used by all kconfig flavours
char *p, *p2;
struct symbol *sym;
int i, def_flags;
+ const char *warn_unknown;
+ const char *werror;
+ warn_unknown = getenv("KCONFIG_WARN_UNKNOWN_SYMBOLS");
+ werror = getenv("KCONFIG_WERROR");
if (name) {
in = zconf_fopen(name);
} else {
if (def == S_DEF_USER) {
sym = sym_find(line + 2 + strlen(CONFIG_));
if (!sym) {
+ if (warn_unknown)
+ conf_warning("unknown symbol: %s",
+ line + 2 + strlen(CONFIG_));
+
conf_set_changed(true);
continue;
}
sym = sym_find(line + strlen(CONFIG_));
if (!sym) {
- if (def == S_DEF_AUTO)
+ if (def == S_DEF_AUTO) {
/*
* Reading from include/config/auto.conf
* If CONFIG_FOO previously existed in
* include/config/FOO must be touched.
*/
conf_touch_dep(line + strlen(CONFIG_));
- else
+ } else {
+ if (warn_unknown)
+ conf_warning("unknown symbol: %s",
+ line + strlen(CONFIG_));
+
conf_set_changed(true);
+ }
continue;
}
}
free(line);
fclose(in);
+
+ if (conf_warnings && werror)
+ exit(1);
+
return 0;
}
struct list_head entries;
size_t offset;
struct menu *target;
- int index;
};
extern struct file *file_list;
struct menu *menu_get_parent_menu(struct menu *menu);
bool menu_has_help(struct menu *menu);
const char *menu_get_help(struct menu *menu);
+int get_jump_key_char(void);
struct gstr get_relations_str(struct symbol **sym_arr, struct list_head *head);
void menu_get_ext_help(struct menu *menu, struct gstr *help);
int dialog_yesno(const char *title, const char *prompt, int height, int width);
int dialog_msgbox(const char *title, const char *prompt, int height,
int width, int pause);
-
-
-typedef void (*update_text_fn)(char *buf, size_t start, size_t end, void
- *_data);
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
- int initial_width, int *keys, int *_vscroll, int *_hscroll,
- update_text_fn update_text, void *data);
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+ int initial_width, int *_vscroll, int *_hscroll,
+ int (*extra_key_cb)(int, size_t, size_t, void *), void *data);
int dialog_menu(const char *title, const char *prompt,
const void *selected, int *s_scroll);
int dialog_checklist(const char *title, const char *prompt, int height,
static int hscroll;
static int begin_reached, end_reached, page_length;
-static char *buf;
-static char *page;
+static const char *buf, *page;
+static size_t start, end;
/*
* Go back 'n' lines in text. Called by dialog_textbox().
/*
* Print a new page of text.
*/
-static void print_page(WINDOW *win, int height, int width, update_text_fn
- update_text, void *data)
+static void print_page(WINDOW *win, int height, int width)
{
int i, passed_end = 0;
- if (update_text) {
- char *end;
-
- for (i = 0; i < height; i++)
- get_line();
- end = page;
- back_lines(height);
- update_text(buf, page - buf, end - buf, data);
- }
-
page_length = 0;
for (i = 0; i < height; i++) {
print_line(win, i, width);
* refresh window content
*/
static void refresh_text_box(WINDOW *dialog, WINDOW *box, int boxh, int boxw,
- int cur_y, int cur_x, update_text_fn update_text,
- void *data)
+ int cur_y, int cur_x)
{
- print_page(box, boxh, boxw, update_text, data);
+ start = page - buf;
+
+ print_page(box, boxh, boxw);
print_position(dialog);
wmove(dialog, cur_y, cur_x); /* Restore cursor position */
wrefresh(dialog);
+
+ end = page - buf;
}
/*
* Display text from a file in a dialog box.
*
* keys is a null-terminated array
- * update_text() may not add or remove any '\n' or '\0' in tbuf
*/
-int dialog_textbox(const char *title, char *tbuf, int initial_height,
- int initial_width, int *keys, int *_vscroll, int *_hscroll,
- update_text_fn update_text, void *data)
+int dialog_textbox(const char *title, const char *tbuf, int initial_height,
+ int initial_width, int *_vscroll, int *_hscroll,
+ int (*extra_key_cb)(int, size_t, size_t, void *), void *data)
{
int i, x, y, cur_x, cur_y, key = 0;
int height, width, boxh, boxw;
/* Print first page of text */
attr_clear(box, boxh, boxw, dlg.dialog.atr);
- refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x, update_text,
- data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
while (!done) {
key = wgetch(dialog);
begin_reached = 1;
page = buf;
refresh_text_box(dialog, box, boxh, boxw,
- cur_y, cur_x, update_text,
- data);
+ cur_y, cur_x);
}
break;
case 'G': /* Last page */
/* point to last char in buf */
page = buf + strlen(buf);
back_lines(boxh);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case 'K': /* Previous line */
case 'k':
break;
back_lines(page_length + 1);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case 'B': /* Previous page */
case 'b':
if (begin_reached)
break;
back_lines(page_length + boxh);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case 'J': /* Next line */
case 'j':
break;
back_lines(page_length - 1);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case KEY_NPAGE: /* Next page */
case ' ':
break;
begin_reached = 0;
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case '0': /* Beginning of line */
case 'H': /* Scroll left */
hscroll--;
/* Reprint current page to scroll horizontally */
back_lines(page_length);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case 'L': /* Scroll right */
case 'l':
hscroll++;
/* Reprint current page to scroll horizontally */
back_lines(page_length);
- refresh_text_box(dialog, box, boxh, boxw, cur_y,
- cur_x, update_text, data);
+ refresh_text_box(dialog, box, boxh, boxw, cur_y, cur_x);
break;
case KEY_ESC:
if (on_key_esc(dialog) == KEY_ESC)
on_key_resize();
goto do_resize;
default:
- for (i = 0; keys[i]; i++) {
- if (key == keys[i]) {
- done = true;
- break;
- }
+ if (extra_key_cb && extra_key_cb(key, start, end, data)) {
+ done = true;
+ break;
}
}
}
#include "lkc.h"
#include "lxdialog/dialog.h"
-#define JUMP_NB 9
-
static const char mconf_readme[] =
"Overview\n"
"--------\n"
static int show_all_options;
static int save_and_exit;
static int silent;
+static int jump_key_char;
static void conf(struct menu *menu, struct menu *active_menu);
set_dialog_subtitles(subtitles);
}
-static int show_textbox_ext(const char *title, char *text, int r, int c, int
- *keys, int *vscroll, int *hscroll, update_text_fn
- update_text, void *data)
+static int show_textbox_ext(const char *title, const char *text, int r, int c,
+ int *vscroll, int *hscroll,
+ int (*extra_key_cb)(int, size_t, size_t, void *),
+ void *data)
{
dialog_clear();
- return dialog_textbox(title, text, r, c, keys, vscroll, hscroll,
- update_text, data);
+ return dialog_textbox(title, text, r, c, vscroll, hscroll,
+ extra_key_cb, data);
}
static void show_textbox(const char *title, const char *text, int r, int c)
{
- show_textbox_ext(title, (char *) text, r, c, (int []) {0}, NULL, NULL,
- NULL, NULL);
+ show_textbox_ext(title, text, r, c, NULL, NULL, NULL, NULL);
}
static void show_helptext(const char *title, const char *text)
struct search_data {
struct list_head *head;
- struct menu **targets;
- int *keys;
+ struct menu *target;
};
-static void update_text(char *buf, size_t start, size_t end, void *_data)
+static int next_jump_key(int key)
+{
+ if (key < '1' || key > '9')
+ return '1';
+
+ key++;
+
+ if (key > '9')
+ key = '1';
+
+ return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
{
struct search_data *data = _data;
struct jump_key *pos;
- int k = 0;
+ int index = 0;
+
+ if (key < '1' || key > '9')
+ return 0;
list_for_each_entry(pos, data->head, entries) {
- if (pos->offset >= start && pos->offset < end) {
- char header[4];
+ index = next_jump_key(index);
- if (k < JUMP_NB) {
- int key = '0' + (pos->index % JUMP_NB) + 1;
+ if (pos->offset < start)
+ continue;
- sprintf(header, "(%c)", key);
- data->keys[k] = key;
- data->targets[k] = pos->target;
- k++;
- } else {
- sprintf(header, " ");
- }
+ if (pos->offset >= end)
+ break;
- memcpy(buf + pos->offset, header, sizeof(header) - 1);
+ if (key == index) {
+ data->target = pos->target;
+ return 1;
}
}
- data->keys[k] = 0;
+
+ return 0;
+}
+
+int get_jump_key_char(void)
+{
+ jump_key_char = next_jump_key(jump_key_char);
+
+ return jump_key_char;
}
static void search_conf(void)
sym_arr = sym_re_search(dialog_input);
do {
LIST_HEAD(head);
- struct menu *targets[JUMP_NB];
- int keys[JUMP_NB + 1], i;
struct search_data data = {
.head = &head,
- .targets = targets,
- .keys = keys,
};
struct jump_key *pos, *tmp;
+ jump_key_char = 0;
res = get_relations_str(sym_arr, &head);
set_subtitle();
dres = show_textbox_ext("Search Results", str_get(&res), 0, 0,
- keys, &vscroll, &hscroll, &update_text,
- &data);
+ &vscroll, &hscroll,
+ handle_search_keys, &data);
again = false;
- for (i = 0; i < JUMP_NB && keys[i]; i++)
- if (dres == keys[i]) {
- conf(targets[i]->parent, targets[i]);
- again = true;
- }
+ if (dres >= '1' && dres <= '9') {
+ assert(data.target != NULL);
+ conf(data.target->parent, data.target);
+ again = true;
+ }
str_free(&res);
list_for_each_entry_safe(pos, tmp, &head, entries)
free(pos);
}
}
+int __attribute__((weak)) get_jump_key_char(void)
+{
+ return -1;
+}
+
static void get_prompt_str(struct gstr *r, struct property *prop,
struct list_head *head)
{
}
if (head && location) {
jump = xmalloc(sizeof(struct jump_key));
-
jump->target = location;
-
- if (list_empty(head))
- jump->index = 0;
- else
- jump->index = list_entry(head->prev, struct jump_key,
- entries)->index + 1;
-
list_add_tail(&jump->entries, head);
}
str_printf(r, " Location:\n");
- for (j = 4; --i >= 0; j += 2) {
+ for (j = 0; --i >= 0; j++) {
+ int jk = -1;
+ int indent = 2 * j + 4;
+
menu = submenu[i];
- if (jump && menu == location)
+ if (jump && menu == location) {
jump->offset = strlen(r->s);
- str_printf(r, "%*c-> %s", j, ' ', menu_get_prompt(menu));
+ jk = get_jump_key_char();
+ }
+
+ if (jk >= 0) {
+ str_printf(r, "(%c)", jk);
+ indent -= 3;
+ }
+
+ str_printf(r, "%*c-> %s", indent, ' ', menu_get_prompt(menu));
if (menu->sym) {
str_printf(r, " (%s [=%s])", menu->sym->name ?
menu->sym->name : "<choice>",
"Location:\n"
" -> Bus options (PCI, PCMCIA, EISA, ISA)\n"
" -> PCI support (PCI [ = y])\n"
-" -> PCI access mode (<choice> [ = y])\n"
+"(1) -> PCI access mode (<choice> [ = y])\n"
"Selects: LIBCRC32\n"
"Selected by: BAR\n"
"-----------------------------------------------------------------\n"
"o The 'Depends on:' line lists symbols that need to be defined for\n"
" this symbol to be visible and selectable in the menu.\n"
"o The 'Location:' lines tell, where in the menu structure this symbol\n"
-" is located. A location followed by a [ = y] indicates that this is\n"
-" a selectable menu item, and the current value is displayed inside\n"
-" brackets.\n"
+" is located.\n"
+" A location followed by a [ = y] indicates that this is\n"
+" a selectable menu item, and the current value is displayed inside\n"
+" brackets.\n"
+" Press the key in the (#) prefix to jump directly to that\n"
+" location. You will be returned to the current search results\n"
+" after exiting this new menu.\n"
"o The 'Selects:' line tells, what symbol will be automatically selected\n"
" if this symbol is selected (y or m).\n"
"o The 'Selected by' line tells what symbol has selected this symbol.\n"
static char *dialog_input_result;
static int dialog_input_result_len;
+static int jump_key_char;
+static void selected_conf(struct menu *menu, struct menu *active_menu);
static void conf(struct menu *menu);
static void conf_choice(struct menu *menu);
static void conf_string(struct menu *menu);
return 0;
}
+struct search_data {
+ struct list_head *head;
+ struct menu *target;
+};
+
+static int next_jump_key(int key)
+{
+ if (key < '1' || key > '9')
+ return '1';
+
+ key++;
+
+ if (key > '9')
+ key = '1';
+
+ return key;
+}
+
+static int handle_search_keys(int key, size_t start, size_t end, void *_data)
+{
+ struct search_data *data = _data;
+ struct jump_key *pos;
+ int index = 0;
+
+ if (key < '1' || key > '9')
+ return 0;
+
+ list_for_each_entry(pos, data->head, entries) {
+ index = next_jump_key(index);
+
+ if (pos->offset < start)
+ continue;
+
+ if (pos->offset >= end)
+ break;
+
+ if (key == index) {
+ data->target = pos->target;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int get_jump_key_char(void)
+{
+ jump_key_char = next_jump_key(jump_key_char);
+
+ return jump_key_char;
+}
static void search_conf(void)
{
struct gstr res;
struct gstr title;
char *dialog_input;
- int dres;
+ int dres, vscroll = 0, hscroll = 0;
+ bool again;
title = str_new();
str_printf( &title, "Enter (sub)string or regexp to search for "
dialog_input += strlen(CONFIG_);
sym_arr = sym_re_search(dialog_input);
- res = get_relations_str(sym_arr, NULL);
+
+ do {
+ LIST_HEAD(head);
+ struct search_data data = {
+ .head = &head,
+ .target = NULL,
+ };
+ jump_key_char = 0;
+ res = get_relations_str(sym_arr, &head);
+ dres = show_scroll_win_ext(main_window,
+ "Search Results", str_get(&res),
+ &vscroll, &hscroll,
+ handle_search_keys, &data);
+ again = false;
+ if (dres >= '1' && dres <= '9') {
+ assert(data.target != NULL);
+ selected_conf(data.target->parent, data.target);
+ again = true;
+ }
+ str_free(&res);
+ } while (again);
free(sym_arr);
- show_scroll_win(main_window,
- "Search Results", str_get(&res));
- str_free(&res);
str_free(&title);
}
static void conf(struct menu *menu)
{
+ selected_conf(menu, NULL);
+}
+
+static void selected_conf(struct menu *menu, struct menu *active_menu)
+{
struct menu *submenu = NULL;
struct symbol *sym;
- int res;
+ int i, res;
int current_index = 0;
int last_top_row = 0;
struct match_state match_state = {
if (!child_count)
break;
+ if (active_menu != NULL) {
+ for (i = 0; i < items_num; i++) {
+ struct mitem *mcur;
+
+ mcur = (struct mitem *) item_userptr(curses_menu_items[i]);
+ if ((struct menu *) mcur->usrptr == active_menu) {
+ current_index = i;
+ break;
+ }
+ }
+ active_menu = NULL;
+ }
+
show_menu(menu_get_prompt(menu), menu_instructions,
current_index, &last_top_row);
keypad((menu_win(curses_menu)), TRUE);
refresh();
}
-/* layman's scrollable window... */
void show_scroll_win(WINDOW *main_window,
const char *title,
const char *text)
{
+ (void)show_scroll_win_ext(main_window, title, (char *)text, NULL, NULL, NULL, NULL);
+}
+
+/* layman's scrollable window... */
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+ int *vscroll, int *hscroll,
+ extra_key_cb_fn extra_key_cb, void *data)
+{
int res;
int total_lines = get_line_no(text);
int x, y, lines, columns;
WINDOW *win;
WINDOW *pad;
PANEL *panel;
+ bool done = false;
+
+ if (hscroll)
+ start_x = *hscroll;
+ if (vscroll)
+ start_y = *vscroll;
getmaxyx(stdscr, lines, columns);
panel = new_panel(win);
/* handle scrolling */
- do {
-
+ while (!done) {
copywin(pad, win, start_y, start_x, 2, 2, text_lines,
text_cols, 0);
print_in_middle(win,
case 'l':
start_x++;
break;
+ default:
+ if (extra_key_cb) {
+ size_t start = (get_line(text, start_y) - text);
+ size_t end = (get_line(text, start_y + text_lines) - text);
+
+ if (extra_key_cb(res, start, end, data)) {
+ done = true;
+ break;
+ }
+ }
}
- if (res == 10 || res == 27 || res == 'q' ||
+ if (res == 0 || res == 10 || res == 27 || res == 'q' ||
res == KEY_F(F_HELP) || res == KEY_F(F_BACK) ||
res == KEY_F(F_EXIT))
break;
start_x = 0;
if (start_x >= total_cols-text_cols)
start_x = total_cols-text_cols;
- } while (res);
+ }
+ if (hscroll)
+ *hscroll = start_x;
+ if (vscroll)
+ *vscroll = start_y;
del_panel(panel);
delwin(win);
refresh_all_windows(main_window);
+ return res;
}
void set_colors(void);
+typedef int (*extra_key_cb_fn)(int, size_t, size_t, void *);
+
/* this changes the windows attributes !!! */
void print_in_middle(WINDOW *win, int y, int width, const char *str, int attrs);
int get_line_length(const char *line);
const char *title, const char *prompt,
const char *init, char **resultp, int *result_len);
void refresh_all_windows(WINDOW *main_window);
+int show_scroll_win_ext(WINDOW *main_window, const char *title, char *text,
+ int *vscroll, int *hscroll,
+ extra_key_cb_fn extra_key_cb, void *data);
void show_scroll_win(WINDOW *main_window,
const char *title,
const char *text);
p++;
}
+
+ if (new_argc >= FUNCTION_MAX_ARGS)
+ pperror("too many function arguments");
new_argv[new_argc++] = prev;
/*
libs=$2
bin=$3
-PKG="Qt5Core Qt5Gui Qt5Widgets"
+PKG5="Qt5Core Qt5Gui Qt5Widgets"
+PKG6="Qt6Core Qt6Gui Qt6Widgets"
if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
echo >&2 "*"
exit 1
fi
-if ${HOSTPKG_CONFIG} --exists $PKG; then
- ${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
- ${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
+if ${HOSTPKG_CONFIG} --exists $PKG6; then
+ ${HOSTPKG_CONFIG} --cflags ${PKG6} > ${cflags}
+ # Qt6 requires C++17.
+ echo -std=c++17 >> ${cflags}
+ ${HOSTPKG_CONFIG} --libs ${PKG6} > ${libs}
+ ${HOSTPKG_CONFIG} --variable=libexecdir Qt6Core > ${bin}
+ exit 0
+fi
+
+if ${HOSTPKG_CONFIG} --exists $PKG5; then
+ ${HOSTPKG_CONFIG} --cflags ${PKG5} > ${cflags}
+ ${HOSTPKG_CONFIG} --libs ${PKG5} > ${libs}
${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin}
exit 0
fi
echo >&2 "*"
-echo >&2 "* Could not find Qt5 via ${HOSTPKG_CONFIG}."
-echo >&2 "* Please install Qt5 and make sure it's in PKG_CONFIG_PATH"
-echo >&2 "* You need $PKG"
+echo >&2 "* Could not find Qt6 or Qt5 via ${HOSTPKG_CONFIG}."
+echo >&2 "* Please install Qt6 or Qt5 and make sure it's in PKG_CONFIG_PATH"
+echo >&2 "* You need $PKG6 for Qt6"
+echo >&2 "* You need $PKG5 for Qt5"
echo >&2 "*"
exit 1
*/
#include <QAction>
+#include <QActionGroup>
#include <QApplication>
#include <QCloseEvent>
#include <QDebug>
-#include <QDesktopWidget>
#include <QFileDialog>
#include <QLabel>
#include <QLayout>
#include <QMenu>
#include <QMenuBar>
#include <QMessageBox>
+#include <QRegularExpression>
+#include <QScreen>
#include <QToolBar>
#include <stdlib.h>
QString ConfigInfoView::print_filter(const QString &str)
{
- QRegExp re("[<>&\"\\n]");
+ QRegularExpression re("[<>&\"\\n]");
QString res = str;
for (int i = 0; (i = res.indexOf(re, i)) >= 0;) {
switch (res[i].toLatin1()) {
int width, height;
char title[256];
- QDesktopWidget *d = configApp->desktop();
snprintf(title, sizeof(title), "%s%s",
rootmenu.prompt->text,
""
);
setWindowTitle(title);
- width = configSettings->value("/window width", d->width() - 64).toInt();
- height = configSettings->value("/window height", d->height() - 64).toInt();
+ QRect g = configApp->primaryScreen()->geometry();
+ width = configSettings->value("/window width", g.width() - 64).toInt();
+ height = configSettings->value("/window height", g.height() - 64).toInt();
resize(width, height);
x = configSettings->value("/window x");
y = configSettings->value("/window y");
this, &ConfigMainWindow::goBack);
QAction *quitAction = new QAction("&Quit", this);
- quitAction->setShortcut(Qt::CTRL + Qt::Key_Q);
+ quitAction->setShortcut(Qt::CTRL | Qt::Key_Q);
connect(quitAction, &QAction::triggered,
this, &ConfigMainWindow::close);
QAction *loadAction = new QAction(QPixmap(xpm_load), "&Load", this);
- loadAction->setShortcut(Qt::CTRL + Qt::Key_L);
+ loadAction->setShortcut(Qt::CTRL | Qt::Key_L);
connect(loadAction, &QAction::triggered,
this, &ConfigMainWindow::loadConfig);
saveAction = new QAction(QPixmap(xpm_save), "&Save", this);
- saveAction->setShortcut(Qt::CTRL + Qt::Key_S);
+ saveAction->setShortcut(Qt::CTRL | Qt::Key_S);
connect(saveAction, &QAction::triggered,
this, &ConfigMainWindow::saveConfig);
connect(saveAsAction, &QAction::triggered,
this, &ConfigMainWindow::saveConfigAs);
QAction *searchAction = new QAction("&Find", this);
- searchAction->setShortcut(Qt::CTRL + Qt::Key_F);
+ searchAction->setShortcut(Qt::CTRL | Qt::Key_F);
connect(searchAction, &QAction::triggered,
this, &ConfigMainWindow::searchConfig);
singleViewAction = new QAction(QPixmap(xpm_single_view), "Single View", this);
e->accept();
return;
}
- QMessageBox mb("qconf", "Save configuration?", QMessageBox::Warning,
- QMessageBox::Yes | QMessageBox::Default, QMessageBox::No, QMessageBox::Cancel | QMessageBox::Escape);
- mb.setButtonText(QMessageBox::Yes, "&Save Changes");
- mb.setButtonText(QMessageBox::No, "&Discard Changes");
- mb.setButtonText(QMessageBox::Cancel, "Cancel Exit");
+
+ QMessageBox mb(QMessageBox::Icon::Warning, "qconf",
+ "Save configuration?");
+
+ QPushButton *yb = mb.addButton(QMessageBox::Yes);
+ QPushButton *db = mb.addButton(QMessageBox::No);
+ QPushButton *cb = mb.addButton(QMessageBox::Cancel);
+
+ yb->setText("&Save Changes");
+ db->setText("&Discard Changes");
+ cb->setText("Cancel Exit");
+
+ mb.setDefaultButton(yb);
+ mb.setEscapeButton(cb);
+
switch (mb.exec()) {
case QMessageBox::Yes:
if (saveConfig())
#include "../../include/linux/license.h"
#include "../../include/linux/module_symbol.h"
+static bool module_enabled;
/* Are we using CONFIG_MODVERSIONS? */
static bool modversions;
/* Is CONFIG_MODULE_SRCVERSION_ALL set? */
".fmt_slot*", /* EZchip */
".gnu.lto*",
".discard.*",
+ ".llvm.call-graph-profile", /* call graph */
NULL
};
const char *tosec = sec_name(elf, get_secindex(elf, sym));
const struct sectioncheck *mismatch;
- if (elf->export_symbol_secndx == fsecndx) {
+ if (module_enabled && elf->export_symbol_secndx == fsecndx) {
check_export_symbol(mod, elf, faddr, tosec, sym);
return;
}
tosec, taddr);
}
-static int addend_386_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_386_rel(uint32_t *location, unsigned int r_type)
{
- unsigned int r_typ = ELF_R_TYPE(r->r_info);
-
- switch (r_typ) {
+ switch (r_type) {
case R_386_32:
- r->r_addend = TO_NATIVE(*location);
- break;
+ return TO_NATIVE(*location);
case R_386_PC32:
- r->r_addend = TO_NATIVE(*location) + 4;
- break;
- default:
- r->r_addend = (Elf_Addr)(-1);
+ return TO_NATIVE(*location) + 4;
}
- return 0;
+
+ return (Elf_Addr)(-1);
}
#ifndef R_ARM_CALL
return (int32_t)(value << shift) >> shift;
}
-static int addend_arm_rel(void *loc, Elf_Sym *sym, Elf_Rela *r)
+static Elf_Addr addend_arm_rel(void *loc, Elf_Sym *sym, unsigned int r_type)
{
- unsigned int r_typ = ELF_R_TYPE(r->r_info);
uint32_t inst, upper, lower, sign, j1, j2;
int32_t offset;
- switch (r_typ) {
+ switch (r_type) {
case R_ARM_ABS32:
case R_ARM_REL32:
inst = TO_NATIVE(*(uint32_t *)loc);
- r->r_addend = inst + sym->st_value;
- break;
+ return inst + sym->st_value;
case R_ARM_MOVW_ABS_NC:
case R_ARM_MOVT_ABS:
inst = TO_NATIVE(*(uint32_t *)loc);
offset = sign_extend32(((inst & 0xf0000) >> 4) | (inst & 0xfff),
15);
- r->r_addend = offset + sym->st_value;
- break;
+ return offset + sym->st_value;
case R_ARM_PC24:
case R_ARM_CALL:
case R_ARM_JUMP24:
inst = TO_NATIVE(*(uint32_t *)loc);
offset = sign_extend32((inst & 0x00ffffff) << 2, 25);
- r->r_addend = offset + sym->st_value + 8;
- break;
+ return offset + sym->st_value + 8;
case R_ARM_THM_MOVW_ABS_NC:
case R_ARM_THM_MOVT_ABS:
upper = TO_NATIVE(*(uint16_t *)loc);
((lower & 0x7000) >> 4) |
(lower & 0x00ff),
15);
- r->r_addend = offset + sym->st_value;
- break;
+ return offset + sym->st_value;
case R_ARM_THM_JUMP19:
/*
* Encoding T3:
((upper & 0x03f) << 12) |
((lower & 0x07ff) << 1),
20);
- r->r_addend = offset + sym->st_value + 4;
- break;
+ return offset + sym->st_value + 4;
case R_ARM_THM_CALL:
case R_ARM_THM_JUMP24:
/*
((upper & 0x03ff) << 12) |
((lower & 0x07ff) << 1),
24);
- r->r_addend = offset + sym->st_value + 4;
- break;
- default:
- r->r_addend = (Elf_Addr)(-1);
+ return offset + sym->st_value + 4;
}
- return 0;
+
+ return (Elf_Addr)(-1);
}
-static int addend_mips_rel(uint32_t *location, Elf_Rela *r)
+static Elf_Addr addend_mips_rel(uint32_t *location, unsigned int r_type)
{
- unsigned int r_typ = ELF_R_TYPE(r->r_info);
uint32_t inst;
inst = TO_NATIVE(*location);
- switch (r_typ) {
+ switch (r_type) {
case R_MIPS_LO16:
- r->r_addend = inst & 0xffff;
- break;
+ return inst & 0xffff;
case R_MIPS_26:
- r->r_addend = (inst & 0x03ffffff) << 2;
- break;
+ return (inst & 0x03ffffff) << 2;
case R_MIPS_32:
- r->r_addend = inst;
- break;
- default:
- r->r_addend = (Elf_Addr)(-1);
+ return inst;
}
- return 0;
+ return (Elf_Addr)(-1);
}
#ifndef EM_RISCV
#define R_LARCH_SUB32 55
#endif
+static void get_rel_type_and_sym(struct elf_info *elf, uint64_t r_info,
+ unsigned int *r_type, unsigned int *r_sym)
+{
+ typedef struct {
+ Elf64_Word r_sym; /* Symbol index */
+ unsigned char r_ssym; /* Special symbol for 2nd relocation */
+ unsigned char r_type3; /* 3rd relocation type */
+ unsigned char r_type2; /* 2nd relocation type */
+ unsigned char r_type; /* 1st relocation type */
+ } Elf64_Mips_R_Info;
+
+ bool is_64bit = (elf->hdr->e_ident[EI_CLASS] == ELFCLASS64);
+
+ if (elf->hdr->e_machine == EM_MIPS && is_64bit) {
+ Elf64_Mips_R_Info *mips64_r_info = (void *)&r_info;
+
+ *r_type = mips64_r_info->r_type;
+ *r_sym = TO_NATIVE(mips64_r_info->r_sym);
+ return;
+ }
+
+ if (is_64bit) {
+ Elf64_Xword r_info64 = r_info;
+
+ r_info = TO_NATIVE(r_info64);
+ } else {
+ Elf32_Word r_info32 = r_info;
+
+ r_info = TO_NATIVE(r_info32);
+ }
+
+ *r_type = ELF_R_TYPE(r_info);
+ *r_sym = ELF_R_SYM(r_info);
+}
+
static void section_rela(struct module *mod, struct elf_info *elf,
Elf_Shdr *sechdr)
{
Elf_Rela *rela;
- Elf_Rela r;
- unsigned int r_sym;
unsigned int fsecndx = sechdr->sh_info;
const char *fromsec = sec_name(elf, fsecndx);
Elf_Rela *start = (void *)elf->hdr + sechdr->sh_offset;
return;
for (rela = start; rela < stop; rela++) {
- r.r_offset = TO_NATIVE(rela->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
- if (elf->hdr->e_machine == EM_MIPS) {
- unsigned int r_typ;
- r_sym = ELF64_MIPS_R_SYM(rela->r_info);
- r_sym = TO_NATIVE(r_sym);
- r_typ = ELF64_MIPS_R_TYPE(rela->r_info);
- r.r_info = ELF64_R_INFO(r_sym, r_typ);
- } else {
- r.r_info = TO_NATIVE(rela->r_info);
- r_sym = ELF_R_SYM(r.r_info);
- }
-#else
- r.r_info = TO_NATIVE(rela->r_info);
- r_sym = ELF_R_SYM(r.r_info);
-#endif
- r.r_addend = TO_NATIVE(rela->r_addend);
+ Elf_Addr taddr, r_offset;
+ unsigned int r_type, r_sym;
+
+ r_offset = TO_NATIVE(rela->r_offset);
+ get_rel_type_and_sym(elf, rela->r_info, &r_type, &r_sym);
+
+ taddr = TO_NATIVE(rela->r_addend);
+
switch (elf->hdr->e_machine) {
case EM_RISCV:
if (!strcmp("__ex_table", fromsec) &&
- ELF_R_TYPE(r.r_info) == R_RISCV_SUB32)
+ r_type == R_RISCV_SUB32)
continue;
break;
case EM_LOONGARCH:
if (!strcmp("__ex_table", fromsec) &&
- ELF_R_TYPE(r.r_info) == R_LARCH_SUB32)
+ r_type == R_LARCH_SUB32)
continue;
break;
}
check_section_mismatch(mod, elf, elf->symtab_start + r_sym,
- fsecndx, fromsec, r.r_offset, r.r_addend);
+ fsecndx, fromsec, r_offset, taddr);
}
}
Elf_Shdr *sechdr)
{
Elf_Rel *rel;
- Elf_Rela r;
- unsigned int r_sym;
unsigned int fsecndx = sechdr->sh_info;
const char *fromsec = sec_name(elf, fsecndx);
Elf_Rel *start = (void *)elf->hdr + sechdr->sh_offset;
for (rel = start; rel < stop; rel++) {
Elf_Sym *tsym;
+ Elf_Addr taddr = 0, r_offset;
+ unsigned int r_type, r_sym;
void *loc;
- r.r_offset = TO_NATIVE(rel->r_offset);
-#if KERNEL_ELFCLASS == ELFCLASS64
- if (elf->hdr->e_machine == EM_MIPS) {
- unsigned int r_typ;
- r_sym = ELF64_MIPS_R_SYM(rel->r_info);
- r_sym = TO_NATIVE(r_sym);
- r_typ = ELF64_MIPS_R_TYPE(rel->r_info);
- r.r_info = ELF64_R_INFO(r_sym, r_typ);
- } else {
- r.r_info = TO_NATIVE(rel->r_info);
- r_sym = ELF_R_SYM(r.r_info);
- }
-#else
- r.r_info = TO_NATIVE(rel->r_info);
- r_sym = ELF_R_SYM(r.r_info);
-#endif
- r.r_addend = 0;
+ r_offset = TO_NATIVE(rel->r_offset);
+ get_rel_type_and_sym(elf, rel->r_info, &r_type, &r_sym);
- loc = sym_get_data_by_offset(elf, fsecndx, r.r_offset);
+ loc = sym_get_data_by_offset(elf, fsecndx, r_offset);
tsym = elf->symtab_start + r_sym;
switch (elf->hdr->e_machine) {
case EM_386:
- addend_386_rel(loc, &r);
+ taddr = addend_386_rel(loc, r_type);
break;
case EM_ARM:
- addend_arm_rel(loc, tsym, &r);
+ taddr = addend_arm_rel(loc, tsym, r_type);
break;
case EM_MIPS:
- addend_mips_rel(loc, &r);
+ taddr = addend_mips_rel(loc, r_type);
break;
default:
fatal("Please add code to calculate addend for this architecture\n");
}
check_section_mismatch(mod, elf, tsym,
- fsecndx, fromsec, r.r_offset, r.r_addend);
+ fsecndx, fromsec, r_offset, taddr);
}
}
LIST_HEAD(dump_lists);
struct dump_list *dl, *dl2;
- while ((opt = getopt(argc, argv, "ei:mnT:to:au:WwENd:")) != -1) {
+ while ((opt = getopt(argc, argv, "ei:MmnT:to:au:WwENd:")) != -1) {
switch (opt) {
case 'e':
external_module = true;
dl->file = optarg;
list_add_tail(&dl->list, &dump_lists);
break;
+ case 'M':
+ module_enabled = true;
+ break;
case 'm':
modversions = true;
break;
#define ELF_R_TYPE ELF64_R_TYPE
#endif
-/* The 64-bit MIPS ELF ABI uses an unusual reloc format. */
-typedef struct
-{
- Elf32_Word r_sym; /* Symbol index */
- unsigned char r_ssym; /* Special symbol for 2nd relocation */
- unsigned char r_type3; /* 3rd relocation type */
- unsigned char r_type2; /* 2nd relocation type */
- unsigned char r_type1; /* 1st relocation type */
-} _Elf64_Mips_R_Info;
-
-typedef union
-{
- Elf64_Xword r_info_number;
- _Elf64_Mips_R_Info r_info_fields;
-} _Elf64_Mips_R_Info_union;
-
-#define ELF64_MIPS_R_SYM(i) \
- ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_sym)
-
-#define ELF64_MIPS_R_TYPE(i) \
- ((__extension__ (_Elf64_Mips_R_Info_union)(i)).r_info_fields.r_type1)
-
#if KERNEL_ELFDATA != HOST_ELFDATA
static inline void __endian(const void *src, void *dest, unsigned int size)
rm -rf $pdir
- (
- cd $srctree
- find . arch/$SRCARCH -maxdepth 1 -name Makefile\*
- find include scripts -type f -o -type l
- find arch/$SRCARCH -name Kbuild.platforms -o -name Platform
- find $(find arch/$SRCARCH -name include -o -name scripts -type d) -type f
- ) > debian/hdrsrcfiles
-
- {
- if is_enabled CONFIG_OBJTOOL; then
- echo tools/objtool/objtool
- fi
-
- find arch/$SRCARCH/include Module.symvers include scripts -type f
-
- if is_enabled CONFIG_GCC_PLUGINS; then
- find scripts/gcc-plugins -name \*.so
- fi
- } > debian/hdrobjfiles
-
- destdir=$pdir/usr/src/linux-headers-$version
- mkdir -p $destdir
- tar -c -f - -C $srctree -T debian/hdrsrcfiles | tar -xf - -C $destdir
- tar -c -f - -T debian/hdrobjfiles | tar -xf - -C $destdir
- rm -f debian/hdrsrcfiles debian/hdrobjfiles
-
- # copy .config manually to be where it's expected to be
- cp $KCONFIG_CONFIG $destdir/.config
+ "${srctree}/scripts/package/install-extmod-build" "${pdir}/usr/src/linux-headers-${version}"
mkdir -p $pdir/lib/modules/$version/
ln -s /usr/src/linux-headers-$version $pdir/lib/modules/$version/build
--- /dev/null
+#!/usr/bin/make -f
+# SPDX-License-Identifier: GPL-2.0-only
+
+include debian/rules.vars
+
+srctree ?= .
+
+ifneq (,$(filter-out parallel=1,$(filter parallel=%,$(DEB_BUILD_OPTIONS))))
+ NUMJOBS = $(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
+ MAKEFLAGS += -j$(NUMJOBS)
+endif
+
+.PHONY: binary binary-indep binary-arch
+binary: binary-arch binary-indep
+binary-indep: build-indep
+binary-arch: build-arch
+ $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+ KERNELRELEASE=$(KERNELRELEASE) \
+ run-command KBUILD_RUN_COMMAND=+$(srctree)/scripts/package/builddeb
+
+.PHONY: build build-indep build-arch
+build: build-arch build-indep
+build-indep:
+build-arch:
+ $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) \
+ KERNELRELEASE=$(KERNELRELEASE) \
+ $(shell $(srctree)/scripts/package/deb-build-option) \
+ olddefconfig all
+
+.PHONY: clean
+clean:
+ rm -rf debian/files debian/linux-*
+ $(MAKE) -f $(srctree)/Makefile ARCH=$(ARCH) clean
--- /dev/null
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+set -e
+
+destdir=${1}
+
+test -n "${srctree}"
+test -n "${SRCARCH}"
+
+is_enabled() {
+ grep -q "^$1=y" include/config/auto.conf
+}
+
+mkdir -p "${destdir}"
+
+(
+ cd "${srctree}"
+ echo Makefile
+ find "arch/${SRCARCH}" -maxdepth 1 -name 'Makefile*'
+ find include scripts -type f -o -type l
+ find "arch/${SRCARCH}" -name Kbuild.platforms -o -name Platform
+ find "$(find "arch/${SRCARCH}" -name include -o -name scripts -type d)" -type f
+) | tar -c -f - -C "${srctree}" -T - | tar -xf - -C "${destdir}"
+
+{
+ if is_enabled CONFIG_OBJTOOL; then
+ echo tools/objtool/objtool
+ fi
+
+ find "arch/${SRCARCH}/include" Module.symvers include scripts -type f
+
+ if is_enabled CONFIG_GCC_PLUGINS; then
+ find scripts/gcc-plugins -name '*.so'
+ fi
+} | tar -c -f - -T - | tar -xf - -C "${destdir}"
+
+# copy .config manually to be where it's expected to be
+cp "${KCONFIG_CONFIG}" "${destdir}/.config"
--- /dev/null
+# _arch is undefined if /usr/lib/rpm/platform/*/macros was not included.
+%{!?_arch: %define _arch dummy}
+%{!?make: %define make make}
+%define makeflags %{?_smp_mflags} ARCH=%{ARCH}
+%define __spec_install_post /usr/lib/rpm/brp-compress || :
+%define debug_package %{nil}
+
+Name: kernel
+Summary: The Linux Kernel
+Version: %(echo %{KERNELRELEASE} | sed -e 's/-/_/g')
+Release: %{pkg_release}
+License: GPL
+Group: System Environment/Kernel
+Vendor: The Linux Community
+URL: https://www.kernel.org
+Source0: linux.tar.gz
+Source1: config
+Source2: diff.patch
+Provides: kernel-%{KERNELRELEASE}
+BuildRequires: bc binutils bison dwarves
+BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
+BuildRequires: gcc make openssl openssl-devel perl python3 rsync
+
+%description
+The Linux Kernel, the operating system core itself
+
+%package headers
+Summary: Header files for the Linux kernel for use by glibc
+Group: Development/System
+Obsoletes: kernel-headers
+Provides: kernel-headers = %{version}
+%description headers
+Kernel-headers includes the C header files that specify the interface
+between the Linux kernel and userspace libraries and programs. The
+header files define structures and constants that are needed for
+building most standard programs and are also needed for rebuilding the
+glibc package.
+
+%if %{with_devel}
+%package devel
+Summary: Development package for building kernel modules to match the %{version} kernel
+Group: System Environment/Kernel
+AutoReqProv: no
+%description -n kernel-devel
+This package provides kernel headers and makefiles sufficient to build modules
+against the %{version} kernel package.
+%endif
+
+%prep
+%setup -q -n linux
+cp %{SOURCE1} .config
+patch -p1 < %{SOURCE2}
+
+%build
+%{make} %{makeflags} KERNELRELEASE=%{KERNELRELEASE} KBUILD_BUILD_VERSION=%{release}
+
+%install
+mkdir -p %{buildroot}/boot
+%ifarch ia64
+mkdir -p %{buildroot}/boot/efi
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/efi/vmlinuz-%{KERNELRELEASE}
+ln -s efi/vmlinuz-%{KERNELRELEASE} %{buildroot}/boot/
+%else
+cp $(%{make} %{makeflags} -s image_name) %{buildroot}/boot/vmlinuz-%{KERNELRELEASE}
+%endif
+%{make} %{makeflags} INSTALL_MOD_PATH=%{buildroot} modules_install
+%{make} %{makeflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
+cp System.map %{buildroot}/boot/System.map-%{KERNELRELEASE}
+cp .config %{buildroot}/boot/config-%{KERNELRELEASE}
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/build
+ln -fns /usr/src/kernels/%{KERNELRELEASE} %{buildroot}/lib/modules/%{KERNELRELEASE}/source
+%if %{with_devel}
+%{make} %{makeflags} run-command KBUILD_RUN_COMMAND='${srctree}/scripts/package/install-extmod-build %{buildroot}/usr/src/kernels/%{KERNELRELEASE}'
+%endif
+
+%clean
+rm -rf %{buildroot}
+
+%post
+if [ -x /sbin/installkernel -a -r /boot/vmlinuz-%{KERNELRELEASE} -a -r /boot/System.map-%{KERNELRELEASE} ]; then
+cp /boot/vmlinuz-%{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm
+cp /boot/System.map-%{KERNELRELEASE} /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/vmlinuz-%{KERNELRELEASE} /boot/System.map-%{KERNELRELEASE}
+/sbin/installkernel %{KERNELRELEASE} /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+rm -f /boot/.vmlinuz-%{KERNELRELEASE}-rpm /boot/.System.map-%{KERNELRELEASE}-rpm
+fi
+
+%preun
+if [ -x /sbin/new-kernel-pkg ]; then
+new-kernel-pkg --remove %{KERNELRELEASE} --rminitrd --initrdfile=/boot/initramfs-%{KERNELRELEASE}.img
+elif [ -x /usr/bin/kernel-install ]; then
+kernel-install remove %{KERNELRELEASE}
+fi
+
+%postun
+if [ -x /sbin/update-bootloader ]; then
+/sbin/update-bootloader --remove %{KERNELRELEASE}
+fi
+
+%files
+%defattr (-, root, root)
+/lib/modules/%{KERNELRELEASE}
+%exclude /lib/modules/%{KERNELRELEASE}/build
+%exclude /lib/modules/%{KERNELRELEASE}/source
+/boot/*
+
+%files headers
+%defattr (-, root, root)
+/usr/include
+
+%if %{with_devel}
+%files devel
+%defattr (-, root, root)
+/usr/src/kernels/%{KERNELRELEASE}
+/lib/modules/%{KERNELRELEASE}/build
+/lib/modules/%{KERNELRELEASE}/source
+%endif
EOF
fi
-cat <<EOF > debian/rules
-#!$(command -v $MAKE) -f
-
-srctree ?= .
-KERNELRELEASE = ${KERNELRELEASE}
-
-.PHONY: clean build build-arch build-indep binary binary-arch binary-indep
-
-build-indep:
-build-arch:
- \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
- KERNELRELEASE=\$(KERNELRELEASE) \
- \$(shell \$(srctree)/scripts/package/deb-build-option) \
- olddefconfig all
-
-build: build-arch
-
-binary-indep:
-binary-arch: build-arch
- \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
- KERNELRELEASE=\$(KERNELRELEASE) intdeb-pkg
-
-clean:
- rm -rf debian/files debian/linux-*
- \$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} clean
-
-binary: binary-arch
+cat <<EOF > debian/rules.vars
+ARCH := ${ARCH}
+KERNELRELEASE := ${KERNELRELEASE}
EOF
-chmod +x debian/rules
+
+cp "${srctree}/scripts/package/debian/rules" debian/
exit 0
# Patched for non-x86 by Opencon (L) 2002 <opencon@rio.skydome.net>
#
-# how we were called determines which rpms we build and how we build them
-if [ "$1" = prebuilt ]; then
- S=DEL
- MAKE="$MAKE -f $srctree/Makefile"
-else
- S=
-
- mkdir -p rpmbuild/SOURCES
- cp linux.tar.gz rpmbuild/SOURCES
- cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
- "${srctree}/scripts/package/gen-diff-patch" rpmbuild/SOURCES/diff.patch
-fi
-
if grep -q CONFIG_MODULES=y include/config/auto.conf; then
- M=
+echo '%define with_devel %{?_without_devel: 0} %{?!_without_devel: 1}'
else
- M=DEL
+echo '%define with_devel 0'
fi
-__KERNELRELEASE=$(echo $KERNELRELEASE | sed -e "s/-/_/g")
-EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
---exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
---exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
-
-# We can label the here-doc lines for conditional output to the spec file
-#
-# Labels:
-# $S: this line is enabled only when building source package
-# $M: this line is enabled only when CONFIG_MODULES is enabled
-sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
- Name: kernel
- Summary: The Linux Kernel
- Version: $__KERNELRELEASE
- Release: $(cat .version 2>/dev/null || echo 1)
- License: GPL
- Group: System Environment/Kernel
- Vendor: The Linux Community
- URL: https://www.kernel.org
-$S Source0: linux.tar.gz
-$S Source1: config
-$S Source2: diff.patch
- Provides: kernel-$KERNELRELEASE
-$S BuildRequires: bc binutils bison dwarves
-$S BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
-$S BuildRequires: gcc make openssl openssl-devel perl python3 rsync
-
- # $UTS_MACHINE as a fallback of _arch in case
- # /usr/lib/rpm/platform/*/macros was not included.
- %define _arch %{?_arch:$UTS_MACHINE}
- %define __spec_install_post /usr/lib/rpm/brp-compress || :
- %define debug_package %{nil}
-
- %description
- The Linux Kernel, the operating system core itself
-
- %package headers
- Summary: Header files for the Linux kernel for use by glibc
- Group: Development/System
- Obsoletes: kernel-headers
- Provides: kernel-headers = %{version}
- %description headers
- Kernel-headers includes the C header files that specify the interface
- between the Linux kernel and userspace libraries and programs. The
- header files define structures and constants that are needed for
- building most standard programs and are also needed for rebuilding the
- glibc package.
-
-$S$M %package devel
-$S$M Summary: Development package for building kernel modules to match the $__KERNELRELEASE kernel
-$S$M Group: System Environment/Kernel
-$S$M AutoReqProv: no
-$S$M %description -n kernel-devel
-$S$M This package provides kernel headers and makefiles sufficient to build modules
-$S$M against the $__KERNELRELEASE kernel package.
-$S$M
-$S %prep
-$S %setup -q -n linux
-$S cp %{SOURCE1} .config
-$S patch -p1 < %{SOURCE2}
-$S
-$S %build
-$S $MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}
-$S
- %install
- mkdir -p %{buildroot}/boot
- %ifarch ia64
- mkdir -p %{buildroot}/boot/efi
- cp \$($MAKE -s image_name) %{buildroot}/boot/efi/vmlinuz-$KERNELRELEASE
- ln -s efi/vmlinuz-$KERNELRELEASE %{buildroot}/boot/
- %else
- cp \$($MAKE -s image_name) %{buildroot}/boot/vmlinuz-$KERNELRELEASE
- %endif
-$M $MAKE %{?_smp_mflags} INSTALL_MOD_PATH=%{buildroot} modules_install
- $MAKE %{?_smp_mflags} INSTALL_HDR_PATH=%{buildroot}/usr headers_install
- cp System.map %{buildroot}/boot/System.map-$KERNELRELEASE
- cp .config %{buildroot}/boot/config-$KERNELRELEASE
-$S$M rm -f %{buildroot}/lib/modules/$KERNELRELEASE/build
-$S$M rm -f %{buildroot}/lib/modules/$KERNELRELEASE/source
-$S$M mkdir -p %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M tar cf - $EXCLUDES . | tar xf - -C %{buildroot}/usr/src/kernels/$KERNELRELEASE
-$S$M cd %{buildroot}/lib/modules/$KERNELRELEASE
-$S$M ln -sf /usr/src/kernels/$KERNELRELEASE build
-$S$M ln -sf /usr/src/kernels/$KERNELRELEASE source
-
- %clean
- rm -rf %{buildroot}
-
- %post
- if [ -x /sbin/installkernel -a -r /boot/vmlinuz-$KERNELRELEASE -a -r /boot/System.map-$KERNELRELEASE ]; then
- cp /boot/vmlinuz-$KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm
- cp /boot/System.map-$KERNELRELEASE /boot/.System.map-$KERNELRELEASE-rpm
- rm -f /boot/vmlinuz-$KERNELRELEASE /boot/System.map-$KERNELRELEASE
- /sbin/installkernel $KERNELRELEASE /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
- rm -f /boot/.vmlinuz-$KERNELRELEASE-rpm /boot/.System.map-$KERNELRELEASE-rpm
- fi
-
- %preun
- if [ -x /sbin/new-kernel-pkg ]; then
- new-kernel-pkg --remove $KERNELRELEASE --rminitrd --initrdfile=/boot/initramfs-$KERNELRELEASE.img
- elif [ -x /usr/bin/kernel-install ]; then
- kernel-install remove $KERNELRELEASE
- fi
-
- %postun
- if [ -x /sbin/update-bootloader ]; then
- /sbin/update-bootloader --remove $KERNELRELEASE
- fi
-
- %files
- %defattr (-, root, root)
-$M /lib/modules/$KERNELRELEASE
-$M %exclude /lib/modules/$KERNELRELEASE/build
-$M %exclude /lib/modules/$KERNELRELEASE/source
- /boot/*
-
- %files headers
- %defattr (-, root, root)
- /usr/include
-$S$M
-$S$M %files devel
-$S$M %defattr (-, root, root)
-$S$M /usr/src/kernels/$KERNELRELEASE
-$S$M /lib/modules/$KERNELRELEASE/build
-$S$M /lib/modules/$KERNELRELEASE/source
+cat<<EOF
+%define ARCH ${ARCH}
+%define KERNELRELEASE ${KERNELRELEASE}
+%define pkg_release $("${srctree}/init/build-version")
EOF
+
+cat "${srctree}/scripts/package/kernel.spec"
rm -rf include/ksym
find . -name '*.usyms' | xargs rm -f
+
+rm -f binkernel.spec
# SPDX-License-Identifier: GPL-2.0
#
# This scripts adds local version information from the version
-# control systems git, mercurial (hg) and subversion (svn).
+# control system git.
#
# If something goes wrong, send a mail the kernel build mailinglist
# (see MAINTAINERS) and CC Nico Schottelius
return
fi
- # If a localversion*' file and the corresponding annotated tag exist,
- # use it. This is the case in linux-next.
+ # mainline kernel: 6.2.0-rc5 -> v6.2-rc5
+ # stable kernel: 6.1.7 -> v6.1.7
+ version_tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+
+ # If a localversion* file exists, and the corresponding
+ # annotated tag exists and is an ancestor of HEAD, use
+ # it. This is the case in linux-next.
tag=${file_localversion#-}
- tag=$(git describe --exact-match --match=$tag $tag 2>/dev/null)
+ desc=
+ if [ -n "${tag}" ]; then
+ desc=$(git describe --match=$tag 2>/dev/null)
+ fi
+
+ # Otherwise, if a localversion* file exists, and the tag
+ # obtained by appending it to the tag derived from
+ # KERNELVERSION exists and is an ancestor of HEAD, use
+ # it. This is e.g. the case in linux-rt.
+ if [ -z "${desc}" ] && [ -n "${file_localversion}" ]; then
+ tag="${version_tag}${file_localversion}"
+ desc=$(git describe --match=$tag 2>/dev/null)
+ fi
# Otherwise, default to the annotated tag derived from KERNELVERSION.
- # mainline kernel: 6.2.0-rc5 -> v6.2-rc5
- # stable kernel: 6.1.7 -> v6.1.7
- if [ -z "${tag}" ]; then
- tag=v$(echo "${KERNELVERSION}" | sed -E 's/^([0-9]+\.[0-9]+)\.0(.*)$/\1\2/')
+ if [ -z "${desc}" ]; then
+ tag="${version_tag}"
+ desc=$(git describe --match=$tag 2>/dev/null)
fi
# If we are at the tagged commit, we ignore it because the version is
# well-defined.
- if [ -z "$(git describe --exact-match --match=$tag 2>/dev/null)" ]; then
+ if [ "${tag}" != "${desc}" ]; then
# If only the short version is requested, don't bother
# running further git commands
fi
# If we are past the tagged commit, we pretty print it.
# (like 6.1.0-14595-g292a089d78d3)
- if atag="$(git describe --match=$tag 2>/dev/null)"; then
- echo "$atag" | awk -F- '{printf("-%05d", $(NF-1))}'
+ if [ -n "${desc}" ]; then
+ echo "${desc}" | awk -F- '{printf("-%05d", $(NF-1))}'
fi
# Add -g and exactly 12 hex chars.
* @layers: Stack of layers, from the latest to the newest, implemented
* as a flexible array member (FAM).
*/
- struct landlock_layer layers[];
+ struct landlock_layer layers[] __counted_by(num_layers);
};
/**
int channel, unsigned long hwoff,
struct iov_iter *iter, unsigned long bytes)
{
- if (!copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
- bytes, iter))
+ if (copy_from_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+ bytes, iter) != bytes)
return -EFAULT;
return 0;
}
int channel, unsigned long hwoff,
struct iov_iter *iter, unsigned long bytes)
{
- if (!copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
- bytes, iter))
+ if (copy_to_iter(get_dma_ptr(substream->runtime, channel, hwoff),
+ bytes, iter) != bytes)
return -EFAULT;
return 0;
}
err = expand_var_event(event, 0, len, buf, in_kernel);
if (err < 0)
return err;
- if (len != newlen)
- memset(buf + len, 0, newlen - len);
+ if (len != newlen) {
+ if (in_kernel)
+ memset(buf + len, 0, newlen - len);
+ else if (clear_user((__force void __user *)buf + len,
+ newlen - len))
+ return -EFAULT;
+ }
return newlen;
}
EXPORT_SYMBOL(snd_seq_expand_var_event);
/* convert to word unit */
pos = (pos << 1) + rec->loop_start[voice];
count <<= 1;
- LOOP_WRITE(rec, pos, USER_SOCKPTR(NULL), count);
+ LOOP_WRITE(rec, pos, NULL, count);
return 0;
}
/* Initialize CS42L42 companion codec */
cs8409_i2c_bulk_write(cs42l42, cs42l42->init_seq, cs42l42->init_seq_num);
- usleep_range(30000, 35000);
+ msleep(CS42L42_INIT_TIMEOUT_MS);
/* Clear interrupts, by reading interrupt status registers */
cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs));
#define CS42L42_I2C_SLEEP_US (2000)
#define CS42L42_PDN_TIMEOUT_US (250000)
#define CS42L42_PDN_SLEEP_US (2000)
+#define CS42L42_INIT_TIMEOUT_MS (45)
#define CS42L42_FULL_SCALE_VOL_MASK (2)
#define CS42L42_FULL_SCALE_VOL_0DB (1)
#define CS42L42_FULL_SCALE_VOL_MINUS6DB (0)
}
}
+/* Forcibly assign NID 0x03 to HP while NID 0x02 to SPK */
+static void alc287_fixup_bind_dacs(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ struct alc_spec *spec = codec->spec;
+ static const hda_nid_t conn[] = { 0x02, 0x03 }; /* exclude 0x06 */
+ static const hda_nid_t preferred_pairs[] = {
+ 0x17, 0x02, 0x21, 0x03, 0
+ };
+
+ if (action != HDA_FIXUP_ACT_PRE_PROBE)
+ return;
+
+ snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn);
+ spec->gen.preferred_dacs = preferred_pairs;
+ spec->gen.auto_mute_via_amp = 1;
+ snd_hda_codec_write_cache(codec, 0x14, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
+ 0x0); /* Make sure 0x14 was disable */
+}
+
+
enum {
ALC269_FIXUP_GPIO2,
ALC269_FIXUP_SONY_VAIO,
ALC287_FIXUP_TAS2781_I2C,
ALC245_FIXUP_HP_MUTE_LED_COEFBIT,
ALC245_FIXUP_HP_X360_MUTE_LEDS,
+ ALC287_FIXUP_THINKPAD_I2S_SPK,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
.chained = true,
.chain_id = ALC245_FIXUP_HP_GPIO_LED
},
+ [ALC287_FIXUP_THINKPAD_I2S_SPK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc287_fixup_bind_dacs,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
{0x17, 0x90170111},
{0x19, 0x03a11030},
{0x21, 0x03211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0287, 0x17aa, "Lenovo", ALC287_FIXUP_THINKPAD_I2S_SPK,
+ {0x17, 0x90170110},
+ {0x19, 0x03a11030},
+ {0x21, 0x03211020}),
SND_HDA_PIN_QUIRK(0x10ec0286, 0x1025, "Acer", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE,
{0x12, 0x90a60130},
{0x17, 0x90170110},
return 0;
}
-static int tasdevice_hda_clamp(int val, int max)
-{
- if (val > max)
- val = max;
-
- if (val < 0)
- val = 0;
- return val;
-}
-
static int tasdevice_set_profile_id(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_value *ucontrol)
{
int max = tas_priv->rcabin.ncfgs - 1;
int val, ret = 0;
- val = tasdevice_hda_clamp(nr_profile, max);
+ val = clamp(nr_profile, 0, max);
if (tas_priv->rcabin.profile_cfg_id != val) {
tas_priv->rcabin.profile_cfg_id = val;
int max = tas_fw->nr_programs - 1;
int val, ret = 0;
- val = tasdevice_hda_clamp(nr_program, max);
+ val = clamp(nr_program, 0, max);
if (tas_priv->cur_prog != val) {
tas_priv->cur_prog = val;
int max = tas_fw->nr_configurations - 1;
int val, ret = 0;
- val = tasdevice_hda_clamp(nr_config, max);
+ val = clamp(nr_config, 0, max);
if (tas_priv->cur_conf != val) {
tas_priv->cur_conf = val;
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "82TL"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
}
},
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "HP"),
+ DMI_MATCH(DMI_BOARD_NAME, "8A3E"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "MECHREVO"),
DMI_MATCH(DMI_BOARD_NAME, "MRID6"),
}
/* used to clean the channel index found on RHR's MSB */
static int mchp_pdmc_process(struct snd_pcm_substream *substream,
int channel, unsigned long hwoff,
- struct iov_iter *buf, unsigned long bytes)
+ unsigned long bytes)
{
struct snd_pcm_runtime *runtime = substream->runtime;
u8 *dma_ptr = runtime->dma_area + hwoff +
tristate
depends on I2C
+config SND_SOC_WCD_CLASSH
+ tristate
+
config SND_SOC_WCD9335
tristate "WCD9335 Codec"
depends on SLIMBUS
select REGMAP_SLIMBUS
select REGMAP_IRQ
+ select SND_SOC_WCD_CLASSH
help
The WCD9335 is a standalone Hi-Fi audio CODEC IC, supports
Qualcomm Technologies, Inc. (QTI) multimedia solutions,
depends on SLIMBUS
select REGMAP_IRQ
select REGMAP_SLIMBUS
+ select SND_SOC_WCD_CLASSH
select SND_SOC_WCD_MBHC
depends on MFD_WCD934X || COMPILE_TEST
help
depends on SND_SOC_WCD938X_SDW
tristate
depends on SOUNDWIRE || !SOUNDWIRE
+ select SND_SOC_WCD_CLASSH
config SND_SOC_WCD938X_SDW
tristate "WCD9380/WCD9385 Codec - SDW"
snd-soc-twl6040-objs := twl6040.o
snd-soc-uda1334-objs := uda1334.o
snd-soc-uda1380-objs := uda1380.o
+snd-soc-wcd-classh-objs := wcd-clsh-v2.o
snd-soc-wcd-mbhc-objs := wcd-mbhc-v2.o
-snd-soc-wcd9335-objs := wcd-clsh-v2.o wcd9335.o
-snd-soc-wcd934x-objs := wcd-clsh-v2.o wcd934x.o
-snd-soc-wcd938x-objs := wcd938x.o wcd-clsh-v2.o
+snd-soc-wcd9335-objs := wcd9335.o
+snd-soc-wcd934x-objs := wcd934x.o
+snd-soc-wcd938x-objs := wcd938x.o
snd-soc-wcd938x-sdw-objs := wcd938x-sdw.o
snd-soc-wl1273-objs := wl1273.o
snd-soc-wm-adsp-objs := wm_adsp.o
obj-$(CONFIG_SND_SOC_TWL6040) += snd-soc-twl6040.o
obj-$(CONFIG_SND_SOC_UDA1334) += snd-soc-uda1334.o
obj-$(CONFIG_SND_SOC_UDA1380) += snd-soc-uda1380.o
+obj-$(CONFIG_SND_SOC_WCD_CLASSH) += snd-soc-wcd-classh.o
obj-$(CONFIG_SND_SOC_WCD_MBHC) += snd-soc-wcd-mbhc.o
obj-$(CONFIG_SND_SOC_WCD9335) += snd-soc-wcd9335.o
obj-$(CONFIG_SND_SOC_WCD934X) += snd-soc-wcd934x.o
};
static const struct snd_kcontrol_new cs35l45_dac_muxes[] = {
- SOC_DAPM_ENUM("DACPCM1 Source", cs35l45_dacpcm_enums[0]),
+ SOC_DAPM_ENUM("DACPCM Source", cs35l45_dacpcm_enums[0]),
};
static const struct snd_soc_dapm_widget cs35l45_dapm_widgets[] = {
SND_SOC_DAPM_MUX("DSP_RX7 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[6]),
SND_SOC_DAPM_MUX("DSP_RX8 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dsp_muxes[7]),
- SND_SOC_DAPM_MUX("DACPCM1 Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
+ SND_SOC_DAPM_MUX("DACPCM Source", SND_SOC_NOPM, 0, 0, &cs35l45_dac_muxes[0]),
SND_SOC_DAPM_OUT_DRV("AMP", SND_SOC_NOPM, 0, 0, NULL, 0),
{ "ASP_RX1", NULL, "ASP_EN" },
{ "ASP_RX2", NULL, "ASP_EN" },
- { "AMP", NULL, "DACPCM1 Source"},
+ { "AMP", NULL, "DACPCM Source"},
{ "AMP", NULL, "GLOBAL_EN"},
CS35L45_DSP_MUX_ROUTE("DSP_RX1"),
{"DSP1 Preload", NULL, "DSP1 Preloader"},
{"DSP1", NULL, "DSP1 Preloader"},
- CS35L45_DAC_MUX_ROUTE("DACPCM1"),
+ CS35L45_DAC_MUX_ROUTE("DACPCM"),
{ "SPK", NULL, "AMP"},
};
ret = regmap_read(cs35l45->regmap, CS35L45_DSP_VIRT2_MBOX_3, &mbox_val);
if (!ret && mbox_val)
- ret = cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
+ cs35l45_dsp_virt2_mbox3_irq_handle(cs35l45, mbox_val & CS35L45_MBOX3_CMD_MASK,
(mbox_val & CS35L45_MBOX3_DATA_MASK) >> CS35L45_MBOX3_DATA_SHIFT);
/* Handle DSP trace log IRQ */
switch (dev_id[0]) {
case 0x35A450:
+ case 0x35A460:
break;
default:
dev_err(cs35l45->dev, "Bad DEVID 0x%x\n", dev_id[0]);
{
unsigned int reg;
unsigned int val;
- int ret;
+ int read_ret, poll_ret;
if (cs35l56_base->rev < CS35L56_REVID_B0)
reg = CS35L56_DSP1_HALO_STATE_A1;
else
reg = CS35L56_DSP1_HALO_STATE;
- ret = regmap_read_poll_timeout(cs35l56_base->regmap, reg,
- val,
- (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
- CS35L56_HALO_STATE_POLL_US,
- CS35L56_HALO_STATE_TIMEOUT_US);
-
- if ((ret < 0) && (ret != -ETIMEDOUT)) {
- dev_err(cs35l56_base->dev, "Failed to read HALO_STATE: %d\n", ret);
- return ret;
- }
-
- if ((ret == -ETIMEDOUT) || (val != CS35L56_HALO_STATE_BOOT_DONE)) {
- dev_err(cs35l56_base->dev, "Firmware boot fail: HALO_STATE=%#x\n", val);
+ /*
+ * This can't be a regmap_read_poll_timeout() because cs35l56 will NAK
+ * I2C until it has booted which would terminate the poll
+ */
+ poll_ret = read_poll_timeout(regmap_read, read_ret,
+ (val < 0xFFFF) && (val >= CS35L56_HALO_STATE_BOOT_DONE),
+ CS35L56_HALO_STATE_POLL_US,
+ CS35L56_HALO_STATE_TIMEOUT_US,
+ false,
+ cs35l56_base->regmap, reg, &val);
+
+ if (poll_ret) {
+ dev_err(cs35l56_base->dev, "Firmware boot timed out(%d): HALO_STATE=%#x\n",
+ read_ret, val);
return -EIO;
}
// Don't use devm as we need to get against the MFD device
priv->mclk = clk_get_optional(cs42l43->dev, "mclk");
if (IS_ERR(priv->mclk)) {
- dev_err_probe(priv->dev, PTR_ERR(priv->mclk), "Failed to get mclk\n");
+ ret = PTR_ERR(priv->mclk);
+ dev_err_probe(priv->dev, ret, "Failed to get mclk\n");
goto err_pm;
}
{
struct snd_soc_jack *mic_jack = NULL;
struct snd_soc_jack *btn_jack = NULL;
- int *type = (int *)data;
+ int type;
- if (*type & SND_JACK_MICROPHONE)
- mic_jack = hs_jack;
- if (*type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
- SND_JACK_BTN_2 | SND_JACK_BTN_3))
- btn_jack = hs_jack;
+ if (hs_jack) {
+ type = *(int *)data;
+
+ if (type & SND_JACK_MICROPHONE)
+ mic_jack = hs_jack;
+ if (type & (SND_JACK_BTN_0 | SND_JACK_BTN_1 |
+ SND_JACK_BTN_2 | SND_JACK_BTN_3))
+ btn_jack = hs_jack;
+ }
return rt5645_set_jack_detect(component, hs_jack, mic_jack, btn_jack);
}
wcd_clsh_v2_set_hph_mode(comp, mode);
}
+EXPORT_SYMBOL_GPL(wcd_clsh_set_hph_mode);
static void wcd_clsh_set_flyback_current(struct snd_soc_component *comp,
int mode)
return 0;
}
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_set_state);
int wcd_clsh_ctrl_get_state(struct wcd_clsh_ctrl *ctrl)
{
return ctrl->state;
}
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_get_state);
struct wcd_clsh_ctrl *wcd_clsh_ctrl_alloc(struct snd_soc_component *comp,
int version)
return ctrl;
}
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_alloc);
void wcd_clsh_ctrl_free(struct wcd_clsh_ctrl *ctrl)
{
kfree(ctrl);
}
+EXPORT_SYMBOL_GPL(wcd_clsh_ctrl_free);
+
+MODULE_DESCRIPTION("WCD93XX Class-H driver");
+MODULE_LICENSE("GPL");
ret = avs_load_topology(component, filename);
kfree(filename);
+ if (ret == -ENOENT && !strncmp(mach->tplg_filename, "hda-", 4)) {
+ unsigned int vendor_id;
+
+ if (sscanf(mach->tplg_filename, "hda-%08x-tplg.bin", &vendor_id) != 1)
+ return ret;
+
+ if (((vendor_id >> 16) & 0xFFFF) == 0x8086)
+ mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+ "hda-8086-generic-tplg.bin");
+ else
+ mach->tplg_filename = devm_kasprintf(adev->dev, GFP_KERNEL,
+ "hda-generic-tplg.bin");
+
+ filename = kasprintf(GFP_KERNEL, "%s/%s", component->driver->topology_name_prefix,
+ mach->tplg_filename);
+ if (!filename)
+ return -ENOMEM;
+
+ dev_info(card->dev, "trying to load fallback topology %s\n", mach->tplg_filename);
+ ret = avs_load_topology(component, filename);
+ kfree(filename);
+ }
if (ret < 0)
return ret;
int snd_soc_pcm_component_copy(struct snd_pcm_substream *substream,
int channel, unsigned long pos,
- struct iov_iter *buf, unsigned long bytes)
+ struct iov_iter *iter, unsigned long bytes)
{
struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
struct snd_soc_component *component;
if (component->driver->copy)
return soc_component_ret(component,
component->driver->copy(component, substream,
- channel, pos, buf, bytes));
+ channel, pos, iter, bytes));
return -EINVAL;
}
static int dmaengine_copy(struct snd_soc_component *component,
struct snd_pcm_substream *substream,
int channel, unsigned long hwoff,
- struct iov_iter *buf, unsigned long bytes)
+ struct iov_iter *iter, unsigned long bytes)
{
struct snd_pcm_runtime *runtime = substream->runtime;
struct dmaengine_pcm *pcm = soc_component_to_pcm(component);
int (*process)(struct snd_pcm_substream *substream,
int channel, unsigned long hwoff,
- struct iov_iter *buf, unsigned long bytes) = pcm->config->process;
+ unsigned long bytes) = pcm->config->process;
bool is_playback = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
void *dma_ptr = runtime->dma_area + hwoff +
channel * (runtime->dma_bytes / runtime->channels);
if (is_playback)
- if (copy_from_iter(dma_ptr, bytes, buf) != bytes)
+ if (copy_from_iter(dma_ptr, bytes, iter) != bytes)
return -EFAULT;
if (process) {
- int ret = process(substream, channel, hwoff, buf, bytes);
+ int ret = process(substream, channel, hwoff, bytes);
if (ret < 0)
return ret;
}
if (!is_playback)
- if (copy_to_iter(dma_ptr, bytes, buf) != bytes)
+ if (copy_to_iter(dma_ptr, bytes, iter) != bytes)
return -EFAULT;
return 0;
static int stm32_sai_pcm_process_spdif(struct snd_pcm_substream *substream,
int channel, unsigned long hwoff,
- struct iov_iter *buf, unsigned long bytes)
+ unsigned long bytes)
{
struct snd_pcm_runtime *runtime = substream->runtime;
struct snd_soc_pcm_runtime *rtd = asoc_substream_to_rtd(substream);
if (!ep)
return;
- for (i = 0; i < ep->num_urbs; ++i) {
+ for (i = 0; i < NUM_URBS; ++i) {
ctx = &ep->urbs[i];
if (!ctx->urb)
break;
}
/* allocate URBs for an EP */
+/* the callers should handle allocation errors via free_midi_urbs() */
static int alloc_midi_urbs(struct snd_usb_midi2_endpoint *ep)
{
struct snd_usb_midi2_urb *ctx;
return -EIO;
if (ep->direction == STR_OUT) {
err = alloc_midi_urbs(ep);
- if (err)
+ if (err) {
+ free_midi_urbs(ep);
return err;
+ }
}
return 0;
}
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
#define perf_event_name(array, id) ({ \
const char *event_str = NULL; \
\
- if ((id) >= 0 && (id) < ARRAY_SIZE(array)) \
+ if ((id) < ARRAY_SIZE(array)) \
event_str = array[id]; \
event_str; \
})
LIB_DIR = ../lib/api
LIBS = $(LIB_DIR)/libapi.a
-CFLAGS += -Wall -Wextra -I../lib/
-LDFLAGS += $(LIBS)
+CFLAGS += -Wall -Wextra -I../lib/ -pthread
+LDFLAGS += $(LIBS) -pthread
all: $(TARGETS)
test_cgroup_storage \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32
+TEST_INST_SUBDIRS := no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
+TEST_INST_SUBDIRS += bpf_gcc
endif
ifneq ($(CLANG_CPUV4),)
TEST_GEN_PROGS += test_progs-cpuv4
+TEST_INST_SUBDIRS += cpuv4
endif
TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
# Delete partially updated (corrupted) files on error
.DELETE_ON_ERROR:
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ @for DIR in $(TEST_INST_SUBDIRS); do \
+ mkdir -p $(INSTALL_PATH)/$$DIR; \
+ rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
+ done
+endef
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
static inline int sys_fsopen(const char *fsname, unsigned flags)
{
ASSERT_OK(err, "obj_pin");
/* cleanup */
- if (pin_opts.path_fd >= 0)
+ if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
close(pin_opts.path_fd);
if (old_cwd[0])
ASSERT_OK(chdir(old_cwd), "restore_cwd");
goto cleanup;
/* cleanup */
- if (get_opts.path_fd >= 0)
+ if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
close(get_opts.path_fd);
if (old_cwd[0])
ASSERT_OK(chdir(old_cwd), "restore_cwd");
#include "test_d_path_check_rdonly_mem.skel.h"
#include "test_d_path_check_types.skel.h"
+/* sys_close_range is not around for long time, so let's
+ * make sure we can call it on systems with older glibc
+ */
+#ifndef __NR_close_range
+#ifdef __alpha__
+#define __NR_close_range 546
+#else
+#define __NR_close_range 436
+#endif
+#endif
+
static int duration;
static struct {
fstat(indicatorfd, &fileStat);
out_close:
- /* triggers filp_close */
+ /* sys_close no longer triggers filp_close, but we can
+ * call sys_close_range instead which still does
+ */
+#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+
close(pipefd[0]);
close(pipefd[1]);
close(sockfd);
close(devfd);
close(localfd);
close(indicatorfd);
+
+#undef close
return ret;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+void test_sk_storage_omem_uncharge(void)
+{
+ struct sk_storage_omem_uncharge *skel;
+ int sk_fd = -1, map_fd, err, value;
+ socklen_t optlen;
+
+ skel = sk_storage_omem_uncharge__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+ map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+ /* A standalone socket not binding to addr:port,
+ * so nentns is not needed.
+ */
+ sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sk_fd, 0, "socket"))
+ goto done;
+
+ optlen = sizeof(skel->bss->cookie);
+ err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+ goto done;
+
+ value = 0;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+ goto done;
+
+ value = 0xdeadbeef;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+ goto done;
+
+ err = sk_storage_omem_uncharge__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto done;
+
+ close(sk_fd);
+ sk_fd = -1;
+
+ ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+ ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+ sk_storage_omem_uncharge__destroy(skel);
+ if (sk_fd != -1)
+ close(sk_fd);
+}
__ret; \
})
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
static inline int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
if (p < 0)
goto close_cli;
+ if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+ FAIL_ERRNO("poll_connect");
+ goto close_acc;
+ }
+
*v0 = p;
*v1 = c;
return 0;
+close_acc:
+ close(p);
close_cli:
close(c);
close_srv:
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
#define sk_reuse __sk_common.skc_reuse
+#define sk_cookie __sk_common.skc_cookie
#define s6_addr32 in6_u.u6_addr32
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+void *local_storage_ptr = NULL;
+void *sk_ptr = NULL;
+int cookie_found = 0;
+__u64 cookie = 0;
+__u32 omem = 0;
+
+void *bpf_rdonly_cast(void *, __u32) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_storage SEC(".maps");
+
+SEC("fexit/bpf_local_storage_destroy")
+int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+{
+ struct sock *sk;
+
+ if (local_storage_ptr != local_storage)
+ return 0;
+
+ sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
+ if (sk->sk_cookie.counter != cookie)
+ return 0;
+
+ cookie_found++;
+ omem = sk->sk_omem_alloc.counter;
+ local_storage_ptr = NULL;
+
+ return 0;
+}
+
+SEC("fentry/inet6_sock_destruct")
+int BPF_PROG(inet6_sock_destruct, struct sock *sk)
+{
+ int *value;
+
+ if (!cookie || sk->sk_cookie.counter != cookie)
+ return 0;
+
+ value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
+ if (value && *value == 0xdeadbeef) {
+ cookie_found++;
+ sk_ptr = sk;
+ local_storage_ptr = sk->sk_bpf_storage;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
LIBKVM += lib/io.c
LIBKVM += lib/kvm_util.c
LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
LIBKVM += lib/rbtree.c
LIBKVM += lib/sparsebit.c
LIBKVM += lib/test_util.c
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += guest_print_test
TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
-TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
TEST_GEN_PROGS_aarch64 += aarch64/psci_test
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
+TEST_GEN_PROGS_aarch64 += guest_print_test
+TEST_GEN_PROGS_aarch64 += get-reg-list
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += guest_print_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += get-reg-list
TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
TEST_GEN_PROGS_riscv += kvm_page_table_test
TEST_GEN_PROGS_riscv += set_memory_region_test
TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+SPLIT_TESTS += get-reg-list
+
TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD\
-fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
+ -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS))
TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS))
-include $(TEST_DEP_FILES)
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o
$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) cscope.*
+$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
uint64_t val;
vcpu_get_reg(vcpu, reg_id, &val);
- ASSERT_EQ(val, 0);
+ TEST_ASSERT_EQ(val, 0);
/*
* Expect the ioctl to succeed with no effect on the register
vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
vcpu_get_reg(vcpu, reg_id, &val);
- ASSERT_EQ(val, 0);
+ TEST_ASSERT_EQ(val, 0);
}
}
uint64_t val;
vcpu_get_reg(vcpu, reg_id, &val);
- ASSERT_EQ(val, 0);
+ TEST_ASSERT_EQ(val, 0);
r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
TEST_ASSERT(r < 0 && errno == EINVAL,
"unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
vcpu_get_reg(vcpu, reg_id, &val);
- ASSERT_EQ(val, 0);
+ TEST_ASSERT_EQ(val, 0);
}
}
*
* Copyright (c) 2021, Google LLC.
*/
-
#define _GNU_SOURCE
#include <stdlib.h>
xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
/* Make sure we are dealing with the correct timer IRQ */
- GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+ GUEST_ASSERT_EQ(intid, timer_irq);
/* Basic 'timer condition met' check */
- GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
- GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+ __GUEST_ASSERT(xcnt >= cval,
+ "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx",
+ xcnt, cval, xcnt_diff_us);
+ __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt);
WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
}
TIMER_TEST_ERR_MARGIN_US);
irq_iter = READ_ONCE(shared_data->nr_iter);
- GUEST_ASSERT_2(config_iter + 1 == irq_iter,
- config_iter + 1, irq_iter);
+ GUEST_ASSERT_EQ(config_iter + 1, irq_iter);
}
}
break;
case UCALL_ABORT:
sync_global_from_guest(vm, *shared_data);
- REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u",
- GUEST_ASSERT_ARG(uc, 0),
- GUEST_ASSERT_ARG(uc, 1),
- GUEST_ASSERT_ARG(uc, 2),
- vcpu_idx,
- shared_data->guest_stage,
- shared_data->nr_iter);
+ fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n",
+ vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+ REPORT_GUEST_ASSERT(uc);
break;
default:
TEST_FAIL("Unexpected guest exit\n");
static void guest_ss_handler(struct ex_regs *regs)
{
- GUEST_ASSERT_1(ss_idx < 4, ss_idx);
+ __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx);
ss_addr[ss_idx++] = regs->pc;
regs->pstate |= SPSR_SS;
}
/* Userspace disables Single Step when the end is nigh. */
asm volatile("iter_ss_end:\n");
- GUEST_ASSERT(bvr == w_bvr);
- GUEST_ASSERT(wvr == w_wvr);
+ GUEST_ASSERT_EQ(bvr, w_bvr);
+ GUEST_ASSERT_EQ(wvr, w_wvr);
}
GUEST_DONE();
}
vcpu_run(vcpu);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
*
* Copyright (C) 2020, Red Hat, Inc.
*
- * When attempting to migrate from a host with an older kernel to a host
- * with a newer kernel we allow the newer kernel on the destination to
- * list new registers with get-reg-list. We assume they'll be unused, at
- * least until the guest reboots, and so they're relatively harmless.
- * However, if the destination host with the newer kernel is missing
- * registers which the source host with the older kernel has, then that's
- * a regression in get-reg-list. This test checks for that regression by
- * checking the current list against a blessed list. We should never have
- * missing registers, but if new ones appear then they can probably be
- * added to the blessed list. A completely new blessed list can be created
- * by running the test with the --list command line argument.
- *
- * Note, the blessed list should be created from the oldest possible
- * kernel. We can't go older than v4.15, though, because that's the first
- * release to expose the ID system registers in KVM_GET_REG_LIST, see
- * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
- * from guests"). Also, one must use the --core-reg-fixup command line
- * option when running on an older kernel that doesn't include df205b5c6328
- * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
*/
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/wait.h>
#include "kvm_util.h"
#include "test_util.h"
#include "processor.h"
-static struct kvm_reg_list *reg_list;
-static __u64 *blessed_reg, blessed_n;
-
-struct reg_sublist {
- const char *name;
- long capability;
- int feature;
- bool finalize;
- __u64 *regs;
- __u64 regs_n;
- __u64 *rejects_set;
- __u64 rejects_set_n;
-};
-
struct feature_id_reg {
__u64 reg;
__u64 id_reg;
}
};
-struct vcpu_config {
- char *name;
- struct reg_sublist sublists[];
-};
-
-static struct vcpu_config *vcpu_configs[];
-static int vcpu_configs_n;
-
-#define for_each_sublist(c, s) \
- for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-
-#define for_each_reg(i) \
- for ((i) = 0; (i) < reg_list->n; ++(i))
-
-#define for_each_reg_filtered(i) \
- for_each_reg(i) \
- if (!filter_reg(reg_list->reg[i]))
-
-#define for_each_missing_reg(i) \
- for ((i) = 0; (i) < blessed_n; ++(i)) \
- if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \
- if (check_supported_feat_reg(vcpu, blessed_reg[i]))
-
-#define for_each_new_reg(i) \
- for_each_reg_filtered(i) \
- if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
-
-static const char *config_name(struct vcpu_config *c)
-{
- struct reg_sublist *s;
- int len = 0;
-
- if (c->name)
- return c->name;
-
- for_each_sublist(c, s)
- len += strlen(s->name) + 1;
-
- c->name = malloc(len);
-
- len = 0;
- for_each_sublist(c, s) {
- if (!strcmp(s->name, "base"))
- continue;
- strcat(c->name + len, s->name);
- len += strlen(s->name) + 1;
- c->name[len - 1] = '+';
- }
- c->name[len - 1] = '\0';
-
- return c->name;
-}
-
-static bool has_cap(struct vcpu_config *c, long capability)
-{
- struct reg_sublist *s;
-
- for_each_sublist(c, s)
- if (s->capability == capability)
- return true;
- return false;
-}
-
-static bool filter_reg(__u64 reg)
+bool filter_reg(__u64 reg)
{
/*
* DEMUX register presence depends on the host's CLIDR_EL1.
return false;
}
-static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
-{
- int i;
-
- for (i = 0; i < nr_regs; ++i)
- if (reg == regs[i])
- return true;
- return false;
-}
-
static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
{
int i, ret;
return true;
}
-static const char *str_with_index(const char *template, __u64 index)
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
{
- char *str, *p;
- int n;
+ return check_supported_feat_reg(vcpu, reg);
+}
- str = strdup(template);
- p = strstr(str, "##");
- n = sprintf(p, "%lld", index);
- strcat(p + n, strstr(template, "##") + 2);
+bool check_reject_set(int err)
+{
+ return err == EPERM;
+}
- return (const char *)str;
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int feature;
+
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
+ }
}
#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
#define CORE_SPSR_XX_NR_WORDS 2
#define CORE_FPREGS_XX_NR_WORDS 4
-static const char *core_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *core_id_to_str(const char *prefix, __u64 id)
{
__u64 core_off = id & ~REG_MASK, idx;
case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
KVM_REG_ARM_CORE_REG(regs.regs[30]):
idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
case KVM_REG_ARM_CORE_REG(regs.sp):
return "KVM_REG_ARM_CORE_REG(regs.sp)";
case KVM_REG_ARM_CORE_REG(regs.pc):
case KVM_REG_ARM_CORE_REG(spsr[0]) ...
KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
- TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
- TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx);
- return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
}
- TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id);
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
return NULL;
}
-static const char *sve_id_to_str(struct vcpu_config *c, __u64 id)
+static const char *sve_id_to_str(const char *prefix, __u64 id)
{
__u64 sve_off, n, i;
sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
- TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id);
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
switch (sve_off) {
case KVM_REG_ARM64_SVE_ZREG_BASE ...
KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
- "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id);
- return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
case KVM_REG_ARM64_SVE_PREG_BASE ...
KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
- "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id);
- return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
case KVM_REG_ARM64_SVE_FFR_BASE:
TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
- "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id);
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
return "KVM_REG_ARM64_SVE_FFR(0)";
}
return NULL;
}
-static void print_reg(struct vcpu_config *c, __u64 id)
+void print_reg(const char *prefix, __u64 id)
{
unsigned op0, op1, crn, crm, op2;
const char *reg_size = NULL;
TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
- "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id);
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
switch (id & KVM_REG_SIZE_MASK) {
case KVM_REG_SIZE_U8:
break;
default:
TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
- config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
}
switch (id & KVM_REG_ARM_COPROC_MASK) {
case KVM_REG_ARM_CORE:
- printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id));
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
break;
case KVM_REG_ARM_DEMUX:
TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
- "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id);
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
break;
crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
- "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id);
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
break;
case KVM_REG_ARM_FW:
TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
- "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id);
+ "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
break;
case KVM_REG_ARM_FW_FEAT_BMAP:
TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
- "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id);
+ "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
break;
case KVM_REG_ARM64_SVE:
- if (has_cap(c, KVM_CAP_ARM_SVE))
- printf("\t%s,\n", sve_id_to_str(c, id));
- else
- TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id);
+ printf("\t%s,\n", sve_id_to_str(prefix, id));
break;
default:
TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
- config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
- }
-}
-
-/*
- * Older kernels listed each 32-bit word of CORE registers separately.
- * For 64 and 128-bit registers we need to ignore the extra words. We
- * also need to fixup the sizes, because the older kernels stated all
- * registers were 64-bit, even when they weren't.
- */
-static void core_reg_fixup(void)
-{
- struct kvm_reg_list *tmp;
- __u64 id, core_off;
- int i;
-
- tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
-
- for (i = 0; i < reg_list->n; ++i) {
- id = reg_list->reg[i];
-
- if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
- tmp->reg[tmp->n++] = id;
- continue;
- }
-
- core_off = id & ~REG_MASK;
-
- switch (core_off) {
- case 0x52: case 0xd2: case 0xd6:
- /*
- * These offsets are pointing at padding.
- * We need to ignore them too.
- */
- continue;
- case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
- KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
- if (core_off & 3)
- continue;
- id &= ~KVM_REG_SIZE_MASK;
- id |= KVM_REG_SIZE_U128;
- tmp->reg[tmp->n++] = id;
- continue;
- case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
- case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
- id &= ~KVM_REG_SIZE_MASK;
- id |= KVM_REG_SIZE_U32;
- tmp->reg[tmp->n++] = id;
- continue;
- default:
- if (core_off & 1)
- continue;
- tmp->reg[tmp->n++] = id;
- break;
- }
+ prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
}
-
- free(reg_list);
- reg_list = tmp;
-}
-
-static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init)
-{
- struct reg_sublist *s;
-
- for_each_sublist(c, s)
- if (s->capability)
- init->features[s->feature / 32] |= 1 << (s->feature % 32);
-}
-
-static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c)
-{
- struct reg_sublist *s;
- int feature;
-
- for_each_sublist(c, s) {
- if (s->finalize) {
- feature = s->feature;
- vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
- }
- }
-}
-
-static void check_supported(struct vcpu_config *c)
-{
- struct reg_sublist *s;
-
- for_each_sublist(c, s) {
- if (!s->capability)
- continue;
-
- __TEST_REQUIRE(kvm_has_cap(s->capability),
- "%s: %s not available, skipping tests\n",
- config_name(c), s->name);
- }
-}
-
-static bool print_list;
-static bool print_filtered;
-static bool fixup_core_regs;
-
-static void run_test(struct vcpu_config *c)
-{
- struct kvm_vcpu_init init = { .target = -1, };
- int new_regs = 0, missing_regs = 0, i, n;
- int failed_get = 0, failed_set = 0, failed_reject = 0;
- struct kvm_vcpu *vcpu;
- struct kvm_vm *vm;
- struct reg_sublist *s;
-
- check_supported(c);
-
- vm = vm_create_barebones();
- prepare_vcpu_init(c, &init);
- vcpu = __vm_vcpu_add(vm, 0);
- aarch64_vcpu_setup(vcpu, &init);
- finalize_vcpu(vcpu, c);
-
- reg_list = vcpu_get_reg_list(vcpu);
-
- if (fixup_core_regs)
- core_reg_fixup();
-
- if (print_list || print_filtered) {
- putchar('\n');
- for_each_reg(i) {
- __u64 id = reg_list->reg[i];
- if ((print_list && !filter_reg(id)) ||
- (print_filtered && filter_reg(id)))
- print_reg(c, id);
- }
- putchar('\n');
- return;
- }
-
- /*
- * We only test that we can get the register and then write back the
- * same value. Some registers may allow other values to be written
- * back, but others only allow some bits to be changed, and at least
- * for ID registers set will fail if the value does not exactly match
- * what was returned by get. If registers that allow other values to
- * be written need to have the other values tested, then we should
- * create a new set of tests for those in a new independent test
- * executable.
- */
- for_each_reg(i) {
- uint8_t addr[2048 / 8];
- struct kvm_one_reg reg = {
- .id = reg_list->reg[i],
- .addr = (__u64)&addr,
- };
- bool reject_reg = false;
- int ret;
-
- ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
- if (ret) {
- printf("%s: Failed to get ", config_name(c));
- print_reg(c, reg.id);
- putchar('\n');
- ++failed_get;
- }
-
- /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
- for_each_sublist(c, s) {
- if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
- reject_reg = true;
- ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®);
- if (ret != -1 || errno != EPERM) {
- printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
- print_reg(c, reg.id);
- putchar('\n');
- ++failed_reject;
- }
- break;
- }
- }
-
- if (!reject_reg) {
- ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®);
- if (ret) {
- printf("%s: Failed to set ", config_name(c));
- print_reg(c, reg.id);
- putchar('\n');
- ++failed_set;
- }
- }
- }
-
- for_each_sublist(c, s)
- blessed_n += s->regs_n;
- blessed_reg = calloc(blessed_n, sizeof(__u64));
-
- n = 0;
- for_each_sublist(c, s) {
- for (i = 0; i < s->regs_n; ++i)
- blessed_reg[n++] = s->regs[i];
- }
-
- for_each_new_reg(i)
- ++new_regs;
-
- for_each_missing_reg(i)
- ++missing_regs;
-
- if (new_regs || missing_regs) {
- n = 0;
- for_each_reg_filtered(i)
- ++n;
-
- printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
- printf("%s: Number registers: %5lld (includes %lld filtered registers)\n",
- config_name(c), reg_list->n, reg_list->n - n);
- }
-
- if (new_regs) {
- printf("\n%s: There are %d new registers.\n"
- "Consider adding them to the blessed reg "
- "list with the following lines:\n\n", config_name(c), new_regs);
- for_each_new_reg(i)
- print_reg(c, reg_list->reg[i]);
- putchar('\n');
- }
-
- if (missing_regs) {
- printf("\n%s: There are %d missing registers.\n"
- "The following lines are missing registers:\n\n", config_name(c), missing_regs);
- for_each_missing_reg(i)
- print_reg(c, blessed_reg[i]);
- putchar('\n');
- }
-
- TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
- "%s: There are %d missing registers; "
- "%d registers failed get; %d registers failed set; %d registers failed reject",
- config_name(c), missing_regs, failed_get, failed_set, failed_reject);
-
- pr_info("%s: PASS\n", config_name(c));
- blessed_n = 0;
- free(blessed_reg);
- free(reg_list);
- kvm_vm_free(vm);
-}
-
-static void help(void)
-{
- struct vcpu_config *c;
- int i;
-
- printf(
- "\n"
- "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n"
- " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
- " '<selection>' may be\n");
-
- for (i = 0; i < vcpu_configs_n; ++i) {
- c = vcpu_configs[i];
- printf(
- " '%s'\n", config_name(c));
- }
-
- printf(
- "\n"
- " --list Print the register list rather than test it (requires --config)\n"
- " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
- " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n"
- "\n"
- );
-}
-
-static struct vcpu_config *parse_config(const char *config)
-{
- struct vcpu_config *c;
- int i;
-
- if (config[8] != '=')
- help(), exit(1);
-
- for (i = 0; i < vcpu_configs_n; ++i) {
- c = vcpu_configs[i];
- if (strcmp(config_name(c), &config[9]) == 0)
- break;
- }
-
- if (i == vcpu_configs_n)
- help(), exit(1);
-
- return c;
-}
-
-int main(int ac, char **av)
-{
- struct vcpu_config *c, *sel = NULL;
- int i, ret = 0;
- pid_t pid;
-
- for (i = 1; i < ac; ++i) {
- if (strcmp(av[i], "--core-reg-fixup") == 0)
- fixup_core_regs = true;
- else if (strncmp(av[i], "--config", 8) == 0)
- sel = parse_config(av[i]);
- else if (strcmp(av[i], "--list") == 0)
- print_list = true;
- else if (strcmp(av[i], "--list-filtered") == 0)
- print_filtered = true;
- else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
- help(), exit(0);
- else
- help(), exit(1);
- }
-
- if (print_list || print_filtered) {
- /*
- * We only want to print the register list of a single config.
- */
- if (!sel)
- help(), exit(1);
- }
-
- for (i = 0; i < vcpu_configs_n; ++i) {
- c = vcpu_configs[i];
- if (sel && c != sel)
- continue;
-
- pid = fork();
-
- if (!pid) {
- run_test(c);
- exit(0);
- } else {
- int wstatus;
- pid_t wpid = wait(&wstatus);
- TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
- if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
- ret = KSFT_FAIL;
- }
- }
-
- return ret;
}
/*
- * The current blessed list was primed with the output of kernel version
+ * The original blessed list was primed with the output of kernel version
* v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and it's fixup function have been removed
+ * from the test, as it's unlikely to use this type of test on a kernel
+ * older than v5.2.)
*
* The blessed list is up to date with kernel version v6.4 (or so we hope)
*/
.regs_n = ARRAY_SIZE(pauth_generic_regs), \
}
-static struct vcpu_config vregs_config = {
+static struct vcpu_reg_list vregs_config = {
.sublists = {
BASE_SUBLIST,
VREGS_SUBLIST,
{0},
},
};
-static struct vcpu_config vregs_pmu_config = {
+static struct vcpu_reg_list vregs_pmu_config = {
.sublists = {
BASE_SUBLIST,
VREGS_SUBLIST,
{0},
},
};
-static struct vcpu_config sve_config = {
+static struct vcpu_reg_list sve_config = {
.sublists = {
BASE_SUBLIST,
SVE_SUBLIST,
{0},
},
};
-static struct vcpu_config sve_pmu_config = {
+static struct vcpu_reg_list sve_pmu_config = {
.sublists = {
BASE_SUBLIST,
SVE_SUBLIST,
{0},
},
};
-static struct vcpu_config pauth_config = {
+static struct vcpu_reg_list pauth_config = {
.sublists = {
BASE_SUBLIST,
VREGS_SUBLIST,
{0},
},
};
-static struct vcpu_config pauth_pmu_config = {
+static struct vcpu_reg_list pauth_pmu_config = {
.sublists = {
BASE_SUBLIST,
VREGS_SUBLIST,
},
};
-static struct vcpu_config *vcpu_configs[] = {
+struct vcpu_reg_list *vcpu_configs[] = {
&vregs_config,
&vregs_pmu_config,
&sve_config,
&pauth_config,
&pauth_pmu_config,
};
-static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
* hypercalls are properly masked or unmasked to the guest when disabled or
* enabled from the KVM userspace, respectively.
*/
-
#include <errno.h>
#include <linux/arm-smccc.h>
#include <asm/kvm.h>
switch (stage) {
case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
case TEST_STAGE_HVC_IFACE_FALSE_INFO:
- GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED,
- res.a0, hc_info->func_id, hc_info->arg1);
+ __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
break;
case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
- GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED,
- res.a0, hc_info->func_id, hc_info->arg1);
+ __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
break;
default:
- GUEST_ASSERT_1(0, stage);
+ GUEST_FAIL("Unexpected stage = %u", stage);
}
}
}
guest_test_hvc(false_hvc_info);
break;
default:
- GUEST_ASSERT_1(0, stage);
+ GUEST_FAIL("Unexpected stage = %u", stage);
}
GUEST_SYNC(stage);
guest_done = true;
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u",
- GUEST_ASSERT_ARG(uc, 0),
- GUEST_ASSERT_ARG(uc, 1),
- GUEST_ASSERT_ARG(uc, 2), stage);
+ REPORT_GUEST_ASSERT(uc);
break;
default:
TEST_FAIL("Unexpected guest exit\n");
* hugetlbfs with a hole). It checks that the expected handling method is
* called (e.g., uffd faults with the right address and write/read flag).
*/
-
#define _GNU_SOURCE
#include <linux/bitmap.h>
#include <fcntl.h>
static void no_dabt_handler(struct ex_regs *regs)
{
- GUEST_ASSERT_1(false, read_sysreg(far_el1));
+ GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1));
}
static void no_iabt_handler(struct ex_regs *regs)
{
- GUEST_ASSERT_1(false, regs->pc);
+ GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
}
static struct uffd_args {
TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
"The only expected UFFD mode is MISSING");
- ASSERT_EQ(addr, (uint64_t)args->hva);
+ TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
pr_debug("uffd fault: addr=%p write=%d\n",
(void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
hva = (void *)region->region.userspace_addr;
- ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+ TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
memcpy(hva, run->mmio.data, run->mmio.len);
events.mmio_exits += 1;
static void check_event_counts(struct test_desc *test)
{
- ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
- ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
- ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+ TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+ TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+ TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
}
static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
}
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
* host to inject a specific intid via a GUEST_SYNC call, and then checks that
* it received it.
*/
-
#include <asm/kvm.h>
#include <asm/kvm_para.h>
#include <sys/eventfd.h>
run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * The blessed list should be created from the oldest possible kernel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+extern struct vcpu_reg_list *vcpu_configs[];
+extern int vcpu_configs_n;
+
+#define for_each_reg(i) \
+ for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_reg_filtered(i) \
+ for_each_reg(i) \
+ if (!filter_reg(reg_list->reg[i]))
+
+#define for_each_missing_reg(i) \
+ for ((i) = 0; (i) < blessed_n; ++(i)) \
+ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \
+ if (check_supported_reg(vcpu, blessed_reg[i]))
+
+#define for_each_new_reg(i) \
+ for_each_reg_filtered(i) \
+ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+#define for_each_present_blessed_reg(i) \
+ for_each_reg(i) \
+ if (find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+static const char *config_name(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int len = 0;
+
+ if (c->name)
+ return c->name;
+
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ c->name = malloc(len);
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ strcat(c->name + len, s->name);
+ len += strlen(s->name) + 1;
+ c->name[len - 1] = '+';
+ }
+ c->name[len - 1] = '\0';
+
+ return c->name;
+}
+
+bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ return true;
+}
+
+bool __weak filter_reg(__u64 reg)
+{
+ return false;
+}
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+ int i;
+
+ for (i = 0; i < nr_regs; ++i)
+ if (reg == regs[i])
+ return true;
+ return false;
+}
+
+void __weak print_reg(const char *prefix, __u64 id)
+{
+ printf("\t0x%llx,\n", id);
+}
+
+bool __weak check_reject_set(int err)
+{
+ return true;
+}
+
+void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+}
+
+#ifdef __aarch64__
+static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+{
+ struct vcpu_reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ init->features[s->feature / 32] |= 1 << (s->feature % 32);
+}
+
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ struct kvm_vcpu_init init = { .target = -1, };
+ struct kvm_vcpu *vcpu;
+
+ prepare_vcpu_init(c, &init);
+ vcpu = __vm_vcpu_add(vm, 0);
+ aarch64_vcpu_setup(vcpu, &init);
+
+ return vcpu;
+}
+#else
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ return __vm_vcpu_add(vm, 0);
+}
+#endif
+
+static void check_supported(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (!s->capability)
+ continue;
+
+ __TEST_REQUIRE(kvm_has_cap(s->capability),
+ "%s: %s not available, skipping tests\n",
+ config_name(c), s->name);
+ }
+}
+
+static bool print_list;
+static bool print_filtered;
+
+static void run_test(struct vcpu_reg_list *c)
+{
+ int new_regs = 0, missing_regs = 0, i, n;
+ int failed_get = 0, failed_set = 0, failed_reject = 0;
+ int skipped_set = 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct vcpu_reg_sublist *s;
+
+ check_supported(c);
+
+ vm = vm_create_barebones();
+ vcpu = vcpu_config_get_vcpu(c, vm);
+ finalize_vcpu(vcpu, c);
+
+ reg_list = vcpu_get_reg_list(vcpu);
+
+ if (print_list || print_filtered) {
+ putchar('\n');
+ for_each_reg(i) {
+ __u64 id = reg_list->reg[i];
+ if ((print_list && !filter_reg(id)) ||
+ (print_filtered && filter_reg(id)))
+ print_reg(config_name(c), id);
+ }
+ putchar('\n');
+ return;
+ }
+
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
+ blessed_reg = calloc(blessed_n, sizeof(__u64));
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
+
+ /*
+ * We only test that we can get the register and then write back the
+ * same value. Some registers may allow other values to be written
+ * back, but others only allow some bits to be changed, and at least
+ * for ID registers set will fail if the value does not exactly match
+ * what was returned by get. If registers that allow other values to
+ * be written need to have the other values tested, then we should
+ * create a new set of tests for those in a new independent test
+ * executable.
+ *
+ * Only do the get/set tests on present, blessed list registers,
+ * since we don't know the capabilities of any new registers.
+ */
+ for_each_present_blessed_reg(i) {
+ uint8_t addr[2048 / 8];
+ struct kvm_one_reg reg = {
+ .id = reg_list->reg[i],
+ .addr = (__u64)&addr,
+ };
+ bool reject_reg = false, skip_reg = false;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
+ if (ret) {
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_get;
+ }
+
+ for_each_sublist(c, s) {
+ /* rejects_set registers are rejected for set operation */
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®);
+ if (ret != -1 || !check_reject_set(errno)) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
+ }
+
+ /* skips_set registers are skipped for set operation */
+ if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) {
+ skip_reg = true;
+ ++skipped_set;
+ break;
+ }
+ }
+
+ if (!reject_reg && !skip_reg) {
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
+ }
+ }
+
+ for_each_new_reg(i)
+ ++new_regs;
+
+ for_each_missing_reg(i)
+ ++missing_regs;
+
+ if (new_regs || missing_regs) {
+ n = 0;
+ for_each_reg_filtered(i)
+ ++n;
+
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld (includes %lld filtered registers)\n",
+ config_name(c), reg_list->n, reg_list->n - n);
+ }
+
+ if (new_regs) {
+ printf("\n%s: There are %d new registers.\n"
+ "Consider adding them to the blessed reg "
+ "list with the following lines:\n\n", config_name(c), new_regs);
+ for_each_new_reg(i)
+ print_reg(config_name(c), reg_list->reg[i]);
+ putchar('\n');
+ }
+
+ if (missing_regs) {
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
+ for_each_missing_reg(i)
+ print_reg(config_name(c), blessed_reg[i]);
+ putchar('\n');
+ }
+
+ TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+ "%s: There are %d missing registers; %d registers failed get; "
+ "%d registers failed set; %d registers failed reject; %d registers skipped set",
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set);
+
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+ struct vcpu_reg_list *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ "\n"
+ );
+}
+
+static struct vcpu_reg_list *parse_config(const char *config)
+{
+ struct vcpu_reg_list *c = NULL;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+int main(int ac, char **av)
+{
+ struct vcpu_reg_list *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct guest_vals {
+ uint64_t a;
+ uint64_t b;
+ uint64_t type;
+};
+
+static struct guest_vals vals;
+
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+#define TYPE_LIST \
+TYPE(test_type_i64, I64, "%ld", int64_t) \
+TYPE(test_type_u64, U64u, "%lu", uint64_t) \
+TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \
+TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \
+TYPE(test_type_u32, U32u, "%u", uint32_t) \
+TYPE(test_type_x32, U32x, "0x%x", uint32_t) \
+TYPE(test_type_X32, U32X, "0x%X", uint32_t) \
+TYPE(test_type_int, INT, "%d", int) \
+TYPE(test_type_char, CHAR, "%c", char) \
+TYPE(test_type_str, STR, "'%s'", const char *) \
+TYPE(test_type_ptr, PTR, "%p", uintptr_t)
+
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+ TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert);
+
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \
+static void fn(struct kvm_vcpu *vcpu, T a, T b) \
+{ \
+ char expected_printf[UCALL_BUFFER_LEN]; \
+ char expected_assert[UCALL_BUFFER_LEN]; \
+ \
+ snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+ snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+ vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \
+ sync_global_to_guest(vcpu->vm, vals); \
+ run_test(vcpu, expected_printf, expected_assert); \
+}
+
+#define TYPE(fn, ext, fmt_t, T) \
+ BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+ TYPE_LIST
+#undef TYPE
+
+static void guest_code(void)
+{
+ while (1) {
+ switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T) \
+ case TYPE_##ext: \
+ GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \
+ __GUEST_ASSERT(vals.a == vals.b, \
+ ASSERT_FMT_##ext, vals.a, vals.b); \
+ break;
+ TYPE_LIST
+#undef TYPE
+ default:
+ GUEST_SYNC(vals.type);
+ }
+
+ GUEST_DONE();
+ }
+}
+
+/*
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contains the matching string, it also contains additional assert
+ * info. Fortunately the part that matches should be at the very end of
+ * 'assert_msg'.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+ int len_str = strlen(assert_msg);
+ int len_substr = strlen(expected_assert_msg);
+ int offset = len_str - len_substr;
+
+ TEST_ASSERT(len_substr <= len_str,
+ "Expected '%s' to be a substring of '%s'\n",
+ assert_msg, expected_assert_msg);
+
+ TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_assert_msg, &assert_msg[offset]);
+}
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+ break;
+ case UCALL_PRINTF:
+ TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_printf, uc.buffer);
+ break;
+ case UCALL_ABORT:
+ ucall_abort(uc.buffer, expected_assert);
+ break;
+ case UCALL_DONE:
+ return;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+static void guest_code_limits(void)
+{
+ char test_str[UCALL_BUFFER_LEN + 10];
+
+ memset(test_str, 'a', sizeof(test_str));
+ test_str[sizeof(test_str) - 1] = 0;
+
+ GUEST_PRINTF("%s", test_str);
+}
+
+static void test_limits(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
+ "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n",
+ uc.cmd, UCALL_ABORT);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ test_type_i64(vcpu, -1, -1);
+ test_type_i64(vcpu, -1, 1);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+ test_type_int(vcpu, -1, -1);
+ test_type_int(vcpu, -1, 1);
+ test_type_int(vcpu, 1, 1);
+
+ test_type_char(vcpu, 'a', 'a');
+ test_type_char(vcpu, 'a', 'A');
+ test_type_char(vcpu, 'a', 'b');
+
+ test_type_str(vcpu, "foo", "foo");
+ test_type_str(vcpu, "foo", "bar");
+
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ kvm_vm_free(vm);
+
+ test_limits();
+
+ return 0;
+}
case PHYSICAL:
return read_sysreg(cntpct_el0);
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
/* We should not reach here */
write_sysreg(cval, cntp_cval_el0);
break;
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
isb();
case PHYSICAL:
return read_sysreg(cntp_cval_el0);
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
/* We should not reach here */
write_sysreg(tval, cntp_tval_el0);
break;
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
isb();
write_sysreg(ctl, cntp_ctl_el0);
break;
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
isb();
case PHYSICAL:
return read_sysreg(cntp_ctl_el0);
default:
- GUEST_ASSERT_1(0, timer);
+ GUEST_FAIL("Unexpected timer type = %u", timer);
}
/* We should not reach here */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
#include <linux/kernel.h>
#include <linux/kvm.h>
#include "linux/rbtree.h"
+#include <linux/types.h>
#include <asm/atomic.h>
uint32_t memslots[NR_MEM_REGIONS];
};
+struct vcpu_reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+ __u64 *skips_set;
+ __u64 skips_set_n;
+};
+
+struct vcpu_reg_list {
+ char *name;
+ struct vcpu_reg_sublist sublists[];
+};
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
#define kvm_for_each_vcpu(vm, i, vcpu) \
for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
KVM_REG_RISCV_TIMER_REG(name), \
KVM_REG_SIZE_U64)
+#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \
+ idx, KVM_REG_SIZE_ULONG)
+
/* L3 index Bit[47:39] */
#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
#define PGTBL_L3_INDEX_SHIFT 39
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UCALL,
+ uc, 0, 0, 0, 0, 0);
+}
+
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
-#define ASSERT_EQ(a, b) do { \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- TEST_ASSERT(__a == __b, \
- "ASSERT_EQ(%s, %s) failed.\n" \
- "\t%s is %#lx\n" \
- "\t%s is %#lx", \
- #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \
+ "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
} while (0)
#define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \
return num;
}
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
+char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
#ifndef SELFTEST_KVM_UCALL_COMMON_H
#define SELFTEST_KVM_UCALL_COMMON_H
#include "test_util.h"
+#include "ucall.h"
/* Common ucalls */
enum {
UCALL_NONE,
UCALL_SYNC,
UCALL_ABORT,
+ UCALL_PRINTF,
UCALL_DONE,
UCALL_UNHANDLED,
};
#define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
struct ucall {
uint64_t cmd;
uint64_t args[UCALL_MAX_ARGS];
+ char buffer[UCALL_BUFFER_LEN];
/* Host virtual address of this struct. */
struct ucall *hva;
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
void ucall(uint64_t cmd, int nargs, ...);
+void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+ unsigned int line, const char *fmt, ...);
uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
/*
* Perform userspace call without any associated data. This bare call avoids
#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
enum guest_assert_builtin_args {
GUEST_ERROR_STRING,
GUEST_FILE,
GUEST_ASSERT_BUILTIN_NARGS
};
-#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \
-do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \
- "Failed guest assert: " _condstr, \
- __FILE__, __LINE__, ##_args); \
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \
+do { \
+ if (!(_condition)) \
+ ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \
} while (0)
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT(_condition, #_condition, 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT(_condition, #_condition, 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4))
-
-#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b)
+#define __GUEST_ASSERT(_condition, _fmt, _args...) \
+ ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
-#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \
- TEST_FAIL("%s at %s:%ld\n" fmt, \
- (const char *)(_ucall).args[GUEST_ERROR_STRING], \
- (const char *)(_ucall).args[GUEST_FILE], \
- (_ucall).args[GUEST_LINE], \
- ##_args)
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition)
-#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i])
+#define GUEST_FAIL(_fmt, _args...) \
+ ucall_assert(UCALL_ABORT, "Unconditional guest failure", \
+ __FILE__, __LINE__, _fmt, ##_args)
-#define REPORT_GUEST_ASSERT(ucall) \
- __REPORT_GUEST_ASSERT((ucall), "")
-
-#define REPORT_GUEST_ASSERT_1(ucall, fmt) \
- __REPORT_GUEST_ASSERT((ucall), \
- fmt, \
- GUEST_ASSERT_ARG((ucall), 0))
-
-#define REPORT_GUEST_ASSERT_2(ucall, fmt) \
- __REPORT_GUEST_ASSERT((ucall), \
- fmt, \
- GUEST_ASSERT_ARG((ucall), 0), \
- GUEST_ASSERT_ARG((ucall), 1))
-
-#define REPORT_GUEST_ASSERT_3(ucall, fmt) \
- __REPORT_GUEST_ASSERT((ucall), \
- fmt, \
- GUEST_ASSERT_ARG((ucall), 0), \
- GUEST_ASSERT_ARG((ucall), 1), \
- GUEST_ASSERT_ARG((ucall), 2))
+#define GUEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
-#define REPORT_GUEST_ASSERT_4(ucall, fmt) \
- __REPORT_GUEST_ASSERT((ucall), \
- fmt, \
- GUEST_ASSERT_ARG((ucall), 0), \
- GUEST_ASSERT_ARG((ucall), 1), \
- GUEST_ASSERT_ARG((ucall), 2), \
- GUEST_ASSERT_ARG((ucall), 3))
+#define GUEST_ASSERT_NE(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
-#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \
- __REPORT_GUEST_ASSERT((ucall), fmt, ##args)
+#define REPORT_GUEST_ASSERT(ucall) \
+ test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \
+ (const char *)(ucall).args[GUEST_FILE], \
+ (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
#endif /* SELFTEST_KVM_UCALL_COMMON_H */
#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
if (READ_ONCE(host_quit))
return NULL;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
ret = _vcpu_run(vcpu);
ts_diff = timespec_elapsed(start);
/* Test the stage of KVM creating mappings */
*current_stage = KVM_CREATE_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
*current_stage = KVM_UPDATE_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
*current_stage = KVM_ADJUST_MAPPINGS;
- clock_gettime(CLOCK_MONOTONIC_RAW, &start);
+ clock_gettime(CLOCK_MONOTONIC, &start);
vcpus_complete_new_stage(*current_stage);
ts_diff = timespec_elapsed(start);
*/
#include "kvm_util.h"
-/*
- * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
- * VM), it must not be accessed from host code.
- */
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
{
write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
}
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- WRITE_ONCE(*ucall_exit_mmio_addr, uc);
-}
-
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do { \
+ GUEST_ASSERT(str < end); \
+ *str++ = (v); \
+} while (0)
+
+static int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) \
+({ \
+ int __res; \
+ \
+ __res = ((uint64_t) n) % (uint32_t) base; \
+ n = ((uint64_t) n) / (uint32_t) base; \
+ __res; \
+})
+
+static char *number(char *str, const char *end, long num, int base, int size,
+ int precision, int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /*
+ * locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters
+ */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ if (sign)
+ APPEND_BUFFER_SAFE(str, end, sign);
+ if (type & SPECIAL) {
+ if (base == 8)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ else if (base == 16) {
+ APPEND_BUFFER_SAFE(str, end, '0');
+ APPEND_BUFFER_SAFE(str, end, 'x');
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, c);
+ while (i < precision--)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ while (i-- > 0)
+ APPEND_BUFFER_SAFE(str, end, tmp[i]);
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+
+ return str;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+ char *str, *end;
+ const char *s;
+ uint64_t num;
+ int i, base;
+ int len;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /*
+ * min. # of digits for integers; max
+ * number of chars for from string
+ */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ end = buf + n;
+ GUEST_ASSERT(buf < end);
+ GUEST_ASSERT(n > 0);
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ APPEND_BUFFER_SAFE(str, end,
+ (uint8_t)va_arg(args, int));
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ for (i = 0; i < len; ++i)
+ APPEND_BUFFER_SAFE(str, end, *s++);
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= SPECIAL | SMALL | ZEROPAD;
+ }
+ str = number(str, end,
+ (uint64_t)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ APPEND_BUFFER_SAFE(str, end, '%');
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ APPEND_BUFFER_SAFE(str, end, '%');
+ if (*fmt)
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, uint64_t);
+ else if (qualifier == 'h') {
+ num = (uint16_t)va_arg(args, int);
+ if (flags & SIGN)
+ num = (int16_t)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, uint32_t);
+ str = number(str, end, num, base, field_width, precision, flags);
+ }
+
+ GUEST_ASSERT(str < end);
+ *str = '\0';
+ return str - buf;
+}
+
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+ va_list va;
+ int len;
+
+ va_start(va, fmt);
+ len = guest_vsnprintf(buf, n, fmt, va);
+ va_end(va);
+
+ return len;
+}
uint32_t nr_runnable_vcpus,
uint64_t extra_mem_pages)
{
+ uint64_t page_size = vm_guest_mode_params[mode].page_size;
uint64_t nr_pages;
TEST_ASSERT(nr_runnable_vcpus,
*/
nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
+ /* Account for the number of pages needed by ucall. */
+ nr_pages += ucall_nr_pages_required(page_size);
+
return vm_adjust_num_guest_pages(mode, nr_pages);
}
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
alignment = max(backing_src_pagesz, alignment);
- ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
+ TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
#include "kvm_util.h"
#include "processor.h"
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
return ret;
}
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
- KVM_RISCV_SELFTESTS_SBI_UCALL,
- uc, 0, 0, 0, 0, 0);
-}
-
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
*/
#include "kvm_util.h"
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
-void ucall_arch_do_ucall(vm_vaddr_t uc)
-{
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
-}
-
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
struct kvm_run *run = vcpu->run;
tmp = node_prev(s, nodep);
node_rm(s, nodep);
- nodep = NULL;
nodep = tmp;
reduction_performed = true;
*xs++ = c;
return s;
}
+
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
* Copyright (C) 2020, Google LLC.
*/
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
return num;
}
+
+char *strdup_printf(const char *fmt, ...)
+{
+ va_list ap;
+ char *str;
+
+ va_start(ap, fmt);
+ vasprintf(&str, fmt, ap);
+ va_end(ap);
+
+ return str;
+}
struct ucall ucalls[KVM_MAX_VCPUS];
};
+int ucall_nr_pages_required(uint64_t page_size)
+{
+ return align_up(sizeof(struct ucall_header), page_size) / page_size;
+}
+
/*
* ucall_pool holds per-VM values (global data is duplicated by each VM), it
* must not be accessed from host code.
clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
}
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+ unsigned int line, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+ WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+ WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
void ucall(uint64_t cmd, int nargs, ...)
{
struct ucall *uc;
return true;
}
-void kvm_exit_unexpected_vector(uint32_t value)
-{
- ucall(UCALL_UNHANDLED, 1, value);
-}
-
void route_exception(struct ex_regs *regs)
{
typedef void(*handler)(struct ex_regs *);
if (kvm_fixup_exception(regs))
return;
- kvm_exit_unexpected_vector(regs->vector);
+ ucall_assert(UCALL_UNHANDLED,
+ "Unhandled exception in guest", __FILE__, __LINE__,
+ "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
}
void vm_init_descriptor_tables(struct kvm_vm *vm)
{
struct ucall uc;
- if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
- uint64_t vector = uc.args[0];
-
- TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)",
- vector);
- }
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ REPORT_GUEST_ASSERT(uc);
}
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
#define UCALL_PIO_PORT ((uint16_t)0x1000)
-void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-}
-
void ucall_arch_do_ucall(vm_vaddr_t uc)
{
- asm volatile("in %[port], %%al"
- : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
+ /*
+ * FIXME: Revert this hack (the entire commit that added it) once nVMX
+ * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
+ * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+ * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
+ * in particular is problematic) along with RDX and RDI (which are
+ * inputs), and clobber volatile GPRs. *sigh*
+ */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+ "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+ asm volatile("push %%rbp\n\t"
+ "push %%r15\n\t"
+ "push %%r14\n\t"
+ "push %%r13\n\t"
+ "push %%r12\n\t"
+ "push %%rbx\n\t"
+ "push %%rdx\n\t"
+ "push %%rdi\n\t"
+ "in %[port], %%al\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rbx\n\t"
+ "pop %%r12\n\t"
+ "pop %%r13\n\t"
+ "pop %%r14\n\t"
+ "pop %%r15\n\t"
+ "pop %%rbp\n\t"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+ HORRIFIC_L2_UCALL_CLOBBER_HACK);
}
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
static void run_vcpu(struct kvm_vcpu *vcpu)
{
vcpu_run(vcpu);
- ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
}
static void *vcpu_worker(void *data)
goto done;
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
uint64_t val = *(uint64_t *)ptr;
- GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
+ GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
*(uint64_t *)ptr = 0;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
+
+bool filter_reg(__u64 reg)
+{
+ /*
+ * Some ISA extensions are optional and not present on all host,
+ * but they can't be disabled through ISA_EXT registers when present.
+ * So, to make life easy, just filtering out these kind of registers.
+ */
+ switch (reg & ~REG_MASK) {
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool check_reject_set(int err)
+{
+ return err == EINVAL;
+}
+
+static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext)
+{
+ int ret;
+ unsigned long value;
+
+ ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value);
+ if (ret) {
+ printf("Failed to get ext %d", ext);
+ return false;
+ }
+
+ return !!value;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+
+ /*
+ * Disable all extensions which were enabled by default
+ * if they were available in the risc-v host.
+ */
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
+ __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+
+ for_each_sublist(c, s) {
+ if (!s->feature)
+ continue;
+
+ /* Try to enable the desired extension */
+ __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(s->feature), 1);
+
+ /* Double check whether the desired extension was enabled */
+ __TEST_REQUIRE(vcpu_has_ext(vcpu, s->feature),
+ "%s not available, skipping tests\n", s->name);
+ }
+}
+
+static const char *config_id_to_str(__u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_config */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ return "KVM_REG_RISCV_CONFIG_REG(isa)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+ return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
+ case KVM_REG_RISCV_CONFIG_REG(marchid):
+ return "KVM_REG_RISCV_CONFIG_REG(marchid)";
+ case KVM_REG_RISCV_CONFIG_REG(mimpid):
+ return "KVM_REG_RISCV_CONFIG_REG(mimpid)";
+ case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+ return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
+ }
+
+ /*
+ * Config regs would grow regularly with new pseudo reg added, so
+ * just show raw id to indicate a new pseudo config reg.
+ */
+ return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off);
+}
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_core */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CORE_REG(regs.pc):
+ return "KVM_REG_RISCV_CORE_REG(regs.pc)";
+ case KVM_REG_RISCV_CORE_REG(regs.ra):
+ return "KVM_REG_RISCV_CORE_REG(regs.ra)";
+ case KVM_REG_RISCV_CORE_REG(regs.sp):
+ return "KVM_REG_RISCV_CORE_REG(regs.sp)";
+ case KVM_REG_RISCV_CORE_REG(regs.gp):
+ return "KVM_REG_RISCV_CORE_REG(regs.gp)";
+ case KVM_REG_RISCV_CORE_REG(regs.tp):
+ return "KVM_REG_RISCV_CORE_REG(regs.tp)";
+ case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t0));
+ case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s0));
+ case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.a0));
+ case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2);
+ case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3);
+ case KVM_REG_RISCV_CORE_REG(mode):
+ return "KVM_REG_RISCV_CORE_REG(mode)";
+ }
+
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+#define RISCV_CSR_GENERAL(csr) \
+ "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_AIA(csr) \
+ "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+
+static const char *general_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_REG(sstatus):
+ return RISCV_CSR_GENERAL(sstatus);
+ case KVM_REG_RISCV_CSR_REG(sie):
+ return RISCV_CSR_GENERAL(sie);
+ case KVM_REG_RISCV_CSR_REG(stvec):
+ return RISCV_CSR_GENERAL(stvec);
+ case KVM_REG_RISCV_CSR_REG(sscratch):
+ return RISCV_CSR_GENERAL(sscratch);
+ case KVM_REG_RISCV_CSR_REG(sepc):
+ return RISCV_CSR_GENERAL(sepc);
+ case KVM_REG_RISCV_CSR_REG(scause):
+ return RISCV_CSR_GENERAL(scause);
+ case KVM_REG_RISCV_CSR_REG(stval):
+ return RISCV_CSR_GENERAL(stval);
+ case KVM_REG_RISCV_CSR_REG(sip):
+ return RISCV_CSR_GENERAL(sip);
+ case KVM_REG_RISCV_CSR_REG(satp):
+ return RISCV_CSR_GENERAL(satp);
+ case KVM_REG_RISCV_CSR_REG(scounteren):
+ return RISCV_CSR_GENERAL(scounteren);
+ }
+
+ TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off);
+ return NULL;
+}
+
+static const char *aia_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_aia_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_AIA_REG(siselect):
+ return RISCV_CSR_AIA(siselect);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+ return RISCV_CSR_AIA(iprio1);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+ return RISCV_CSR_AIA(iprio2);
+ case KVM_REG_RISCV_CSR_AIA_REG(sieh):
+ return RISCV_CSR_AIA(sieh);
+ case KVM_REG_RISCV_CSR_AIA_REG(siph):
+ return RISCV_CSR_AIA(siph);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+ return RISCV_CSR_AIA(iprio1h);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+ return RISCV_CSR_AIA(iprio2h);
+ }
+
+ TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off);
+ return NULL;
+}
+
+static const char *csr_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_CSR_GENERAL:
+ return general_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_AIA:
+ return aia_csr_id_to_str(reg_off);
+ }
+
+ TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype);
+ return NULL;
+}
+
+static const char *timer_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_timer */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ return "KVM_REG_RISCV_TIMER_REG(frequency)";
+ case KVM_REG_RISCV_TIMER_REG(time):
+ return "KVM_REG_RISCV_TIMER_REG(time)";
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ return "KVM_REG_RISCV_TIMER_REG(compare)";
+ case KVM_REG_RISCV_TIMER_REG(state):
+ return "KVM_REG_RISCV_TIMER_REG(state)";
+ }
+
+ TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+static const char *fp_f_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_f_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_F_REG(f[0]) ...
+ KVM_REG_RISCV_FP_F_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_F_REG(fcsr):
+ return "KVM_REG_RISCV_FP_F_REG(fcsr)";
+ }
+
+ TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+static const char *fp_d_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_d_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_D_REG(f[0]) ...
+ KVM_REG_RISCV_FP_D_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_D_REG(fcsr):
+ return "KVM_REG_RISCV_FP_D_REG(fcsr)";
+ }
+
+ TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+static const char *isa_ext_id_to_str(__u64 id)
+{
+ /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+
+ static const char * const kvm_isa_ext_reg_name[] = {
+ "KVM_RISCV_ISA_EXT_A",
+ "KVM_RISCV_ISA_EXT_C",
+ "KVM_RISCV_ISA_EXT_D",
+ "KVM_RISCV_ISA_EXT_F",
+ "KVM_RISCV_ISA_EXT_H",
+ "KVM_RISCV_ISA_EXT_I",
+ "KVM_RISCV_ISA_EXT_M",
+ "KVM_RISCV_ISA_EXT_SVPBMT",
+ "KVM_RISCV_ISA_EXT_SSTC",
+ "KVM_RISCV_ISA_EXT_SVINVAL",
+ "KVM_RISCV_ISA_EXT_ZIHINTPAUSE",
+ "KVM_RISCV_ISA_EXT_ZICBOM",
+ "KVM_RISCV_ISA_EXT_ZICBOZ",
+ "KVM_RISCV_ISA_EXT_ZBB",
+ "KVM_RISCV_ISA_EXT_SSAIA",
+ "KVM_RISCV_ISA_EXT_V",
+ "KVM_RISCV_ISA_EXT_SVNAPOT",
+ "KVM_RISCV_ISA_EXT_ZBA",
+ "KVM_RISCV_ISA_EXT_ZBS",
+ "KVM_RISCV_ISA_EXT_ZICNTR",
+ "KVM_RISCV_ISA_EXT_ZICSR",
+ "KVM_RISCV_ISA_EXT_ZIFENCEI",
+ "KVM_RISCV_ISA_EXT_ZIHPM",
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) {
+ /*
+ * isa_ext regs would grow regularly with new isa extension added, so
+ * just show "reg" to indicate a new extension.
+ */
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+ }
+
+ return kvm_isa_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_single_id_to_str(__u64 reg_off)
+{
+ /* reg_off is KVM_RISCV_SBI_EXT_ID */
+ static const char * const kvm_sbi_ext_reg_name[] = {
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL",
+ "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR",
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) {
+ /*
+ * sbi_ext regs would grow regularly with new sbi extension added, so
+ * just show "reg" to indicate a new extension.
+ */
+ return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
+ }
+
+ return kvm_sbi_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+ if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) {
+ /*
+ * sbi_ext regs would grow regularly with new sbi extension added, so
+ * just show "reg" to indicate a new extension.
+ */
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+ }
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off);
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off);
+ }
+
+ return NULL;
+}
+
+static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_SINGLE:
+ return sbi_ext_single_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
+ }
+
+ TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype);
+ return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV,
+ "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ }
+
+ switch (id & KVM_REG_RISCV_TYPE_MASK) {
+ case KVM_REG_RISCV_CONFIG:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
+ reg_size, config_id_to_str(id));
+ break;
+ case KVM_REG_RISCV_CORE:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
+ reg_size, core_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_CSR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n",
+ reg_size, csr_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_TIMER:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n",
+ reg_size, timer_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_F:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n",
+ reg_size, fp_f_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_D:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
+ reg_size, fp_d_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_ISA_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
+ reg_size, isa_ext_id_to_str(id));
+ break;
+ case KVM_REG_RISCV_SBI_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
+ reg_size, sbi_ext_id_to_str(prefix, id));
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix,
+ (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id);
+ }
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v6.5-rc3 and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0,
+};
+
+/*
+ * The skips_set list registers that should skip set test.
+ * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly.
+ */
+static __u64 base_skips_set[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+static __u64 h_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H,
+};
+
+static __u64 zicbom_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM,
+};
+
+static __u64 zicboz_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ,
+};
+
+static __u64 svpbmt_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT,
+};
+
+static __u64 sstc_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC,
+};
+
+static __u64 svinval_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL,
+};
+
+static __u64 zihintpause_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+};
+
+static __u64 zba_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA,
+};
+
+static __u64 zbb_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB,
+};
+
+static __u64 zbs_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS,
+};
+
+static __u64 zicntr_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR,
+};
+
+static __u64 zicsr_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR,
+};
+
+static __u64 zifencei_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI,
+};
+
+static __u64 zihpm_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM,
+};
+
+static __u64 aia_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA,
+};
+
+static __u64 fp_f_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F,
+};
+
+static __u64 fp_d_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D,
+};
+
+#define BASE_SUBLIST \
+ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
+ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
+#define H_REGS_SUBLIST \
+ {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),}
+#define ZICBOM_REGS_SUBLIST \
+ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define ZICBOZ_REGS_SUBLIST \
+ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
+#define SVPBMT_REGS_SUBLIST \
+ {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),}
+#define SSTC_REGS_SUBLIST \
+ {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),}
+#define SVINVAL_REGS_SUBLIST \
+ {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),}
+#define ZIHINTPAUSE_REGS_SUBLIST \
+ {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),}
+#define ZBA_REGS_SUBLIST \
+ {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),}
+#define ZBB_REGS_SUBLIST \
+ {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),}
+#define ZBS_REGS_SUBLIST \
+ {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),}
+#define ZICNTR_REGS_SUBLIST \
+ {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),}
+#define ZICSR_REGS_SUBLIST \
+ {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),}
+#define ZIFENCEI_REGS_SUBLIST \
+ {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),}
+#define ZIHPM_REGS_SUBLIST \
+ {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),}
+#define AIA_REGS_SUBLIST \
+ {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define FP_F_REGS_SUBLIST \
+ {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
+ .regs_n = ARRAY_SIZE(fp_f_regs),}
+#define FP_D_REGS_SUBLIST \
+ {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
+ .regs_n = ARRAY_SIZE(fp_d_regs),}
+
+static struct vcpu_reg_list h_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ H_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zicbom_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZICBOM_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zicboz_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZICBOZ_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list svpbmt_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVPBMT_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list sstc_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SSTC_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list svinval_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVINVAL_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zihintpause_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZIHINTPAUSE_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zba_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZBA_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zbb_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZBB_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zbs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZBS_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zicntr_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZICNTR_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zicsr_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZICSR_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zifencei_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZIFENCEI_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list zihpm_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ ZIHPM_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list aia_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ AIA_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list fp_f_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ FP_F_REGS_SUBLIST,
+ {0},
+ },
+};
+
+static struct vcpu_reg_list fp_d_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ FP_D_REGS_SUBLIST,
+ {0},
+ },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &h_config,
+ &zicbom_config,
+ &zicboz_config,
+ &svpbmt_config,
+ &sstc_config,
+ &svinval_config,
+ &zihintpause_config,
+ &zba_config,
+ &zbb_config,
+ &zbs_config,
+ &zicntr_config,
+ &zicsr_config,
+ &zifencei_config,
+ &zihpm_config,
+ &aia_config,
+ &fp_f_config,
+ &fp_d_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
/* GET_CMMA_BITS without CMMA enabled should fail */
rc = vm_get_cmma_bits(vm, 0, &errno_out);
- ASSERT_EQ(rc, -1);
- ASSERT_EQ(errno_out, ENXIO);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, ENXIO);
enable_cmma(vm);
vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
/* GET_CMMA_BITS without migration mode and without peeking should fail */
rc = vm_get_cmma_bits(vm, 0, &errno_out);
- ASSERT_EQ(rc, -1);
- ASSERT_EQ(errno_out, EINVAL);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
/* GET_CMMA_BITS without migration mode and with peeking should work */
rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
- ASSERT_EQ(rc, 0);
- ASSERT_EQ(errno_out, 0);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(errno_out, 0);
enable_dirty_tracking(vm);
enable_migration_mode(vm);
/* GET_CMMA_BITS with invalid flags */
rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
- ASSERT_EQ(rc, -1);
- ASSERT_EQ(errno_out, EINVAL);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
kvm_vm_free(vm);
}
static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
{
- ASSERT_EQ(vcpu->run->exit_reason, 13);
- ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
- ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
- ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
}
static void test_migration_mode(void)
/* enabling migration mode on a VM without memory should fail */
rc = __enable_migration_mode(vm);
- ASSERT_EQ(rc, -1);
- ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
errno = 0;
/* migration mode when memslots have dirty tracking off should fail */
rc = __enable_migration_mode(vm);
- ASSERT_EQ(rc, -1);
- ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
errno = 0;
/* enabling migration mode should work now */
rc = __enable_migration_mode(vm);
- ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(rc, 0);
TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
errno = 0;
*/
vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
rc = __enable_migration_mode(vm);
- ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(rc, 0);
TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
errno = 0;
};
memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
- ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
- ASSERT_EQ(args.start_gfn, 0);
+ TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
/* ...and then - after a hole - the TEST_DATA memslot should follow */
args = (struct kvm_s390_cmma_log){
};
memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
- ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
- ASSERT_EQ(args.remaining, 0);
+ TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ TEST_ASSERT_EQ(args.remaining, 0);
/* ...and nothing else should be there */
args = (struct kvm_s390_cmma_log){
};
memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
- ASSERT_EQ(args.count, 0);
- ASSERT_EQ(args.start_gfn, 0);
- ASSERT_EQ(args.remaining, 0);
+ TEST_ASSERT_EQ(args.count, 0);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+ TEST_ASSERT_EQ(args.remaining, 0);
}
/**
u64 dirty_gfn_count,
const struct kvm_s390_cmma_log *res)
{
- ASSERT_EQ(res->start_gfn, first_dirty_gfn);
- ASSERT_EQ(res->count, dirty_gfn_count);
+ TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ TEST_ASSERT_EQ(res->count, dirty_gfn_count);
for (size_t i = 0; i < dirty_gfn_count; i++)
- ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
- ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+ TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+ TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
}
static void test_get_skip_holes(void)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+ "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+ size_t new_psw_off, uint64_t *new_psw)
+{
+ struct kvm_guest_debug debug = {};
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ char *lowcore;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ lowcore = addr_gpa2hva(vm, 0);
+ new_psw[0] = (*vcpu)->run->psw_mask;
+ new_psw[1] = (uint64_t)int_handler;
+ memcpy(lowcore + new_psw_off, new_psw, 16);
+ vcpu_regs_get(*vcpu, ®s);
+ regs.gprs[2] = -1;
+ vcpu_regs_set(*vcpu, ®s);
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(*vcpu, &debug);
+ vcpu_run(*vcpu);
+
+ return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+ ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+ "j .\n");
+
+static void test_step_pgm(void)
+{
+ test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = PGM_SPECIFICATION,
+ };
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+ __LC_PGM_NEW_PSW, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+ vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+ "iske %r2,%r2\n"
+ "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+ test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+ "lctl %c0,%c0,1\n"
+ "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+ test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+ "svc 0\n"
+ "j .\n");
+
+static void test_step_svc(void)
+{
+ test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "single-step pgm", test_step_pgm },
+ { "single-step pgm caused by diag", test_step_pgm_diag },
+ { "single-step pgm caused by iske", test_step_pgm_iske },
+ { "single-step pgm caused by lctl", test_step_pgm_lctl },
+ { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
*
* Copyright (C) 2019, Red Hat, Inc.
*/
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
vcpu_run(__vcpu); \
get_ucall(__vcpu, &uc); \
if (uc.cmd == UCALL_ABORT) { \
- REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \
+ REPORT_GUEST_ASSERT(uc); \
} \
- ASSERT_EQ(uc.cmd, UCALL_SYNC); \
- ASSERT_EQ(uc.args[1], __stage); \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
}) \
static void prepare_mem12(void)
case 16:
return val;
}
- GUEST_ASSERT_1(false, "Invalid size");
+ GUEST_FAIL("Invalid size = %u", size);
return 0;
}
return ret;
}
}
- GUEST_ASSERT_1(false, "Invalid size");
+ GUEST_FAIL("Invalid size = %u", size);
return 0;
}
HOST_SYNC(t.vcpu, STAGE_IDLED);
MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
/* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
- ASSERT_EQ(teid & teid_mask, 0);
+ TEST_ASSERT_EQ(teid & teid_mask, 0);
kvm_vm_free(t.kvm_vm);
}
*
* Copyright IBM Corp. 2021
*/
-
#include <sys/mman.h>
#include "test_util.h"
#include "kvm_util.h"
!mapped_0;
if (!skip) {
result = test_protection(tests[*i].addr, tests[*i].key);
- GUEST_ASSERT_2(result == tests[*i].expected, *i, result);
+ __GUEST_ASSERT(result == tests[*i].expected,
+ "Wanted %u, got %u, for i = %u",
+ tests[*i].expected, result, *i);
}
}
return stage;
vcpu_run(__vcpu); \
get_ucall(__vcpu, &uc); \
if (uc.cmd == UCALL_ABORT) \
- REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \
- ASSERT_EQ(uc.cmd, UCALL_SYNC); \
- ASSERT_EQ(uc.args[1], __stage); \
+ REPORT_GUEST_ASSERT(uc); \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
})
#define HOST_SYNC(vcpu, stage) \
}
if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
- REPORT_GUEST_ASSERT_1(uc, "val = %lu");
+ REPORT_GUEST_ASSERT(uc);
return NULL;
}
* window where the memslot is invalid is usually quite small.
*/
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
/* Spin until the misaligning memory region move completes. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 1 || val == 0, val);
+ __GUEST_ASSERT(val == 1 || val == 0,
+ "Expected '0' or '1' (no MMIO), got '%llx'", val);
/* Spin until the memory region starts to get re-aligned. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val);
/* Spin until the re-aligning memory region move completes. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 1, val);
+ GUEST_ASSERT_EQ(val, 1);
GUEST_DONE();
}
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
/* Spin until the memory region is recreated. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 0, val);
+ GUEST_ASSERT_EQ(val, 0);
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
asm("1:\n\t"
".pushsection .rodata\n\t"
"final_rip_end: .quad 1b\n\t"
".popsection");
- GUEST_ASSERT_1(0, 0);
+ GUEST_ASSERT(0);
}
static void test_delete_memory_region(void)
static void check_status(struct kvm_steal_time *st)
{
GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
- GUEST_ASSERT(READ_ONCE(st->flags) == 0);
- GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
}
static void guest_code(int cpu)
struct kvm_steal_time *st = st_gva[cpu];
uint32_t version;
- GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+ GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
memset(st, 0, sizeof(*st));
GUEST_SYNC(0);
static void check_status(struct st_time *st)
{
- GUEST_ASSERT(READ_ONCE(st->rev) == 0);
- GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
}
static void guest_code(int cpu)
int64_t status;
status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_ST, 0);
- GUEST_ASSERT(status != -1);
- GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+ GUEST_ASSERT_NE(status, -1);
+ GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
st = (struct st_time *)status;
GUEST_SYNC(0);
guest_cpuid->entries[i].index,
&eax, &ebx, &ecx, &edx);
- GUEST_ASSERT(eax == guest_cpuid->entries[i].eax &&
- ebx == guest_cpuid->entries[i].ebx &&
- ecx == guest_cpuid->entries[i].ecx &&
- edx == guest_cpuid->entries[i].edx);
+ GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+ GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+ GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+ GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
}
}
GUEST_SYNC(2);
- GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001);
+ GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
GUEST_DONE();
}
case UCALL_DONE:
return;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
exit_reason_str(vcpu->run->exit_reason));
vcpu_run(vcpu);
- ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
* with that capability.
*/
if (dirty_log_manual_caps) {
- ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
- ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
- ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
} else {
- ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
- ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages);
}
/*
* memory again, the page counts should be the same as they were
* right after initial population of memory.
*/
- ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
- ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
- ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+ TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k);
+ TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
}
static void help(char *name)
vcpu_run(vcpu);
handle_flds_emulation_failure_exit(vcpu);
vcpu_run(vcpu);
- ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
kvm_vm_free(vm);
return 0;
* Copyright 2022 Google LLC
* Author: Vipin Sharma <vipinsh@google.com>
*/
-
#include "kvm_util.h"
#include "processor.h"
#include "hyperv.h"
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
break;
vector = rdmsr_safe(msr->idx, &msr_val);
if (msr->fault_expected)
- GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR);
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+ msr->idx, msr->write ? "WR" : "RD", vector);
else
- GUEST_ASSERT_3(!vector, msr->idx, vector, 0);
+ __GUEST_ASSERT(!vector,
+ "Expected success on %sMSR(0x%x), got vector '0x%x'",
+ msr->idx, msr->write ? "WR" : "RD", vector);
if (vector || is_write_only_msr(msr->idx))
goto done;
if (msr->write)
- GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx,
- msr_val, msr->write_val);
+ __GUEST_ASSERT(!vector,
+ "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'",
+ msr->idx, msr->write_val, msr_val);
/* Invariant TSC bit appears when TSC invariant control MSR is written to */
if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
u64 res, input, output;
uint8_t vector;
- GUEST_ASSERT(hcall->control);
+ GUEST_ASSERT_NE(hcall->control, 0);
wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
vector = __hyperv_hypercall(hcall->control, input, output, &res);
if (hcall->ud_expected) {
- GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
+ __GUEST_ASSERT(vector == UD_VECTOR,
+ "Expected #UD for control '%u', got vector '0x%x'",
+ hcall->control, vector);
} else {
- GUEST_ASSERT_2(!vector, hcall->control, vector);
- GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res);
+ __GUEST_ASSERT(!vector,
+ "Expected no exception for control '%u', got vector '0x%x'",
+ hcall->control, vector);
+ GUEST_ASSERT_EQ(res, hcall->expect);
}
GUEST_DONE();
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx");
+ REPORT_GUEST_ASSERT(uc);
return;
case UCALL_DONE:
break;
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx");
+ REPORT_GUEST_ASSERT(uc);
return;
case UCALL_DONE:
break;
PR_MSR(msr);
vector = rdmsr_safe(msr->idx, &ignored);
- GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
vector = wrmsr_safe(msr->idx, 0);
- GUEST_ASSERT_1(vector == GP_VECTOR, vector);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
}
struct hcall_data {
PR_HCALL(hc);
r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
- GUEST_ASSERT(r == -KVM_ENOSYS);
+ GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
}
static void guest_main(void)
pr_hcall(&uc);
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_1(uc, "vector = %lu");
+ REPORT_GUEST_ASSERT(uc);
return;
case UCALL_DONE:
return;
MWAIT_DISABLED = BIT(2),
};
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
+do { \
+ bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
+ ((testcase) & MWAIT_DISABLED); \
+ \
+ if (fault_wanted) \
+ __GUEST_ASSERT((vector) == UD_VECTOR, \
+ "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \
+ else \
+ __GUEST_ASSERT(!(vector), \
+ "Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \
+} while (0)
+
static void guest_monitor_wait(int testcase)
{
- /*
- * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD,
- * in all other scenarios KVM should emulate them as nops.
- */
- bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) &&
- (testcase & MWAIT_DISABLED);
u8 vector;
GUEST_SYNC(testcase);
* intercept checks, so the inputs for MONITOR and MWAIT must be valid.
*/
vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
- if (fault_wanted)
- GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
- else
- GUEST_ASSERT_2(!vector, testcase, vector);
+ GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
- if (fault_wanted)
- GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector);
- else
- GUEST_ASSERT_2(!vector, testcase, vector);
+ GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
}
static void guest_code(void)
testcase = uc.args[1];
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld");
+ REPORT_GUEST_ASSERT(uc);
goto done;
case UCALL_DONE:
goto done;
"Expected L2 to ask for %d, L2 says it's done", vector);
break;
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)",
- (const char *)uc.args[0], __FILE__, uc.args[1],
- uc.args[2], uc.args[3]);
+ REPORT_GUEST_ASSERT(uc);
break;
default:
TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
/* Verify the pending events comes back out the same as it went in. */
vcpu_events_get(vcpu, &events);
- ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
- KVM_VCPUEVENT_VALID_PAYLOAD);
- ASSERT_EQ(events.exception.pending, true);
- ASSERT_EQ(events.exception.nr, SS_VECTOR);
- ASSERT_EQ(events.exception.has_error_code, true);
- ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+ TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ TEST_ASSERT_EQ(events.exception.pending, true);
+ TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ TEST_ASSERT_EQ(events.exception.has_error_code, true);
+ TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
/*
* Run for real with the pending #SS, L1 should get a VM-Exit due to
#define ARCH_PERFMON_BRANCHES_RETIRED 5
#define NUM_BRANCHES 42
+#define INTEL_PMC_IDX_FIXED 32
+
+/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
+#define MAX_FILTER_EVENTS 300
+#define MAX_TEST_EVENTS 10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS (MAX_FILTER_EVENTS + 1)
/*
* This is how the event selector and unit mask are stored in an AMD
#define INST_RETIRED EVENT(0xc0, 0)
+struct __kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[MAX_FILTER_EVENTS];
+};
+
/*
* This event list comprises Intel's eight architectural events plus
* AMD's "retired branch instructions" for Zen[123] (and possibly
* other AMD CPUs).
*/
-static const uint64_t event_list[] = {
- EVENT(0x3c, 0),
- INST_RETIRED,
- EVENT(0x3c, 1),
- EVENT(0x2e, 0x4f),
- EVENT(0x2e, 0x41),
- EVENT(0xc4, 0),
- EVENT(0xc5, 0),
- EVENT(0xa4, 1),
- AMD_ZEN_BR_RETIRED,
+static const struct __kvm_pmu_event_filter base_event_filter = {
+ .nevents = ARRAY_SIZE(base_event_filter.events),
+ .events = {
+ EVENT(0x3c, 0),
+ INST_RETIRED,
+ EVENT(0x3c, 1),
+ EVENT(0x2e, 0x4f),
+ EVENT(0x2e, 0x41),
+ EVENT(0xc4, 0),
+ EVENT(0xc5, 0),
+ EVENT(0xa4, 1),
+ AMD_ZEN_BR_RETIRED,
+ },
};
struct {
return !r;
}
-static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents)
-{
- struct kvm_pmu_event_filter *f;
- int size = sizeof(*f) + nevents * sizeof(f->events[0]);
-
- f = malloc(size);
- TEST_ASSERT(f, "Out of memory");
- memset(f, 0, size);
- f->nevents = nevents;
- return f;
-}
-
-
-static struct kvm_pmu_event_filter *
-create_pmu_event_filter(const uint64_t event_list[], int nevents,
- uint32_t action, uint32_t flags)
-{
- struct kvm_pmu_event_filter *f;
- int i;
-
- f = alloc_pmu_event_filter(nevents);
- f->action = action;
- f->flags = flags;
- for (i = 0; i < nevents; i++)
- f->events[i] = event_list[i];
-
- return f;
-}
-
-static struct kvm_pmu_event_filter *event_filter(uint32_t action)
-{
- return create_pmu_event_filter(event_list,
- ARRAY_SIZE(event_list),
- action, 0);
-}
-
/*
* Remove the first occurrence of 'event' (if any) from the filter's
* event list.
*/
-static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
- uint64_t event)
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
{
bool found = false;
int i;
}
if (found)
f->nevents--;
- return f;
}
#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
}
static void test_with_filter(struct kvm_vcpu *vcpu,
- struct kvm_pmu_event_filter *f)
+ struct __kvm_pmu_event_filter *__f)
{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
run_vcpu_and_sync_pmc_results(vcpu);
}
static void test_amd_deny_list(struct kvm_vcpu *vcpu)
{
- uint64_t event = EVENT(0x1C2, 0);
- struct kvm_pmu_event_filter *f;
+ struct __kvm_pmu_event_filter f = {
+ .action = KVM_PMU_EVENT_DENY,
+ .nevents = 1,
+ .events = {
+ EVENT(0x1C2, 0),
+ },
+ };
- f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0);
- test_with_filter(vcpu, f);
- free(f);
+ test_with_filter(vcpu, &f);
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_member_deny_list(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ struct __kvm_pmu_event_filter f = base_event_filter;
- test_with_filter(vcpu, f);
- free(f);
+ f.action = KVM_PMU_EVENT_DENY;
+ test_with_filter(vcpu, &f);
ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
static void test_member_allow_list(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ struct __kvm_pmu_event_filter f = base_event_filter;
- test_with_filter(vcpu, f);
- free(f);
+ f.action = KVM_PMU_EVENT_ALLOW;
+ test_with_filter(vcpu, &f);
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY);
+ struct __kvm_pmu_event_filter f = base_event_filter;
- remove_event(f, INST_RETIRED);
- remove_event(f, INTEL_BR_RETIRED);
- remove_event(f, AMD_ZEN_BR_RETIRED);
- test_with_filter(vcpu, f);
- free(f);
+ f.action = KVM_PMU_EVENT_DENY;
+
+ remove_event(&f, INST_RETIRED);
+ remove_event(&f, INTEL_BR_RETIRED);
+ remove_event(&f, AMD_ZEN_BR_RETIRED);
+ test_with_filter(vcpu, &f);
ASSERT_PMC_COUNTING_INSTRUCTIONS();
}
static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
{
- struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW);
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
- remove_event(f, INST_RETIRED);
- remove_event(f, INTEL_BR_RETIRED);
- remove_event(f, AMD_ZEN_BR_RETIRED);
- test_with_filter(vcpu, f);
- free(f);
+ remove_event(&f, INST_RETIRED);
+ remove_event(&f, INTEL_BR_RETIRED);
+ remove_event(&f, AMD_ZEN_BR_RETIRED);
+ test_with_filter(vcpu, &f);
ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
}
const uint64_t masked_events[],
const int nmasked_events)
{
- struct kvm_pmu_event_filter *f;
+ struct __kvm_pmu_event_filter f = {
+ .nevents = nmasked_events,
+ .action = KVM_PMU_EVENT_ALLOW,
+ .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ };
- f = create_pmu_event_filter(masked_events, nmasked_events,
- KVM_PMU_EVENT_ALLOW,
- KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
- test_with_filter(vcpu, f);
- free(f);
+ memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ test_with_filter(vcpu, &f);
}
-/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */
-#define MAX_FILTER_EVENTS 300
-#define MAX_TEST_EVENTS 10
-
#define ALLOW_LOADS BIT(0)
#define ALLOW_STORES BIT(1)
#define ALLOW_LOADS_STORES BIT(2)
run_masked_events_tests(vcpu, events, nevents);
}
-static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events,
- int nevents, uint32_t flags)
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
{
- struct kvm_pmu_event_filter *f;
- int r;
+ struct kvm_pmu_event_filter *f = (void *)__f;
- f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags);
- r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
- free(f);
+ return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
- return r;
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+ uint32_t flags, uint32_t action)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = 1,
+ .flags = flags,
+ .action = action,
+ .events = {
+ event,
+ },
+ };
+
+ return set_pmu_event_filter(vcpu, &f);
}
static void test_filter_ioctl(struct kvm_vcpu *vcpu)
{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct __kvm_pmu_event_filter f;
uint64_t e = ~0ul;
int r;
* Unfortunately having invalid bits set in event data is expected to
* pass when flags == 0 (bits other than eventsel+umask).
*/
- r = run_filter_test(vcpu, &e, 1, 0);
+ r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
- r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
- r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ f = base_event_filter;
+ f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+ f = base_event_filter;
+ f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+ f = base_event_filter;
+ f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+ f = base_event_filter;
+ f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0);
+
+ /* Only OS_EN bit is enabled for fixed counter[idx]. */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL,
+ BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx));
+ }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ uint32_t action, uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = action,
+ .fixed_counter_bitmap = bitmap,
+ };
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ uint32_t action,
+ uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = action;
+ f.fixed_counter_bitmap = bitmap;
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+ uint8_t nr_fixed_counters)
+{
+ unsigned int i;
+ uint32_t bitmap;
+ uint64_t count;
+
+ TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+ "Invalid nr_fixed_counters");
+
+ /*
+ * Check the fixed performance counter can count normally when KVM
+ * userspace doesn't set any pmu filter.
+ */
+ count = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(count, "Unexpected count value: %ld\n", count);
+
+ for (i = 0; i < BIT(nr_fixed_counters); i++) {
+ bitmap = BIT(i);
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+ /*
+ * Check that fixed_counter_bitmap has higher priority than
+ * events[] when both are set.
+ */
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+ }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint8_t idx;
+
+ /*
+ * Check that pmu_event_filter works as expected when it's applied to
+ * fixed performance counters.
+ */
+ for (idx = 0; idx < nr_fixed_counters; idx++) {
+ vm = vm_create_with_one_vcpu(&vcpu,
+ intel_run_fixed_counter_guest_code);
+ vcpu_args_set(vcpu, 1, idx);
+ __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+ kvm_vm_free(vm);
+ }
}
int main(int argc, char *argv[])
kvm_vm_free(vm);
test_pmu_config_disable(guest_code);
+ test_fixed_counter_bitmap();
return 0;
}
for (i = 0; i < KVM_MAX_VCPUS; i++)
vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
- ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
vcpuN = vcpus[KVM_MAX_VCPUS - 1];
for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
}
- ASSERT_EQ(pthread_cancel(thread), 0);
- ASSERT_EQ(pthread_join(thread, NULL), 0);
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
kvm_vm_free(vm);
{
GUEST_SYNC(1);
- GUEST_ASSERT(get_bsp_flag() != 0);
+ GUEST_ASSERT_NE(get_bsp_flag(), 0);
GUEST_DONE();
}
{
GUEST_SYNC(1);
- GUEST_ASSERT(get_bsp_flag() == 0);
+ GUEST_ASSERT_EQ(get_bsp_flag(), 0);
GUEST_DONE();
}
stage);
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
exit_reason_str(vcpu->run->exit_reason));
* Copyright (C) 2021, Red Hat, Inc.
*
*/
-
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>
static void guest_int_handler(struct ex_regs *regs)
{
int_fired++;
- GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int,
- regs->rip, (unsigned long)l2_guest_code_int);
+ GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
}
static void l2_guest_code_int(void)
{
- GUEST_ASSERT_1(int_fired == 1, int_fired);
+ GUEST_ASSERT_EQ(int_fired, 1);
/*
* Same as the vmmcall() function, but with a ud2 sneaked after the
: "rbx", "rdx", "rsi", "rdi", "r8", "r9",
"r10", "r11", "r12", "r13", "r14", "r15");
- GUEST_ASSERT_1(bp_fired == 1, bp_fired);
+ GUEST_ASSERT_EQ(bp_fired, 1);
hlt();
}
if (nmi_stage_get() == 1) {
vmmcall();
- GUEST_ASSERT(false);
+ GUEST_FAIL("Unexpected resume after VMMCALL");
} else {
- GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get());
+ GUEST_ASSERT_EQ(nmi_stage_get(), 3);
GUEST_DONE();
}
}
}
run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
clgi();
x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
- GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get());
+ GUEST_ASSERT_EQ(nmi_stage_get(), 1);
nmi_stage_inc();
stgi();
vmcb->control.next_rip = vmcb->save.rip + 2;
run_guest(vmcb, svm->vmcb_gpa);
- GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT,
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+ "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'",
vmcb->control.exit_code,
vmcb->control.exit_info_1, vmcb->control.exit_info_2);
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx");
+ REPORT_GUEST_ASSERT(uc);
break;
/* NOT REACHED */
case UCALL_DONE:
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <pthread.h>
#include "test_util.h"
#include "kvm_util.h"
#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
#define INVALID_SYNC_FIELD 0x80000000
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.injected, 1);
+ WRITE_ONCE(events->exception.pending, 1);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events. KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+ WRITE_ONCE(events->exception.pending, 1);
+ WRITE_ONCE(events->exception.nr, 255);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ __u64 *cr4 = &run->s.regs.sregs.cr4;
+ __u64 pae_enabled = *cr4;
+ __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+ WRITE_ONCE(*cr4, pae_enabled);
+ asm volatile(".rept 512\n\t"
+ "nop\n\t"
+ ".endr");
+ WRITE_ONCE(*cr4, pae_disabled);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+static void race_sync_regs(void *racer)
+{
+ const time_t TIMEOUT = 2; /* seconds, roughly */
+ struct kvm_x86_state *state;
+ struct kvm_translation tr;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ run->kvm_valid_regs = 0;
+
+ /* Save state *before* spawning the thread that mucks with vCPU state. */
+ state = vcpu_save_state(vcpu);
+
+ /*
+ * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+ * should already be set in guest state.
+ */
+ TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+ (run->s.regs.sregs.efer & EFER_LME),
+ "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+ !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+ !!(run->s.regs.sregs.efer & EFER_LME));
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ /*
+ * Reload known good state if the vCPU triple faults, e.g. due
+ * to the unhandled #GPs being injected. VMX preserves state
+ * on shutdown, but SVM synthesizes an INIT as the VMCB state
+ * is architecturally undefined on triple fault.
+ */
+ if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+ vcpu_load_state(vcpu, state);
+
+ if (racer == race_sregs_cr4) {
+ tr = (struct kvm_translation) { .linear_address = 0 };
+ __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+ }
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_x86_state_cleanup(state);
+ kvm_vm_free(vm);
+}
+
int main(int argc, char *argv[])
{
struct kvm_vcpu *vcpu;
kvm_vm_free(vm);
+ race_sync_regs(race_sregs_cr4);
+ race_sync_regs(race_events_exc);
+ race_sync_regs(race_events_inj_pen);
+
return 0;
}
ksft_test_result_pass("stage %d passed\n", stage + 1);
return;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
exit_reason_str(vcpu->run->exit_reason));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
val = 0;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC affect both MSRs. */
run_vcpu(vcpu, 1);
val = 1ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
run_vcpu(vcpu, 2);
val = 2ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Host: writes to MSR_IA32_TSC set the host-side offset
* and therefore do not change MSR_IA32_TSC_ADJUST.
*/
vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
run_vcpu(vcpu, 3);
/* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
/* Restore previous value. */
vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
*/
run_vcpu(vcpu, 4);
val = 3ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
/*
* Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
*/
run_vcpu(vcpu, 5);
val = 4ull * GUEST_STEP;
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
- ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
kvm_vm_free(vm);
end = (unsigned long)buffer + 8192;
asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
- GUEST_ASSERT_1(count == 0, count);
- GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end);
+ GUEST_ASSERT_EQ(count, 0);
+ GUEST_ASSERT_EQ((unsigned long)buffer, end);
}
static void guest_code(void)
memset(buffer, 0, sizeof(buffer));
guest_ins_port80(buffer, 8192);
for (i = 0; i < 8192; i++)
- GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]);
+ __GUEST_ASSERT(buffer[i] == 0xaa,
+ "Expected '0xaa', got '0x%x' at buffer[%u]",
+ buffer[i], i);
GUEST_DONE();
}
case UCALL_DONE:
break;
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx");
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
timer.it_value.tv_sec = 0;
timer.it_value.tv_usec = 200;
timer.it_interval = timer.it_value;
- ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+ TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
}
static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
* and check it can be retrieved with KVM_GET_MSR, also test
* the invalid LBR formats are rejected.
*/
-
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
.pebs_format = -1,
};
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+ uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for value '0x%llx', got vector '0x%x'",
+ val, vector);
+}
+
static void guest_code(uint64_t current_val)
{
- uint8_t vector;
int i;
- vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val);
- GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector);
-
- vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0);
- GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector);
+ guest_test_perf_capabilities_gp(current_val);
+ guest_test_perf_capabilities_gp(0);
- for (i = 0; i < 64; i++) {
- vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES,
- current_val ^ BIT_ULL(i));
- GUEST_ASSERT_2(vector == GP_VECTOR,
- current_val ^ BIT_ULL(i), vector);
- }
+ for (i = 0; i < 64; i++)
+ guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
GUEST_DONE();
}
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
break;
TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
}
- ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+ host_cap.capabilities);
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
vcpu_run(vcpu);
- ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
- ASSERT_EQ(uc.args[1], val);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], val);
vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
(u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
if (!x->is_x2apic) {
val &= (-1u | (0xffull << (32 + 24)));
- ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
} else {
- ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
}
}
*
* Copyright (C) 2022, Google LLC.
*/
-
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
* Assert that architectural dependency rules are satisfied, e.g. that AVX is
* supported if and only if SSE is supported.
*/
-#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
-do { \
- uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
- \
- GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) || \
- __supported == ((xfeatures) | (dependencies)), \
- __supported, (xfeatures), (dependencies)); \
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+ __supported, (xfeatures), (dependencies)); \
} while (0)
/*
do { \
uint64_t __supported = (supported_xcr0) & (xfeatures); \
\
- GUEST_ASSERT_2(!__supported || __supported == (xfeatures), \
+ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
+ "supported = 0x%llx, xfeatures = 0x%llx", \
__supported, (xfeatures)); \
} while (0)
XFEATURE_MASK_XTILE);
vector = xsetbv_safe(0, supported_xcr0);
- GUEST_ASSERT_2(!vector, supported_xcr0, vector);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(0x%llx), got vector '0x%x'",
+ supported_xcr0, vector);
for (i = 0; i < 64; i++) {
if (supported_xcr0 & BIT_ULL(i))
continue;
vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
- GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'",
+ BIT_ULL(i), supported_xcr0, vector);
}
GUEST_DONE();
switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_XEN) {
- ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
- ASSERT_EQ(run->xen.u.hcall.cpl, 0);
- ASSERT_EQ(run->xen.u.hcall.longmode, 1);
- ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
- ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
- ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
- ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
- ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
- ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
- ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
run->xen.u.hcall.result = RETVALUE;
continue;
}
{
char str[32];
int len;
- bool res;
+ bool res = true;
FILE *const inf = fopen("/proc/filesystems", "r");
/*
/* filesystem can be null for bind mounts. */
if (!filesystem)
- return true;
+ goto out;
len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
if (len >= sizeof(str))
/* Ignores too-long filesystem names. */
- return true;
+ goto out;
res = fgrep(inf, str);
+
+out:
fclose(inf);
return res;
}
TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
- ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test"
+ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+ ipv4_mpath_list ipv6_mpath_list"
VERBOSE=0
PAUSE_ON_FAIL=no
cleanup
}
+mpath_dep_check()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "mausezahn command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v bc)" ]; then
+ echo "bc command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v perf)" ]; then
+ echo "perf command not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib:* | grep -q fib_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv4 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib6:* | grep -q fib6_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv6 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+link_stats_get()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local dir=$1; shift
+ local stat=$1; shift
+
+ ip -n $ns -j -s link show dev $dev \
+ | jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+list_rcv_eval()
+{
+ local file=$1; shift
+ local expected=$1; shift
+
+ local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+ local ratio=$(echo "scale=2; $count / $expected" | bc -l)
+ local res=$(echo "$ratio >= 0.95" | bc)
+ [[ $res -eq 1 ]]
+ log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+ echo
+ echo "IPv4 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
+ run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
+ run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n ns2 route add 203.0.113.0/24
+ nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+ set +e
+
+ local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
+ -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+ echo
+ echo "IPv6 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+ run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+ run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
+ nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+ set +e
+
+ local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
+ -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
################################################################################
# usage
ipv6_mangle) ipv6_mangle_test;;
ipv4_bcast_neigh) ipv4_bcast_neigh_test;;
fib6_gc_test|ipv6_gc) fib6_gc_test;;
+ ipv4_mpath_list) ipv4_mpath_list_test;;
+ ipv6_mpath_list) ipv6_mpath_list_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
config KVM_VFIO
bool
-config HAVE_KVM_ARCH_TLB_FLUSH_ALL
- bool
-
config HAVE_KVM_INVALID_WAKEUPS
bool
}
EXPORT_SYMBOL_GPL(kvm_make_all_cpus_request);
-#ifndef CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
++kvm->stat.generic.remote_tlb_flush_requests;
* kvm_make_all_cpus_request() reads vcpu->mode. We reuse that
* barrier here.
*/
- if (!kvm_arch_flush_remote_tlb(kvm)
+ if (!kvm_arch_flush_remote_tlbs(kvm)
|| kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.generic.remote_tlb_flush;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
-#endif
+
+void kvm_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, u64 nr_pages)
+{
+ if (!kvm_arch_flush_remote_tlbs_range(kvm, gfn, nr_pages))
+ return;
+
+ /*
+ * Fall back to a flushing entire TLBs if the architecture range-based
+ * TLB invalidation is unsupported or can't be performed for whatever
+ * reason.
+ */
+ kvm_flush_remote_tlbs(kvm);
+}
+
+void kvm_flush_remote_tlbs_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot)
+{
+ /*
+ * All current use cases for flushing the TLBs for a specific memslot
+ * are related to dirty logging, and many do the TLB flush out of
+ * mmu_lock. The interaction between the various operations on memslot
+ * must be serialized by slots_locks to ensure the TLB flush from one
+ * operation is observed by any other operation on the same memslot.
+ */
+ lockdep_assert_held(&kvm->slots_lock);
+ kvm_flush_remote_tlbs_range(kvm, memslot->base_gfn, memslot->npages);
+}
static void kvm_flush_shadow_all(struct kvm *kvm)
{
struct kvm_hva_range {
unsigned long start;
unsigned long end;
- pte_t pte;
+ union kvm_mmu_notifier_arg arg;
hva_handler_t handler;
on_lock_fn_t on_lock;
on_unlock_fn_t on_unlock;
}
#define IS_KVM_NULL_FN(fn) ((fn) == (void *)kvm_null_fn)
+static const union kvm_mmu_notifier_arg KVM_MMU_NOTIFIER_NO_ARG;
+
/* Iterate over each memslot intersecting [start, last] (inclusive) range */
#define kvm_for_each_memslot_in_hva_range(node, slots, start, last) \
for (node = interval_tree_iter_first(&slots->hva_tree, start, last); \
* bother making these conditional (to avoid writes on
* the second or later invocation of the handler).
*/
- gfn_range.pte = range->pte;
+ gfn_range.arg = range->arg;
gfn_range.may_block = range->may_block;
/*
static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
unsigned long start,
unsigned long end,
- pte_t pte,
+ union kvm_mmu_notifier_arg arg,
hva_handler_t handler)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
const struct kvm_hva_range range = {
.start = start,
.end = end,
- .pte = pte,
+ .arg = arg,
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.on_unlock = (void *)kvm_null_fn,
const struct kvm_hva_range range = {
.start = start,
.end = end,
- .pte = __pte(0),
.handler = handler,
.on_lock = (void *)kvm_null_fn,
.on_unlock = (void *)kvm_null_fn,
pte_t pte)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
+ const union kvm_mmu_notifier_arg arg = { .pte = pte };
trace_kvm_set_spte_hva(address);
if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
return;
- kvm_handle_hva_range(mn, address, address + 1, pte, kvm_change_spte_gfn);
+ kvm_handle_hva_range(mn, address, address + 1, arg, kvm_change_spte_gfn);
}
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
const struct kvm_hva_range hva_range = {
.start = range->start,
.end = range->end,
- .pte = __pte(0),
.handler = kvm_unmap_gfn_range,
.on_lock = kvm_mmu_invalidate_begin,
.on_unlock = kvm_arch_guest_memory_reclaimed,
const struct kvm_hva_range hva_range = {
.start = range->start,
.end = range->end,
- .pte = __pte(0),
.handler = (void *)kvm_null_fn,
.on_lock = kvm_mmu_invalidate_end,
.on_unlock = (void *)kvm_null_fn,
{
trace_kvm_age_hva(start, end);
- return kvm_handle_hva_range(mn, start, end, __pte(0), kvm_age_gfn);
+ return kvm_handle_hva_range(mn, start, end, KVM_MMU_NOTIFIER_NO_ARG,
+ kvm_age_gfn);
}
static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
}
if (flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
if (copy_to_user(log->dirty_bitmap, dirty_bitmap_buffer, n))
return -EFAULT;
KVM_MMU_UNLOCK(kvm);
if (flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
return 0;
}