From e28a9455eb7dc4b50a718b278678d3a1671f2f5c Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim
Date: Wed, 30 Mar 2022 15:33:57 +0900
Subject: [PATCH 01/16] USB: serial: cp210x: add support for SmartThings Link devices

Add support for SmartThings Link devices.

Change-Id: I54293b816779754f987d876272b1c1a145d9baf2
Signed-off-by: Seung-Woo Kim
---
 drivers/usb/serial/cp210x.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index f906c13..cb0de9e 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -244,6 +244,10 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */
 	{ USB_DEVICE(0x3923, 0x7A0B) }, /* National Instruments USB Serial Console */
 	{ USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
+	{ USB_DEVICE(0x04E8, 0x20C6) }, /* SmartThings link WW (Zigbee Only) */
+	{ USB_DEVICE(0x04E8, 0x20C7) }, /* SmartThings link US */
+	{ USB_DEVICE(0x04E8, 0x20C8) }, /* SmartThings link EU */
+	{ USB_DEVICE(0x04E8, 0x20C9) }, /* SmartThings link KR */
 	{ } /* Terminating Entry */
 };
-- 
2.7.4


From 92f5b81d2e61539bca80fc2a6ce682ee503d8d6b Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Mon, 4 Apr 2022 19:48:10 +0900
Subject: [PATCH 02/16] ARM/ARM64: tizen_bcm2711_defconfig: Enable BT_HCIBTUSB config

Enable the CONFIG_BT_HCIBTUSB config. The Tizen GHI image uses a USB BT
device, so this config needs to be enabled to test BT.

Change-Id: Ic1a233dd10978ee6cd7b9535c4cf240352605bd6
Signed-off-by: Jaehoon Chung
---
 arch/arm/configs/tizen_bcm2711_defconfig      | 1 +
 arch/arm/configs/tizen_bcm2711_rt_defconfig   | 1 +
 arch/arm64/configs/tizen_bcm2711_defconfig    | 1 +
 arch/arm64/configs/tizen_bcm2711_rt_defconfig | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/arm/configs/tizen_bcm2711_defconfig b/arch/arm/configs/tizen_bcm2711_defconfig
index ab3c7d8..0465bdb 100644
--- a/arch/arm/configs/tizen_bcm2711_defconfig
+++ b/arch/arm/configs/tizen_bcm2711_defconfig
@@ -189,6 +189,7 @@ CONFIG_BT_BNEP=y
 CONFIG_BT_BNEP_MC_FILTER=y
 CONFIG_BT_BNEP_PROTO_FILTER=y
 CONFIG_BT_HIDP=y
+CONFIG_BT_HCIBTUSB=m
 CONFIG_BT_HCIUART=y
 CONFIG_BT_HCIUART_BCM=y
 CONFIG_BT_HCIBCM203X=m
diff --git a/arch/arm/configs/tizen_bcm2711_rt_defconfig b/arch/arm/configs/tizen_bcm2711_rt_defconfig
index 1e2015c..884b6a2 100644
--- a/arch/arm/configs/tizen_bcm2711_rt_defconfig
+++ b/arch/arm/configs/tizen_bcm2711_rt_defconfig
@@ -188,6 +188,7 @@ CONFIG_BT_BNEP=y
 CONFIG_BT_BNEP_MC_FILTER=y
 CONFIG_BT_BNEP_PROTO_FILTER=y
 CONFIG_BT_HIDP=y
+CONFIG_BT_HCIBTUSB=m
 CONFIG_BT_HCIUART=y
 CONFIG_BT_HCIUART_BCM=y
 CONFIG_BT_HCIBCM203X=m
diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig
index 305a579..8ebb4dd 100644
--- a/arch/arm64/configs/tizen_bcm2711_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_defconfig
@@ -192,6 +192,7 @@ CONFIG_BT_BNEP=y
 CONFIG_BT_BNEP_MC_FILTER=y
 CONFIG_BT_BNEP_PROTO_FILTER=y
 CONFIG_BT_HIDP=y
+CONFIG_BT_HCIBTUSB=m
 CONFIG_BT_HCIUART=y
 CONFIG_BT_HCIUART_BCM=y
 CONFIG_BT_HCIBCM203X=m
diff --git a/arch/arm64/configs/tizen_bcm2711_rt_defconfig b/arch/arm64/configs/tizen_bcm2711_rt_defconfig
index a6f68bd..0b18a41 100644
--- a/arch/arm64/configs/tizen_bcm2711_rt_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_rt_defconfig
@@ -189,6 +189,7 @@ CONFIG_BT_BNEP=y
 CONFIG_BT_BNEP_MC_FILTER=y
 CONFIG_BT_BNEP_PROTO_FILTER=y
 CONFIG_BT_HIDP=y
+CONFIG_BT_HCIBTUSB=m
 CONFIG_BT_HCIUART=y
 CONFIG_BT_HCIUART_BCM=y
 CONFIG_BT_HCIBCM203X=m
-- 
2.7.4

From 63423c8e70cf8b80226d72944696ab8c00350aaa Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim
Date: Wed, 6 Apr 2022 19:52:06 +0900
Subject: [PATCH 03/16] Revert "USB: serial: cp210x: add support for SmartThings Link devices"

This reverts commit e28a9455eb7dc4b50a718b278678d3a1671f2f5c.

The app for the SmartThings Link uses GPIO ioctls that are not in the
normal cp210x driver, so the device will be supported with a new driver
instead of changing the existing cp210x.

Change-Id: I76d126ae88ddf2f0cfc7b6fffcca930741be2388
Signed-off-by: Seung-Woo Kim
---
 drivers/usb/serial/cp210x.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index cb0de9e..f906c13 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -244,10 +244,6 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x3195, 0xF281) }, /* Link Instruments MSO-28 */
 	{ USB_DEVICE(0x3923, 0x7A0B) }, /* National Instruments USB Serial Console */
 	{ USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
-	{ USB_DEVICE(0x04E8, 0x20C6) }, /* SmartThings link WW (Zigbee Only) */
-	{ USB_DEVICE(0x04E8, 0x20C7) }, /* SmartThings link US */
-	{ USB_DEVICE(0x04E8, 0x20C8) }, /* SmartThings link EU */
-	{ USB_DEVICE(0x04E8, 0x20C9) }, /* SmartThings link KR */
 	{ } /* Terminating Entry */
 };
-- 
2.7.4


From 07f411a9da057be24b09aad706040de08d93de56 Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim
Date: Wed, 6 Apr 2022 19:55:03 +0900
Subject: [PATCH 04/16] USB: serial: cp210x: add cp210x-stlink driver

To support the USB SmartThings Link device and its application, add a
fixed version of the cp210x driver as cp210x-stlink.

Change-Id: Ic1d1a54723697cabf55545d708e2e92f8caddee2
Signed-off-by: Seung-Woo Kim
---
 drivers/usb/serial/Makefile        |    1 +
 drivers/usb/serial/cp210x-stlink.c | 1327 ++++++++++++++++++++++++++++++++++++
 2 files changed, 1328 insertions(+)
 create mode 100644 drivers/usb/serial/cp210x-stlink.c

diff --git a/drivers/usb/serial/Makefile b/drivers/usb/serial/Makefile
index 2d491e4..22ee497 100644
--- a/drivers/usb/serial/Makefile
+++ b/drivers/usb/serial/Makefile
@@ -16,6 +16,7 @@ obj-$(CONFIG_USB_SERIAL_ARK3116) += ark3116.o
 obj-$(CONFIG_USB_SERIAL_BELKIN) += belkin_sa.o
 obj-$(CONFIG_USB_SERIAL_CH341) += ch341.o
 obj-$(CONFIG_USB_SERIAL_CP210X) += cp210x.o
+obj-$(CONFIG_USB_SERIAL_CP210X) += cp210x-stlink.o
 obj-$(CONFIG_USB_SERIAL_CYBERJACK) += cyberjack.o
 obj-$(CONFIG_USB_SERIAL_CYPRESS_M8) += cypress_m8.o
 obj-$(CONFIG_USB_SERIAL_DEBUG) += usb_debug.o
diff --git a/drivers/usb/serial/cp210x-stlink.c b/drivers/usb/serial/cp210x-stlink.c
new file mode 100644
index 0000000..19bad5d
--- /dev/null
+++ b/drivers/usb/serial/cp210x-stlink.c
@@ -0,0 +1,1327 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Silicon Laboratories CP210x USB to RS232 serial adaptor driver
+ *
+ * Copyright (C) 2005 Craig Shelley (craig@microtron.org.uk)
+ *
+ * Support to set flow control line levels using TIOCMGET and TIOCMSET
+ * thanks to Karl Hiramoto karl@hiramoto.org. RTSCTS hardware flow
+ * control thanks to Munir Nassar nassarmu@real-time.com
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/usb.h>
+#include <linux/uaccess.h>
+#include <linux/usb/serial.h>
+#include <linux/bitops.h>
+
+#define DRIVER_DESC "Silicon Labs CP210x RS232 serial adaptor driver"
+
+/*
+ * Function Prototypes
+ */
+static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *);
+static void cp210x_close(struct usb_serial_port *);
+static int cp210x_ioctl(struct tty_struct *tty,
+	unsigned int cmd, unsigned long arg);
+static void cp210x_get_termios(struct tty_struct *, struct usb_serial_port *);
+static void cp210x_get_termios_port(struct usb_serial_port *port,
+	tcflag_t *cflagp, unsigned int *baudp);
+static void cp210x_change_speed(struct tty_struct *, struct usb_serial_port *,
+	struct ktermios *);
+static void cp210x_set_termios(struct tty_struct *, struct usb_serial_port *,
+	struct ktermios*);
+static bool cp210x_tx_empty(struct usb_serial_port *port);
+static int cp210x_tiocmget(struct tty_struct *);
+static int cp210x_tiocmset(struct tty_struct *, unsigned int, unsigned int);
+static int cp210x_tiocmset_port(struct usb_serial_port *port,
+	unsigned int, unsigned int);
+static void cp210x_break_ctl(struct tty_struct *, int);
+static int cp210x_port_probe(struct usb_serial_port *);
+static int cp210x_port_remove(struct usb_serial_port *);
+static void cp210x_dtr_rts(struct usb_serial_port *p, int on);
+
+static const struct usb_device_id id_table[] = {
+	{ USB_DEVICE(0x04E8, 0x20C6) }, /* SmartThings link WW (Zigbee Only) */
+	{ USB_DEVICE(0x04E8, 0x20C7) }, /* SmartThings link US */
+	{ USB_DEVICE(0x04E8, 0x20C8) }, /* SmartThings link EU */
+	{ USB_DEVICE(0x04E8, 0x20C9) }, /* SmartThings link KR */
+	{ } /* Terminating Entry */
+};
+
+MODULE_DEVICE_TABLE(usb, id_table);
+
+struct cp210x_port_private {
+	__u8 bPartNumber;
+	__u8 bInterfaceNumber;
+	bool has_swapped_line_ctl;
+	bool is_cp2102n_a01;
+};
+
+static struct usb_serial_driver cp210x_device = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "cp210x",
+	},
+	.id_table = id_table,
+	.num_ports = 1,
+	.bulk_in_size = 256,
+	.bulk_out_size = 256,
+	.open = cp210x_open,
+	.close = cp210x_close,
+	.ioctl = cp210x_ioctl,
+	.break_ctl = cp210x_break_ctl,
+	.set_termios = cp210x_set_termios,
+	.tx_empty = cp210x_tx_empty,
+	.tiocmget = cp210x_tiocmget,
+	.tiocmset = cp210x_tiocmset,
+	.port_probe = cp210x_port_probe,
+	.port_remove = cp210x_port_remove,
+	.dtr_rts = cp210x_dtr_rts
+};
+
+/* IOCTLs */
+#define IOCTL_GPIOGET 0x8000
+#define IOCTL_GPIOSET 0x8001
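+
+/*
+ * Illustrative userspace usage of the two ioctls above (an example
+ * sketch, not part of the driver; the device node and latch value are
+ * assumptions). The transfer width depends on the part, as handled in
+ * cp210x_ioctl() below: 8-bit latch reads and 16-bit writes on
+ * CP2103/CP2104/CP2105/CP2102N, 16-bit reads and 32-bit writes on
+ * CP2108. The written value encodes latch state and mask per SiLabs
+ * AN571. Userspace must define IOCTL_GPIOGET/IOCTL_GPIOSET itself, as
+ * no uapi header is provided.
+ *
+ *	int fd = open("/dev/ttyUSB0", O_RDWR | O_NOCTTY);
+ *	unsigned char latch;
+ *	unsigned short set_val = 0x0101;	// example latch/mask pair (GPIO.0 high)
+ *
+ *	if (fd >= 0 && ioctl(fd, IOCTL_GPIOGET, &latch) == 0)
+ *		ioctl(fd, IOCTL_GPIOSET, &set_val);
+ */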
+
+static struct usb_serial_driver * const serial_drivers[] = {
+	&cp210x_device, NULL
+};
+
+/* Config request types */
+#define REQTYPE_HOST_TO_INTERFACE	0x41
+#define REQTYPE_INTERFACE_TO_HOST	0xc1
+#define REQTYPE_HOST_TO_DEVICE	0x40
+#define REQTYPE_DEVICE_TO_HOST	0xc0
+
+/* Config request codes */
+#define CP210X_IFC_ENABLE	0x00
+#define CP210X_SET_BAUDDIV	0x01
+#define CP210X_GET_BAUDDIV	0x02
+#define CP210X_SET_LINE_CTL	0x03
+#define CP210X_GET_LINE_CTL	0x04
+#define CP210X_SET_BREAK	0x05
+#define CP210X_IMM_CHAR		0x06
+#define CP210X_SET_MHS		0x07
+#define CP210X_GET_MDMSTS	0x08
+#define CP210X_SET_XON		0x09
+#define CP210X_SET_XOFF		0x0A
+#define CP210X_SET_EVENTMASK	0x0B
+#define CP210X_GET_EVENTMASK	0x0C
+#define CP210X_SET_CHAR		0x0D
+#define CP210X_GET_CHARS	0x0E
+#define CP210X_GET_PROPS	0x0F
+#define CP210X_GET_COMM_STATUS	0x10
+#define CP210X_RESET		0x11
+#define CP210X_PURGE		0x12
+#define CP210X_SET_FLOW		0x13
+#define CP210X_GET_FLOW		0x14
+#define CP210X_EMBED_EVENTS	0x15
+#define CP210X_GET_EVENTSTATE	0x16
+#define CP210X_SET_CHARS	0x19
+#define CP210X_GET_BAUDRATE	0x1D
+#define CP210X_SET_BAUDRATE	0x1E
+#define CP210X_VENDOR_SPECIFIC	0xFF
+
+/* CP210X_IFC_ENABLE */
+#define UART_ENABLE		0x0001
+#define UART_DISABLE		0x0000
+
+/* CP210X_(SET|GET)_BAUDDIV */
+#define BAUD_RATE_GEN_FREQ	0x384000
+
+/* CP210X_(SET|GET)_LINE_CTL */
+#define BITS_DATA_MASK		0X0f00
+#define BITS_DATA_5		0X0500
+#define BITS_DATA_6		0X0600
+#define BITS_DATA_7		0X0700
+#define BITS_DATA_8		0X0800
+#define BITS_DATA_9		0X0900
+
+#define BITS_PARITY_MASK	0x00f0
+#define BITS_PARITY_NONE	0x0000
+#define BITS_PARITY_ODD		0x0010
+#define BITS_PARITY_EVEN	0x0020
+#define BITS_PARITY_MARK	0x0030
+#define BITS_PARITY_SPACE	0x0040
+
+#define BITS_STOP_MASK		0x000f
+#define BITS_STOP_1		0x0000
+#define BITS_STOP_1_5		0x0001
+#define BITS_STOP_2		0x0002
+
+/* CP210X_SET_BREAK */
+#define BREAK_ON		0x0001
+#define BREAK_OFF		0x0000
+
+/* CP210X_(SET_MHS|GET_MDMSTS) */
+#define CONTROL_DTR		0x0001
+#define CONTROL_RTS		0x0002
+#define CONTROL_CTS		0x0010
+#define CONTROL_DSR		0x0020
+#define CONTROL_RING		0x0040
+#define CONTROL_DCD		0x0080
+#define CONTROL_WRITE_DTR	0x0100
+#define CONTROL_WRITE_RTS	0x0200
+
+/* CP210X_VENDOR_SPECIFIC sub-commands passed in wValue */
+#define CP210X_WRITE_LATCH	0x37E1
+#define CP210X_READ_LATCH	0x00C2
+#define CP210X_GET_PARTNUM	0x370B
+#define CP2102N_GET_FW_VERS	0x0010
+
+/* CP210X_GET_PARTNUM returns one of these */
+#define CP2101_PARTNUM		0x01
+#define CP2102_PARTNUM		0x02
+#define CP2103_PARTNUM		0x03
+#define CP2104_PARTNUM		0x04
+#define CP2105_PARTNUM		0x05
+#define CP2108_PARTNUM		0x08
+#define CP210x_PARTNUM_CP2102N_QFN28	0x20
+#define CP210x_PARTNUM_CP2102N_QFN24	0x21
+#define CP210x_PARTNUM_CP2102N_QFN20	0x22
+
+/* CP210X_GET_COMM_STATUS returns these 0x13 bytes */
+struct cp210x_comm_status {
+	__le32   ulErrors;
+	__le32   ulHoldReasons;
+	__le32   ulAmountInInQueue;
+	__le32   ulAmountInOutQueue;
+	u8       bEofReceived;
+	u8       bWaitForImmediate;
+	u8       bReserved;
+} __packed;
+
+/*
+ * CP210X_PURGE - 16 bits passed in wValue of USB request.
+ * SiLabs app note AN571 gives a strange description of the 4 bits:
+ * bit 0 or bit 2 clears the transmit queue and 1 or 3 receive.
+ * writing 1 to all, however, purges cp2108 well enough to avoid the hang.
+ */ +#define PURGE_ALL 0x000f + +/* CP210X_GET_FLOW/CP210X_SET_FLOW read/write these 0x10 bytes */ +struct cp210x_flow_ctl { + __le32 ulControlHandshake; + __le32 ulFlowReplace; + __le32 ulXonLimit; + __le32 ulXoffLimit; +} __packed; + +/* cp210x_flow_ctl::ulControlHandshake */ +#define CP210X_SERIAL_DTR_MASK GENMASK(1, 0) +#define CP210X_SERIAL_DTR_SHIFT(_mode) (_mode) +#define CP210X_SERIAL_CTS_HANDSHAKE BIT(3) +#define CP210X_SERIAL_DSR_HANDSHAKE BIT(4) +#define CP210X_SERIAL_DCD_HANDSHAKE BIT(5) +#define CP210X_SERIAL_DSR_SENSITIVITY BIT(6) + +/* values for cp210x_flow_ctl::ulControlHandshake::CP210X_SERIAL_DTR_MASK */ +#define CP210X_SERIAL_DTR_INACTIVE 0 +#define CP210X_SERIAL_DTR_ACTIVE 1 +#define CP210X_SERIAL_DTR_FLOW_CTL 2 + +/* cp210x_flow_ctl::ulFlowReplace */ +#define CP210X_SERIAL_AUTO_TRANSMIT BIT(0) +#define CP210X_SERIAL_AUTO_RECEIVE BIT(1) +#define CP210X_SERIAL_ERROR_CHAR BIT(2) +#define CP210X_SERIAL_NULL_STRIPPING BIT(3) +#define CP210X_SERIAL_BREAK_CHAR BIT(4) +#define CP210X_SERIAL_RTS_MASK GENMASK(7, 6) +#define CP210X_SERIAL_RTS_SHIFT(_mode) (_mode << 6) +#define CP210X_SERIAL_XOFF_CONTINUE BIT(31) + +/* values for cp210x_flow_ctl::ulFlowReplace::CP210X_SERIAL_RTS_MASK */ +#define CP210X_SERIAL_RTS_INACTIVE 0 +#define CP210X_SERIAL_RTS_ACTIVE 1 +#define CP210X_SERIAL_RTS_FLOW_CTL 2 + +/* + * Reads a variable-sized block of CP210X_ registers, identified by req. + * Returns data into buf in native USB byte order. + */ +static int cp210x_read_reg_block(struct usb_serial_port *port, u8 req, + void *buf, int bufsize) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + void *dmabuf; + int result; + + dmabuf = kmalloc(bufsize, GFP_KERNEL); + if (!dmabuf) { + /* + * FIXME Some callers don't bother to check for error, + * at least give them consistent junk until they are fixed + */ + memset(buf, 0, bufsize); + return -ENOMEM; + } + + result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + req, REQTYPE_INTERFACE_TO_HOST, 0, + port_priv->bInterfaceNumber, dmabuf, bufsize, + USB_CTRL_SET_TIMEOUT); + if (result == bufsize) { + memcpy(buf, dmabuf, bufsize); + result = 0; + } else { + dev_err(&port->dev, "failed get req 0x%x size %d status: %d\n", + req, bufsize, result); + if (result >= 0) + result = -EPROTO; + + /* + * FIXME Some callers don't bother to check for error, + * at least give them consistent junk until they are fixed + */ + memset(buf, 0, bufsize); + } + + kfree(dmabuf); + + return result; +} + +/* + * Reads any 32-bit CP210X_ register identified by req. + */ +static int cp210x_read_u32_reg(struct usb_serial_port *port, u8 req, u32 *val) +{ + __le32 le32_val; + int err; + + err = cp210x_read_reg_block(port, req, &le32_val, sizeof(le32_val)); + if (err) { + /* + * FIXME Some callers don't bother to check for error, + * at least give them consistent junk until they are fixed + */ + *val = 0; + return err; + } + + *val = le32_to_cpu(le32_val); + + return 0; +} + +/* + * Reads any 16-bit CP210X_ register identified by req. + */ +static int cp210x_read_u16_reg(struct usb_serial_port *port, u8 req, u16 *val) +{ + __le16 le16_val; + int err; + + err = cp210x_read_reg_block(port, req, &le16_val, sizeof(le16_val)); + if (err) + return err; + + *val = le16_to_cpu(le16_val); + + return 0; +} + +/* + * Reads any 8-bit CP210X_ register identified by req. 
+ */ +static int cp210x_read_u8_reg(struct usb_serial_port *port, u8 req, u8 *val) +{ + return cp210x_read_reg_block(port, req, val, sizeof(*val)); +} + +/* + * Writes any 16-bit CP210X_ register (req) whose value is passed + * entirely in the wValue field of the USB request. + */ +static int cp210x_write_u16_reg(struct usb_serial_port *port, u8 req, u16 val) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + int result; + + result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), + req, REQTYPE_HOST_TO_INTERFACE, val, + port_priv->bInterfaceNumber, NULL, 0, + USB_CTRL_SET_TIMEOUT); + if (result < 0) { + dev_err(&port->dev, "failed set request 0x%x status: %d\n", + req, result); + } + + return result; +} + +/* + * Writes a variable-sized block of CP210X_ registers, identified by req. + * Data in buf must be in native USB byte order. + */ +static int cp210x_write_reg_block(struct usb_serial_port *port, u8 req, + void *buf, int bufsize) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + void *dmabuf; + int result; + + dmabuf = kmemdup(buf, bufsize, GFP_KERNEL); + if (!dmabuf) + return -ENOMEM; + + result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), + req, REQTYPE_HOST_TO_INTERFACE, 0, + port_priv->bInterfaceNumber, dmabuf, bufsize, + USB_CTRL_SET_TIMEOUT); + + kfree(dmabuf); + + if (result == bufsize) { + result = 0; + } else { + dev_err(&port->dev, "failed set req 0x%x size %d status: %d\n", + req, bufsize, result); + if (result >= 0) + result = -EPROTO; + } + + return result; +} + +/* + * Writes any 32-bit CP210X_ register identified by req. + */ +static int cp210x_write_u32_reg(struct usb_serial_port *port, u8 req, u32 val) +{ + __le32 le32_val; + + le32_val = cpu_to_le32(val); + + return cp210x_write_reg_block(port, req, &le32_val, sizeof(le32_val)); +} + +/* + * Detect CP2108 GET_LINE_CTL bug and activate workaround. + * Write a known good value 0x800, read it back. + * If it comes back swapped the bug is detected. + * Preserve the original register value. + */ +static int cp210x_detect_swapped_line_ctl(struct usb_serial_port *port) +{ + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + u16 line_ctl_save; + u16 line_ctl_test; + int err; + + err = cp210x_read_u16_reg(port, CP210X_GET_LINE_CTL, &line_ctl_save); + if (err) + return err; + + err = cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, 0x800); + if (err) + return err; + + err = cp210x_read_u16_reg(port, CP210X_GET_LINE_CTL, &line_ctl_test); + if (err) + return err; + + if (line_ctl_test == 8) { + port_priv->has_swapped_line_ctl = true; + line_ctl_save = swab16(line_ctl_save); + } + + return cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, line_ctl_save); +} + +/* + * Must always be called instead of cp210x_read_u16_reg(CP210X_GET_LINE_CTL) + * to workaround cp2108 bug and get correct value. 
+ */ +static int cp210x_get_line_ctl(struct usb_serial_port *port, u16 *ctl) +{ + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + int err; + + err = cp210x_read_u16_reg(port, CP210X_GET_LINE_CTL, ctl); + if (err) + return err; + + /* Workaround swapped bytes in 16-bit value from CP210X_GET_LINE_CTL */ + if (port_priv->has_swapped_line_ctl) + *ctl = swab16(*ctl); + + return 0; +} + +/* + * cp210x_quantise_baudrate + * Quantises the baud rate as per AN205 Table 1 + */ +static unsigned int cp210x_quantise_baudrate(unsigned int baud) +{ + if (baud <= 300) + baud = 300; + else if (baud <= 600) baud = 600; + else if (baud <= 1200) baud = 1200; + else if (baud <= 1800) baud = 1800; + else if (baud <= 2400) baud = 2400; + else if (baud <= 4000) baud = 4000; + else if (baud <= 4803) baud = 4800; + else if (baud <= 7207) baud = 7200; + else if (baud <= 9612) baud = 9600; + else if (baud <= 14428) baud = 14400; + else if (baud <= 16062) baud = 16000; + else if (baud <= 19250) baud = 19200; + else if (baud <= 28912) baud = 28800; + else if (baud <= 38601) baud = 38400; + else if (baud <= 51558) baud = 51200; + else if (baud <= 56280) baud = 56000; + else if (baud <= 58053) baud = 57600; + else if (baud <= 64111) baud = 64000; + else if (baud <= 77608) baud = 76800; + else if (baud <= 117028) baud = 115200; + else if (baud <= 129347) baud = 128000; + else if (baud <= 156868) baud = 153600; + else if (baud <= 237832) baud = 230400; + else if (baud <= 254234) baud = 250000; + else if (baud <= 273066) baud = 256000; + else if (baud <= 491520) baud = 460800; + else if (baud <= 567138) baud = 500000; + else if (baud <= 670254) baud = 576000; + else if (baud < 1000000) + baud = 921600; + else if (baud > 2000000) + baud = 2000000; + return baud; +} + +static int cp210x_open(struct tty_struct *tty, struct usb_serial_port *port) +{ + int result; + + result = cp210x_write_u16_reg(port, CP210X_IFC_ENABLE, UART_ENABLE); + if (result) { + dev_err(&port->dev, "%s - Unable to enable UART\n", __func__); + return result; + } + + /* Configure the termios structure */ + cp210x_get_termios(tty, port); + + /* The baud rate must be initialised on cp2104 */ + if (tty) + cp210x_change_speed(tty, port, NULL); + + return usb_serial_generic_open(tty, port); +} + +static void cp210x_close(struct usb_serial_port *port) +{ + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + + usb_serial_generic_close(port); + + /* Clear both queues; cp2108 needs this to avoid an occasional hang */ + cp210x_write_u16_reg(port, CP210X_PURGE, PURGE_ALL); + + cp210x_write_u16_reg(port, CP210X_IFC_ENABLE, UART_DISABLE); + + if (port_priv->is_cp2102n_a01) { + int result; + + struct usb_serial *serial = port->serial; + + result = usb_control_msg(serial->dev, + usb_sndctrlpipe(serial->dev, 0), + 0x09, + 0, + 0, + port_priv->bInterfaceNumber, + NULL, + 0, + USB_CTRL_SET_TIMEOUT); + + + if (result < 0) { + dev_err(&port->dev, "failed usb_control_msg (1) request 0x09 status: %d\n", + result); + } + + result = usb_control_msg(serial->dev, + usb_sndctrlpipe(serial->dev, 0), + 0x09, + 0, + 1, + port_priv->bInterfaceNumber, + NULL, + 0, + USB_CTRL_SET_TIMEOUT); + + + if (result < 0) { + dev_err(&port->dev, "failed usb_control_msg (2) request 0x09 status: %d\n", + result); + } + + + result = usb_control_msg(serial->dev, + usb_sndctrlpipe(serial->dev, 0), + 0x01, + 2, + 0, + 0x82, + NULL, + 0, + USB_CTRL_SET_TIMEOUT); + + + if (result < 0) { + dev_err(&port->dev, "failed usb_control_msg (1) request 0x01 status: 
%d\n", + result); + } + + result = usb_control_msg(serial->dev, + usb_sndctrlpipe(serial->dev, 0), + 0x01, + 2, + 0, + 0x02, + NULL, + 0, + USB_CTRL_SET_TIMEOUT); + + + if (result < 0) { + dev_err(&port->dev, "failed usb_control_msg (2) request 0x01 status: %d\n", + result); + } + + } +} + +/* + * Reads a variable-sized vendor-specific register identified by wValue + * Returns data into buf in native USB byte order. + */ +static int cp210x_read_vs_reg_block(struct usb_serial_port *port, u8 bmRequestType, + u16 wValue, void *buf, int bufsize) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + void *dmabuf; + int result; + + dmabuf = kmalloc(bufsize, GFP_KERNEL); + if (!dmabuf) + return -ENOMEM; + + result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + CP210X_VENDOR_SPECIFIC, bmRequestType, wValue, + port_priv->bInterfaceNumber, dmabuf, bufsize, + USB_CTRL_SET_TIMEOUT); + if (result == bufsize) { + memcpy(buf, dmabuf, bufsize); + result = 0; + } else { + dev_err(&port->dev, "failed get VENDOR_SPECIFIC wValue 0x%x size %d status: %d\n", + wValue, bufsize, result); + if (result >= 0) + result = -EPROTO; + } + + kfree(dmabuf); + + return result; +} + +/* GPIO register read from single-interface CP210x */ +static int cp210x_read_device_gpio_u8(struct usb_serial_port *port, u8 *val) +{ + return cp210x_read_vs_reg_block(port, REQTYPE_DEVICE_TO_HOST, CP210X_READ_LATCH, val, 1); +} + +/* GPIO register read from CP2105 */ +static int cp210x_read_interface_gpio_u8(struct usb_serial_port *port, u8 *val) +{ + return cp210x_read_vs_reg_block(port, REQTYPE_INTERFACE_TO_HOST, CP210X_READ_LATCH, val, 1); +} + +/* GPIO register read from CP2108 */ +static int cp210x_read_device_gpio_u16(struct usb_serial_port *port, u16 *val) +{ + __le16 le16_val; + int err = cp210x_read_vs_reg_block(port, REQTYPE_DEVICE_TO_HOST, CP210X_READ_LATCH, &le16_val, 2); + if (err) + return err; + + *val = le16_to_cpu(le16_val); + return 0; +} + +/* GPIO register write to single-interface CP210x */ +static int cp210x_write_device_gpio_u16(struct usb_serial_port *port, u16 val) +{ + int result; + + result = usb_control_msg(port->serial->dev, + usb_sndctrlpipe(port->serial->dev, 0), + CP210X_VENDOR_SPECIFIC, + REQTYPE_HOST_TO_DEVICE, + CP210X_WRITE_LATCH, /* wValue */ + val, /* wIndex */ + NULL, 0, USB_CTRL_SET_TIMEOUT); + if (result != 0) { + dev_err(&port->dev, "failed set WRITE_LATCH status: %d\n", + result); + if (result >= 0) + result = -EPROTO; + } + return result; +} + +/* + * Writes a variable-sized block of CP210X_ registers, identified by req. + * Data in buf must be in native USB byte order. 
+ */ +static int cp210x_write_gpio_reg_block(struct usb_serial_port *port, + u8 bmRequestType, void *buf, int bufsize) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + void *dmabuf; + int result; + + dmabuf = kmalloc(bufsize, GFP_KERNEL); + if (!dmabuf) + return -ENOMEM; + + memcpy(dmabuf, buf, bufsize); + + result = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), + CP210X_VENDOR_SPECIFIC, bmRequestType, + CP210X_WRITE_LATCH, /* wValue */ + port_priv->bInterfaceNumber, /* wIndex */ + dmabuf, bufsize, + USB_CTRL_SET_TIMEOUT); + + kfree(dmabuf); + + if (result == bufsize) { + result = 0; + } else { + dev_err(&port->dev, "failed set WRITE_LATCH size %d status: %d\n", + bufsize, result); + if (result >= 0) + result = -EPROTO; + } + + return result; +} + +/* GPIO register write to CP2105 */ +static int cp210x_write_interface_gpio_u16(struct usb_serial_port *port, u16 val) +{ + __le16 le16_val = cpu_to_le16(val); + + return cp210x_write_gpio_reg_block(port, REQTYPE_HOST_TO_INTERFACE, &le16_val, 2); +} + +/* GPIO register write to CP2108 */ +static int cp210x_write_device_gpio_u32(struct usb_serial_port *port, u32 val) +{ + __le32 le32_val = cpu_to_le32(val); + + return cp210x_write_gpio_reg_block(port, REQTYPE_HOST_TO_DEVICE, &le32_val, 4); +} + +static int cp210x_ioctl(struct tty_struct *tty, + unsigned int cmd, unsigned long arg) +{ + struct usb_serial_port *port = tty->driver_data; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + + switch (cmd) { + case IOCTL_GPIOGET: + if ((port_priv->bPartNumber == CP2103_PARTNUM) || + (port_priv->bPartNumber == CP2104_PARTNUM) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN28) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN24) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN20)) { + u8 gpio; + int err = cp210x_read_device_gpio_u8(port, &gpio); + if (err) + return err; + if (copy_to_user((void*)arg, &gpio, sizeof(gpio))) + return -EFAULT; + return 0; + } + else if (port_priv->bPartNumber == CP2105_PARTNUM) { + u8 gpio; + int err = cp210x_read_interface_gpio_u8(port, &gpio); + if (err) + return err; + if (copy_to_user((void*)arg, &gpio, sizeof(gpio))) + return -EFAULT; + return 0; + } + else if (port_priv->bPartNumber == CP2108_PARTNUM) { + u16 gpio; + int err = cp210x_read_device_gpio_u16(port, &gpio); + if (err) + return err; + if (copy_to_user((void*)arg, &gpio, sizeof(gpio))) + return -EFAULT; + return 0; + } + else { + return -ENOTSUPP; + } + break; + case IOCTL_GPIOSET: + if ((port_priv->bPartNumber == CP2103_PARTNUM) || + (port_priv->bPartNumber == CP2104_PARTNUM) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN28) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN24) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN20)) { + u16 gpio; + if (copy_from_user(&gpio, (void*)arg, sizeof(gpio))) + return -EFAULT; + return cp210x_write_device_gpio_u16(port, gpio); + } + else if (port_priv->bPartNumber == CP2105_PARTNUM) { + u16 gpio; + if (copy_from_user(&gpio, (void*)arg, sizeof(gpio))) + return -EFAULT; + return cp210x_write_interface_gpio_u16(port, gpio); + } + else if (port_priv->bPartNumber == CP2108_PARTNUM) { + u32 gpio; + if (copy_from_user(&gpio, (void*)arg, sizeof(gpio))) + return -EFAULT; + return cp210x_write_device_gpio_u32(port, gpio); + } + else { + return -ENOTSUPP; + } + break; + + default: + break; + } + + return -ENOIOCTLCMD; +} + +/* + * Read how many bytes are waiting 
in the TX queue. + */ +static int cp210x_get_tx_queue_byte_count(struct usb_serial_port *port, + u32 *count) +{ + struct usb_serial *serial = port->serial; + struct cp210x_port_private *port_priv = usb_get_serial_port_data(port); + struct cp210x_comm_status *sts; + int result; + + sts = kmalloc(sizeof(*sts), GFP_KERNEL); + if (!sts) + return -ENOMEM; + + result = usb_control_msg(serial->dev, usb_rcvctrlpipe(serial->dev, 0), + CP210X_GET_COMM_STATUS, REQTYPE_INTERFACE_TO_HOST, + 0, port_priv->bInterfaceNumber, sts, sizeof(*sts), + USB_CTRL_GET_TIMEOUT); + if (result == sizeof(*sts)) { + *count = le32_to_cpu(sts->ulAmountInOutQueue); + result = 0; + } else { + dev_err(&port->dev, "failed to get comm status: %d\n", result); + if (result >= 0) + result = -EPROTO; + } + + kfree(sts); + + return result; +} + +static bool cp210x_tx_empty(struct usb_serial_port *port) +{ + int err; + u32 count; + + err = cp210x_get_tx_queue_byte_count(port, &count); + if (err) + return true; + + return !count; +} + +/* + * cp210x_get_termios + * Reads the baud rate, data bits, parity, stop bits and flow control mode + * from the device, corrects any unsupported values, and configures the + * termios structure to reflect the state of the device + */ +static void cp210x_get_termios(struct tty_struct *tty, + struct usb_serial_port *port) +{ + unsigned int baud; + + if (tty) { + cp210x_get_termios_port(tty->driver_data, + &tty->termios.c_cflag, &baud); + tty_encode_baud_rate(tty, baud, baud); + } else { + tcflag_t cflag; + cflag = 0; + cp210x_get_termios_port(port, &cflag, &baud); + } +} + +/* + * cp210x_get_termios_port + * This is the heart of cp210x_get_termios which always uses a &usb_serial_port. + */ +static void cp210x_get_termios_port(struct usb_serial_port *port, + tcflag_t *cflagp, unsigned int *baudp) +{ + struct device *dev = &port->dev; + tcflag_t cflag; + struct cp210x_flow_ctl flow_ctl; + u32 baud; + u16 bits; + u32 ctl_hs; + + cp210x_read_u32_reg(port, CP210X_GET_BAUDRATE, &baud); + + dev_dbg(dev, "%s - baud rate = %d\n", __func__, baud); + *baudp = baud; + + cflag = *cflagp; + + cp210x_get_line_ctl(port, &bits); + cflag &= ~CSIZE; + switch (bits & BITS_DATA_MASK) { + case BITS_DATA_5: + dev_dbg(dev, "%s - data bits = 5\n", __func__); + cflag |= CS5; + break; + case BITS_DATA_6: + dev_dbg(dev, "%s - data bits = 6\n", __func__); + cflag |= CS6; + break; + case BITS_DATA_7: + dev_dbg(dev, "%s - data bits = 7\n", __func__); + cflag |= CS7; + break; + case BITS_DATA_8: + dev_dbg(dev, "%s - data bits = 8\n", __func__); + cflag |= CS8; + break; + case BITS_DATA_9: + dev_dbg(dev, "%s - data bits = 9 (not supported, using 8 data bits)\n", __func__); + cflag |= CS8; + bits &= ~BITS_DATA_MASK; + bits |= BITS_DATA_8; + cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); + break; + default: + dev_dbg(dev, "%s - Unknown number of data bits, using 8\n", __func__); + cflag |= CS8; + bits &= ~BITS_DATA_MASK; + bits |= BITS_DATA_8; + cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); + break; + } + + switch (bits & BITS_PARITY_MASK) { + case BITS_PARITY_NONE: + dev_dbg(dev, "%s - parity = NONE\n", __func__); + cflag &= ~PARENB; + break; + case BITS_PARITY_ODD: + dev_dbg(dev, "%s - parity = ODD\n", __func__); + cflag |= (PARENB|PARODD); + break; + case BITS_PARITY_EVEN: + dev_dbg(dev, "%s - parity = EVEN\n", __func__); + cflag &= ~PARODD; + cflag |= PARENB; + break; + case BITS_PARITY_MARK: + dev_dbg(dev, "%s - parity = MARK\n", __func__); + cflag |= (PARENB|PARODD|CMSPAR); + break; + case BITS_PARITY_SPACE: + 
dev_dbg(dev, "%s - parity = SPACE\n", __func__); + cflag &= ~PARODD; + cflag |= (PARENB|CMSPAR); + break; + default: + dev_dbg(dev, "%s - Unknown parity mode, disabling parity\n", __func__); + cflag &= ~PARENB; + bits &= ~BITS_PARITY_MASK; + cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); + break; + } + + cflag &= ~CSTOPB; + switch (bits & BITS_STOP_MASK) { + case BITS_STOP_1: + dev_dbg(dev, "%s - stop bits = 1\n", __func__); + break; + case BITS_STOP_1_5: + dev_dbg(dev, "%s - stop bits = 1.5 (not supported, using 1 stop bit)\n", __func__); + bits &= ~BITS_STOP_MASK; + cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); + break; + case BITS_STOP_2: + dev_dbg(dev, "%s - stop bits = 2\n", __func__); + cflag |= CSTOPB; + break; + default: + dev_dbg(dev, "%s - Unknown number of stop bits, using 1 stop bit\n", __func__); + bits &= ~BITS_STOP_MASK; + cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits); + break; + } + + cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl, + sizeof(flow_ctl)); + ctl_hs = le32_to_cpu(flow_ctl.ulControlHandshake); + if (ctl_hs & CP210X_SERIAL_CTS_HANDSHAKE) { + dev_dbg(dev, "%s - flow control = CRTSCTS\n", __func__); + cflag |= CRTSCTS; + } else { + dev_dbg(dev, "%s - flow control = NONE\n", __func__); + cflag &= ~CRTSCTS; + } + + *cflagp = cflag; +} + +/* + * CP2101 supports the following baud rates: + * + * 300, 600, 1200, 1800, 2400, 4800, 7200, 9600, 14400, 19200, 28800, + * 38400, 56000, 57600, 115200, 128000, 230400, 460800, 921600 + * + * CP2102 and CP2103 support the following additional rates: + * + * 4000, 16000, 51200, 64000, 76800, 153600, 250000, 256000, 500000, + * 576000 + * + * The device will map a requested rate to a supported one, but the result + * of requests for rates greater than 1053257 is undefined (see AN205). + * + * CP2104, CP2105 and CP2110 support most rates up to 2M, 921k and 1M baud, + * respectively, with an error less than 1%. The actual rates are determined + * by + * + * div = round(freq / (2 x prescale x request)) + * actual = freq / (2 x prescale x div) + * + * For CP2104 and CP2105 freq is 48Mhz and prescale is 4 for request <= 365bps + * or 1 otherwise. + * For CP2110 freq is 24Mhz and prescale is 4 for request <= 300bps or 1 + * otherwise. + */ +static void cp210x_change_speed(struct tty_struct *tty, + struct usb_serial_port *port, struct ktermios *old_termios) +{ + u32 baud; + + baud = tty->termios.c_ospeed; + + /* This maps the requested rate to a rate valid on cp2102 or cp2103, + * or to an arbitrary rate in [1M,2M]. + * + * NOTE: B0 is not implemented. 
+ */ + baud = cp210x_quantise_baudrate(baud); + + dev_dbg(&port->dev, "%s - setting baud rate to %u\n", __func__, baud); + if (cp210x_write_u32_reg(port, CP210X_SET_BAUDRATE, baud)) { + dev_warn(&port->dev, "failed to set baud rate to %u\n", baud); + if (old_termios) + baud = old_termios->c_ospeed; + else + baud = 9600; + } + + tty_encode_baud_rate(tty, baud, baud); +} + +static void cp210x_set_termios(struct tty_struct *tty, + struct usb_serial_port *port, struct ktermios *old_termios) +{ + struct device *dev = &port->dev; + unsigned int cflag, old_cflag; + u16 bits; + + cflag = tty->termios.c_cflag; + old_cflag = old_termios->c_cflag; + + if (tty->termios.c_ospeed != old_termios->c_ospeed) + cp210x_change_speed(tty, port, old_termios); + + /* If the number of data bits is to be updated */ + if ((cflag & CSIZE) != (old_cflag & CSIZE)) { + cp210x_get_line_ctl(port, &bits); + bits &= ~BITS_DATA_MASK; + switch (cflag & CSIZE) { + case CS5: + bits |= BITS_DATA_5; + dev_dbg(dev, "%s - data bits = 5\n", __func__); + break; + case CS6: + bits |= BITS_DATA_6; + dev_dbg(dev, "%s - data bits = 6\n", __func__); + break; + case CS7: + bits |= BITS_DATA_7; + dev_dbg(dev, "%s - data bits = 7\n", __func__); + break; + case CS8: + bits |= BITS_DATA_8; + dev_dbg(dev, "%s - data bits = 8\n", __func__); + break; + default: + dev_dbg(dev, "cp210x driver does not support the number of bits requested, using 8 bit mode\n"); + bits |= BITS_DATA_8; + break; + } + if (cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits)) + dev_dbg(dev, "Number of data bits requested not supported by device\n"); + } + + if ((cflag & (PARENB|PARODD|CMSPAR)) != + (old_cflag & (PARENB|PARODD|CMSPAR))) { + cp210x_get_line_ctl(port, &bits); + bits &= ~BITS_PARITY_MASK; + if (cflag & PARENB) { + if (cflag & CMSPAR) { + if (cflag & PARODD) { + bits |= BITS_PARITY_MARK; + dev_dbg(dev, "%s - parity = MARK\n", __func__); + } else { + bits |= BITS_PARITY_SPACE; + dev_dbg(dev, "%s - parity = SPACE\n", __func__); + } + } else { + if (cflag & PARODD) { + bits |= BITS_PARITY_ODD; + dev_dbg(dev, "%s - parity = ODD\n", __func__); + } else { + bits |= BITS_PARITY_EVEN; + dev_dbg(dev, "%s - parity = EVEN\n", __func__); + } + } + } + if (cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits)) + dev_dbg(dev, "Parity mode not supported by device\n"); + } + + if ((cflag & CSTOPB) != (old_cflag & CSTOPB)) { + cp210x_get_line_ctl(port, &bits); + bits &= ~BITS_STOP_MASK; + if (cflag & CSTOPB) { + bits |= BITS_STOP_2; + dev_dbg(dev, "%s - stop bits = 2\n", __func__); + } else { + bits |= BITS_STOP_1; + dev_dbg(dev, "%s - stop bits = 1\n", __func__); + } + if (cp210x_write_u16_reg(port, CP210X_SET_LINE_CTL, bits)) + dev_dbg(dev, "Number of stop bits requested not supported by device\n"); + } + + if ((cflag & CRTSCTS) != (old_cflag & CRTSCTS)) { + struct cp210x_flow_ctl flow_ctl; + u32 ctl_hs; + u32 flow_repl; + + cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl, + sizeof(flow_ctl)); + ctl_hs = le32_to_cpu(flow_ctl.ulControlHandshake); + flow_repl = le32_to_cpu(flow_ctl.ulFlowReplace); + dev_dbg(dev, "%s - read ulControlHandshake=0x%08x, ulFlowReplace=0x%08x\n", + __func__, ctl_hs, flow_repl); + + ctl_hs &= ~CP210X_SERIAL_DSR_HANDSHAKE; + ctl_hs &= ~CP210X_SERIAL_DCD_HANDSHAKE; + ctl_hs &= ~CP210X_SERIAL_DSR_SENSITIVITY; + ctl_hs &= ~CP210X_SERIAL_DTR_MASK; + ctl_hs |= CP210X_SERIAL_DTR_SHIFT(CP210X_SERIAL_DTR_ACTIVE); + if (cflag & CRTSCTS) { + ctl_hs |= CP210X_SERIAL_CTS_HANDSHAKE; + + flow_repl &= ~CP210X_SERIAL_RTS_MASK; + flow_repl |= 
CP210X_SERIAL_RTS_SHIFT( + CP210X_SERIAL_RTS_FLOW_CTL); + dev_dbg(dev, "%s - flow control = CRTSCTS\n", __func__); + } else { + ctl_hs &= ~CP210X_SERIAL_CTS_HANDSHAKE; + + flow_repl &= ~CP210X_SERIAL_RTS_MASK; + flow_repl |= CP210X_SERIAL_RTS_SHIFT( + CP210X_SERIAL_RTS_ACTIVE); + dev_dbg(dev, "%s - flow control = NONE\n", __func__); + } + + dev_dbg(dev, "%s - write ulControlHandshake=0x%08x, ulFlowReplace=0x%08x\n", + __func__, ctl_hs, flow_repl); + flow_ctl.ulControlHandshake = cpu_to_le32(ctl_hs); + flow_ctl.ulFlowReplace = cpu_to_le32(flow_repl); + cp210x_write_reg_block(port, CP210X_SET_FLOW, &flow_ctl, + sizeof(flow_ctl)); + } + +} + +static int cp210x_tiocmset(struct tty_struct *tty, + unsigned int set, unsigned int clear) +{ + struct usb_serial_port *port = tty->driver_data; + return cp210x_tiocmset_port(port, set, clear); +} + +static int cp210x_tiocmset_port(struct usb_serial_port *port, + unsigned int set, unsigned int clear) +{ + u16 control = 0; + + if (set & TIOCM_RTS) { + control |= CONTROL_RTS; + control |= CONTROL_WRITE_RTS; + } + if (set & TIOCM_DTR) { + control |= CONTROL_DTR; + control |= CONTROL_WRITE_DTR; + } + if (clear & TIOCM_RTS) { + control &= ~CONTROL_RTS; + control |= CONTROL_WRITE_RTS; + } + if (clear & TIOCM_DTR) { + control &= ~CONTROL_DTR; + control |= CONTROL_WRITE_DTR; + } + + dev_dbg(&port->dev, "%s - control = 0x%.4x\n", __func__, control); + + return cp210x_write_u16_reg(port, CP210X_SET_MHS, control); +} + +static void cp210x_dtr_rts(struct usb_serial_port *p, int on) +{ + if (on) + cp210x_tiocmset_port(p, TIOCM_DTR|TIOCM_RTS, 0); + else + cp210x_tiocmset_port(p, 0, TIOCM_DTR|TIOCM_RTS); +} + +static int cp210x_tiocmget(struct tty_struct *tty) +{ + struct usb_serial_port *port = tty->driver_data; + u8 control; + int result; + + result = cp210x_read_u8_reg(port, CP210X_GET_MDMSTS, &control); + if (result) + return result; + + result = ((control & CONTROL_DTR) ? TIOCM_DTR : 0) + |((control & CONTROL_RTS) ? TIOCM_RTS : 0) + |((control & CONTROL_CTS) ? TIOCM_CTS : 0) + |((control & CONTROL_DSR) ? TIOCM_DSR : 0) + |((control & CONTROL_RING)? TIOCM_RI : 0) + |((control & CONTROL_DCD) ? TIOCM_CD : 0); + + dev_dbg(&port->dev, "%s - control = 0x%.2x\n", __func__, control); + + return result; +} + +static void cp210x_break_ctl(struct tty_struct *tty, int break_state) +{ + struct usb_serial_port *port = tty->driver_data; + u16 state; + + if (break_state == 0) + state = BREAK_OFF; + else + state = BREAK_ON; + dev_dbg(&port->dev, "%s - turning break %s\n", __func__, + state == BREAK_OFF ? "off" : "on"); + cp210x_write_u16_reg(port, CP210X_SET_BREAK, state); +} + +static int cp210x_port_probe(struct usb_serial_port *port) +{ + struct usb_serial *serial = port->serial; + struct usb_host_interface *cur_altsetting; + struct cp210x_port_private *port_priv; + int ret; + + port_priv = kzalloc(sizeof(*port_priv), GFP_KERNEL); + if (!port_priv) + return -ENOMEM; + + cur_altsetting = serial->interface->cur_altsetting; + port_priv->bInterfaceNumber = cur_altsetting->desc.bInterfaceNumber; + port_priv->is_cp2102n_a01 = false; + + usb_set_serial_port_data(port, port_priv); + + ret = cp210x_read_vs_reg_block(port, REQTYPE_DEVICE_TO_HOST, + CP210X_GET_PARTNUM, &port_priv->bPartNumber, 1); + if (ret) { + kfree(port_priv); + return ret; + } + + if ((port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN28) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN24) || + (port_priv->bPartNumber == CP210x_PARTNUM_CP2102N_QFN20)) { + // check the firmware version on the CP2102N. 
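+		// A01 parts (firmware < 1.0.8) are flagged here and get the
+		// extra reset sequence in cp210x_close().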
+		u8 fwversion[3];
+		ret = cp210x_read_vs_reg_block(port, REQTYPE_DEVICE_TO_HOST, CP2102N_GET_FW_VERS, fwversion, sizeof(fwversion));
+		if (0 == ret) {
+			if (1 == fwversion[0] && 0 == fwversion[1] && fwversion[2] < 8) {
+				port_priv->is_cp2102n_a01 = true;
+			}
+		}
+
+	}
+
+	ret = cp210x_detect_swapped_line_ctl(port);
+	if (ret) {
+		kfree(port_priv);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int cp210x_port_remove(struct usb_serial_port *port)
+{
+	struct cp210x_port_private *port_priv;
+
+	port_priv = usb_get_serial_port_data(port);
+	kfree(port_priv);
+
+	return 0;
+}
+
+module_usb_serial_driver(serial_drivers, id_table);
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL v2");
-- 
2.7.4


From 4a0976bd7409c525d8be63ec0b6af3448568bb7f Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Mon, 11 Apr 2022 09:48:04 +0900
Subject: [PATCH 05/16] ARM/ARM64: tizen_bcm2711_defconfig: Enable RT_GROUP_SCHED config

Enable the RT_GROUP_SCHED configuration to use cgroup scheduling.

Change-Id: Iab166b7ab312a780b77316118194af1999abc4fb
Signed-off-by: Jaehoon Chung
---
 arch/arm/configs/tizen_bcm2711_defconfig   | 1 +
 arch/arm64/configs/tizen_bcm2711_defconfig | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/configs/tizen_bcm2711_defconfig b/arch/arm/configs/tizen_bcm2711_defconfig
index 0465bdb..a96d9e6 100644
--- a/arch/arm/configs/tizen_bcm2711_defconfig
+++ b/arch/arm/configs/tizen_bcm2711_defconfig
@@ -15,6 +15,7 @@ CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_DEVICE=y
diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig
index 8ebb4dd..5ba404b 100644
--- a/arch/arm64/configs/tizen_bcm2711_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_defconfig
@@ -15,6 +15,7 @@ CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CPUSETS=y
 CONFIG_CGROUP_DEVICE=y
-- 
2.7.4


From 673a67c22acdc78d685d4af172565b0627caaba0 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Tue, 19 Apr 2022 20:11:40 +0900
Subject: [PATCH 06/16] ARM64: tizen_bcm2711_defconfig: enable configs relevant to RT2800USB

To use USB Wi-Fi, enable the configs relevant to RT2800USB.

Change-Id: Ib0b0f6f65d0e2a6420e2fd220a29fcccbce68c94
Signed-off-by: Jaehoon Chung
---
 arch/arm64/configs/tizen_bcm2711_defconfig | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig
index 5ba404b..105b623 100644
--- a/arch/arm64/configs/tizen_bcm2711_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_defconfig
@@ -251,6 +251,11 @@ CONFIG_BRCMFMAC=m
 CONFIG_BRCMDBG=y
 CONFIG_RT2X00=y
 CONFIG_RT2500USB=y
+CONFIG_RT73USB=m
+CONFIG_RT2800USB=m
+CONFIG_RT2800USB_RT3573=y
+CONFIG_RT2800USB_RT53XX=y
+CONFIG_RT2800USB_RT55XX=y
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
-- 
2.7.4


From e02b8df53ba757df995ac2a23a0e7724d48f0478 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Tue, 26 Apr 2022 13:36:16 +0900
Subject: [PATCH 07/16] net: rtl8192cu: fix wrong configuration in Makefile

RTL8192CU_VENDOR exists to distinguish the non-mainline (vendor) driver
from the mainline one, and this Makefile builds the non-mainline driver.
So it needs to use CONFIG_RTL8192CU_VENDOR instead of CONFIG_RTL8192CU.
This is why the rtl8192cu dongle doesn't work on the RPI4 target.
Change-Id: I93a50f5db3a965fdb4d8ee1e5a70d3956d5f6679
Signed-off-by: Jaehoon Chung
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile
index 8b3921f..f1e4ef2 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/Makefile
@@ -10,4 +10,4 @@ rtl8192cu-objs := \
 		table.o \
 		trx.o
 
-obj-$(CONFIG_RTL8192CU) += rtl8192cu.o
+obj-$(CONFIG_RTL8192CU_VENDOR) += rtl8192cu.o
-- 
2.7.4


From 7e503d9d2442106d393a7359890f16df5967aaa8 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung
Date: Tue, 26 Apr 2022 14:33:01 +0900
Subject: [PATCH 08/16] ARM/ARM64: tizen_bcm2711: enable CONFIG_RTL8192CU_VENDOR

Enable CONFIG_RTL8192CU_VENDOR as a module.

Change-Id: Ic3e8954b432bafda47b50dbc1f30c922035435b1
Signed-off-by: Jaehoon Chung
---
 arch/arm/configs/tizen_bcm2711_defconfig      | 1 +
 arch/arm/configs/tizen_bcm2711_rt_defconfig   | 1 +
 arch/arm64/configs/tizen_bcm2711_defconfig    | 1 +
 arch/arm64/configs/tizen_bcm2711_rt_defconfig | 1 +
 4 files changed, 4 insertions(+)

diff --git a/arch/arm/configs/tizen_bcm2711_defconfig b/arch/arm/configs/tizen_bcm2711_defconfig
index a96d9e6..eb6bbe3 100644
--- a/arch/arm/configs/tizen_bcm2711_defconfig
+++ b/arch/arm/configs/tizen_bcm2711_defconfig
@@ -253,6 +253,7 @@ CONFIG_RT2800USB=y
 CONFIG_RT2800USB_RT3573=y
 CONFIG_RT2800USB_RT53XX=y
 CONFIG_RT2800USB_RT55XX=y
+CONFIG_RTL8192CU_VENDOR=m
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
diff --git a/arch/arm/configs/tizen_bcm2711_rt_defconfig b/arch/arm/configs/tizen_bcm2711_rt_defconfig
index 884b6a2..84ca2c6 100644
--- a/arch/arm/configs/tizen_bcm2711_rt_defconfig
+++ b/arch/arm/configs/tizen_bcm2711_rt_defconfig
@@ -251,6 +251,7 @@ CONFIG_RT2800USB=y
 CONFIG_RT2800USB_RT3573=y
 CONFIG_RT2800USB_RT53XX=y
 CONFIG_RT2800USB_RT55XX=y
+CONFIG_RTL8192CU_VENDOR=m
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig
index 105b623..67eb5f1 100644
--- a/arch/arm64/configs/tizen_bcm2711_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_defconfig
@@ -256,6 +256,7 @@ CONFIG_RT2800USB=m
 CONFIG_RT2800USB_RT3573=y
 CONFIG_RT2800USB_RT53XX=y
 CONFIG_RT2800USB_RT55XX=y
+CONFIG_RTL8192CU_VENDOR=m
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
diff --git a/arch/arm64/configs/tizen_bcm2711_rt_defconfig b/arch/arm64/configs/tizen_bcm2711_rt_defconfig
index 0b18a41..47a24b6 100644
--- a/arch/arm64/configs/tizen_bcm2711_rt_defconfig
+++ b/arch/arm64/configs/tizen_bcm2711_rt_defconfig
@@ -247,6 +247,7 @@ CONFIG_BRCMFMAC=m
 CONFIG_BRCMDBG=y
 CONFIG_RT2X00=y
 CONFIG_RT2500USB=y
+CONFIG_RTL8192CU_VENDOR=m
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_INPUT_JOYDEV=m
 CONFIG_INPUT_EVDEV=y
-- 
2.7.4


From bc771c4b5a58c15ec1527bfc9290e300e792886b Mon Sep 17 00:00:00 2001
From: Sung-hun Kim
Date: Thu, 19 May 2022 20:53:02 +0900
Subject: [PATCH 09/16] mm: LKSM: fix a bug in the filter length calculation

Since the size variable in the filter length calculation has a signed
integer type, the calculation breaks when the size of a vma exceeds the
range of a signed integer: the size is then treated as a negative
number. For example, on a 64-bit kernel a vma of 2 GiB or larger makes
(int)(vm_end - vm_start) negative. As a result, it triggers a kernel
bug at BUG_ON(size < 0).
This patch changes the types of the relevant variables and of the
lksm_region members from signed integer to unsigned long. Additionally,
it removes the BUG_ON(size < 0) assertion because the size can never be
negative any more.

Change-Id: I822a2ba5d372596ee55dcbec90a019fdf1dc2416
Signed-off-by: Sung-hun Kim
---
 mm/lksm.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/lksm.c b/mm/lksm.c
index 31d8601..ff9fdd0 100644
--- a/mm/lksm.c
+++ b/mm/lksm.c
@@ -550,12 +550,12 @@ static int region_share[LKSM_REGION_UNKNOWN + 1];
 struct lksm_region {
 	enum lksm_region_type type;
-	int len;
 	int ino;
 	int merge_cnt;
 	int filter_cnt;
 	int scan_round;
 	int conflict;
+	unsigned long len;
 	atomic_t refcount;
 	struct hlist_node hnode;
 	struct lksm_region *next;
@@ -582,7 +582,7 @@ static unsigned int lksm_nr_regions;
 /* the upper limit for region lookup */
 #define LKSM_REGION_ITER_MAX 8
 
-#define lksm_region_size(start, end) ((int)(end - start) >> PAGE_SHIFT)
+#define lksm_region_size(start, end) ((end - start) >> PAGE_SHIFT)
 #define lksm_bitmap_size(size) ((size >> 6) + ((size % BITS_PER_LONG) ? 1 : 0))
 
 /* all processes share one lksm_region for their heaps */
@@ -2915,9 +2915,9 @@ static void lksm_insert_mm_slot_ordered(struct mm_slot *slot)
  */
 static inline void __lksm_copy_filter
-(unsigned long *orig, unsigned long *newer, int size)
+(unsigned long *orig, unsigned long *newer, unsigned long size)
 {
-	while (--size >= 0)
+	while (size-- > 0)
 		*(newer++) = *(orig++);
 }
 
@@ -2964,8 +2964,8 @@ static struct vm_area_struct *lksm_find_next_vma
 	else if (region->type != LKSM_REGION_HEAP
 			&& region->type != LKSM_REGION_CONFLICT
 			&& region->type != LKSM_REGION_UNKNOWN) {
-		int size = lksm_region_size(vma->vm_start, vma->vm_end);
-		int len = (size > BITS_PER_LONG) ? lksm_bitmap_size(size)
+		unsigned long size = lksm_region_size(vma->vm_start, vma->vm_end);
+		unsigned long len = (size > BITS_PER_LONG) ? lksm_bitmap_size(size)
 				: SINGLE_FILTER_LEN;
 
 		if (len > SINGLE_FILTER_LEN && unlikely(region->len != len)) {
@@ -2982,7 +2982,7 @@ static struct vm_area_struct *lksm_find_next_vma
 			}
 			if (region->len < len) {
 				unsigned long *filter;
-				ksm_debug("size of region(%p) is changed: %d -> %d (size: %d)",
+				ksm_debug("size of region(%p) is changed: %lu -> %lu (size: %lu)",
 						region, region->len, len, size);
 				filter = kcalloc(len, sizeof(long), GFP_KERNEL);
 				if (!filter) {
@@ -4815,7 +4815,7 @@ static const struct attribute_group ksm_attr_group = {
 
 #ifdef CONFIG_LKSM_FILTER
 static inline void init_lksm_region
-(struct lksm_region *region, unsigned long ino, int type, int len)
+(struct lksm_region *region, unsigned long ino, int type, unsigned long len)
 {
 	region->ino = ino;
 	region->type = type;
@@ -4827,12 +4827,12 @@ static void lksm_insert_region
 (struct lksm_region **region, unsigned long ino,
 		struct vm_area_struct *vma, int type)
 {
-	int size, len, need_hash_add = 0;
+	int need_hash_add = 0;
+	unsigned long len, size;
 	struct lksm_region *next = NULL;
 	unsigned long flags;
 
 	size = lksm_region_size(vma->vm_start, vma->vm_end);
-	BUG_ON(size < 0);
 	len = (size > BITS_PER_LONG) ? lksm_bitmap_size(size) : SINGLE_FILTER_LEN;
 
 	if (!(*region)) {
-- 
2.7.4


From 15c58f0ece0d67befde84866ee8fa6d862f33a57 Mon Sep 17 00:00:00 2001
From: Sung-hun Kim
Date: Thu, 2 Jun 2022 17:32:47 +0900
Subject: [PATCH 10/16] ARM64: tizen_bcm2711: disable configurations for fTHP

Disable the configurations for fTHP (CONFIG_TRANSPARENT_HUGEPAGE,
CONFIG_READ_ONLY_THP_FOR_FS, CONFIG_FINEGRAINED_THP) on Tizen-7.0.
Change-Id: If17a7a582a019ad7b45b982102282aa12d4bd23f Signed-off-by: Sung-hun Kim --- arch/arm64/configs/tizen_bcm2711_defconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/configs/tizen_bcm2711_defconfig b/arch/arm64/configs/tizen_bcm2711_defconfig index 67eb5f1..db97098 100644 --- a/arch/arm64/configs/tizen_bcm2711_defconfig +++ b/arch/arm64/configs/tizen_bcm2711_defconfig @@ -63,13 +63,10 @@ CONFIG_MAC_PARTITION=y CONFIG_BINFMT_MISC=m CONFIG_LKSM=y CONFIG_LKSM_FILTER=y -CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_CMA=y CONFIG_ZSMALLOC=y -CONFIG_READ_ONLY_THP_FOR_FS=y -CONFIG_FINEGRAINED_THP=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y -- 2.7.4 From 6d12503994b0e656e7a12ac98da564060fa81d61 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 25 Feb 2021 17:17:37 -0800 Subject: [PATCH 11/16] arm64/mm: define arch_get_mappable_range() This overrides arch_get_mappable_range() on arm64 platform which will be used with recently added generic framework. It drops inside_linear_region() and subsequent check in arch_add_memory() which are no longer required. It also adds a VM_BUG_ON() check that would ensure that mhp_range_allowed() has already been called. Link: https://lkml.kernel.org/r/1612149902-7867-3-git-send-email-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: David Hildenbrand Reviewed-by: Catalin Marinas Cc: Will Deacon Cc: Ard Biesheuvel Cc: Mark Rutland Cc: Heiko Carstens Cc: Jason Wang Cc: Jonathan Cameron Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Oscar Salvador Cc: Pankaj Gupta Cc: Pankaj Gupta Cc: teawater Cc: Vasily Gorbik Cc: Wei Yang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I6c68c5b0382419a98a3ae03045790e0078e47898 --- arch/arm64/mm/mmu.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 219f995..c68e444 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1456,8 +1456,9 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) free_empty_tables(start, end, PAGE_OFFSET, PAGE_END); } -static bool inside_linear_region(u64 start, u64 size) +struct range arch_get_mappable_range(void) { + struct range mhp_range; u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual)); u64 end_linear_pa = __pa(PAGE_END - 1); @@ -1481,7 +1482,10 @@ static bool inside_linear_region(u64 start, u64 size) * range which can be mapped inside this linear mapping range, must * also be derived from its end points. 
+	 */
-	return start >= start_linear_pa && (start + size - 1) <= end_linear_pa;
+	mhp_range.start = start_linear_pa;
+	mhp_range.end = end_linear_pa;
+
+	return mhp_range;
 }
 
 int arch_add_memory(int nid, u64 start, u64 size,
@@ -1489,11 +1493,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
 {
 	int ret, flags = 0;
 
-	if (!inside_linear_region(start, size)) {
-		pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
-		return -EINVAL;
-	}
-
+	VM_BUG_ON(!mhp_range_allowed(start, size, true));
 	if (rodata_full || debug_pagealloc_enabled())
 		flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
-- 
2.7.4


From c83b114a131cbcd8b38beae81f0edd29e32546cb Mon Sep 17 00:00:00 2001
From: Alexander Potapenko
Date: Tue, 3 Nov 2020 18:58:33 +0100
Subject: [PATCH 12/16] mm: add Kernel Electric-Fence infrastructure

This adds the Kernel Electric-Fence (KFENCE) infrastructure. KFENCE is a
low-overhead sampling-based memory safety error detector of heap
use-after-free, invalid-free, and out-of-bounds access errors.

KFENCE is designed to be enabled in production kernels, and has near
zero performance overhead. Compared to KASAN, KFENCE trades performance
for precision. The main motivation behind KFENCE's design is that with
enough total uptime KFENCE will detect bugs in code paths not typically
exercised by non-production test workloads. One way to quickly achieve a
large enough total uptime is when the tool is deployed across a large
fleet of machines.

KFENCE objects each reside on a dedicated page, at either the left or
right page boundaries. The pages to the left and right of the object
page are "guard pages", whose attributes are changed to a protected
state, and cause page faults on any attempted access to them. Such page
faults are then intercepted by KFENCE, which handles the fault
gracefully by reporting a memory access error.

To detect out-of-bounds writes to memory within the object's page
itself, KFENCE also uses pattern-based redzones. The following figure
illustrates the page layout:

  ---+-----------+-----------+-----------+-----------+-----------+---
     | xxxxxxxxx | O :       | xxxxxxxxx |       : O | xxxxxxxxx |
     | xxxxxxxxx | B :       | xxxxxxxxx |       : B | xxxxxxxxx |
     | x GUARD x | J : RED-  | x GUARD x |  RED- : J | x GUARD x |
     | xxxxxxxxx | E :  ZONE | xxxxxxxxx |  ZONE : E | xxxxxxxxx |
     | xxxxxxxxx | C :       | xxxxxxxxx |       : C | xxxxxxxxx |
     | xxxxxxxxx | T :       | xxxxxxxxx |       : T | xxxxxxxxx |
  ---+-----------+-----------+-----------+-----------+-----------+---

Guarded allocations are set up based on a sample interval (can be set
via kfence.sample_interval). After expiration of the sample interval, a
guarded allocation from the KFENCE object pool is returned to the main
allocator (SLAB or SLUB). At this point, the timer is reset, and the
next allocation is set up after the expiration of the interval.

To enable/disable a KFENCE allocation through the main allocator's
fast-path without overhead, KFENCE relies on static branches via the
static keys infrastructure. The static branch is toggled to redirect the
allocation to KFENCE.

To date, we have verified by running synthetic benchmarks (sysbench I/O,
hackbench) that a kernel compiled with KFENCE is performance-neutral
compared to the non-KFENCE baseline.

For more details, see Documentation/dev-tools/kfence.rst (added later in
the series).
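
As an illustration of the intended fast-path hook (a sketch based on the
kfence_alloc() documentation below; the actual SL[AU]B call sites are
added by a later patch in the series):

	void *obj = kfence_alloc(s, size, flags);

	if (unlikely(obj))	/* sampled: object comes from the KFENCE pool */
		return obj;
	/* NULL: proceed with the regular SL[AU]B allocation */

Because kfence_alloc() hides this check behind a static branch, the
unsampled case costs only a patched jump.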
Reviewed-by: Dmitry Vyukov Reviewed-by: SeongJae Park Co-developed-by: Marco Elver Signed-off-by: Marco Elver Signed-off-by: Alexander Potapenko Reviewed-by: Jann Horn [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I03cc6246dd8436b86e0681a1b3015bf0d142db67 --- include/linux/kfence.h | 201 ++++++++++++ init/main.c | 3 + lib/Kconfig.debug | 1 + lib/Kconfig.kfence | 57 ++++ mm/Makefile | 1 + mm/kfence/Makefile | 3 + mm/kfence/core.c | 822 +++++++++++++++++++++++++++++++++++++++++++++++++ mm/kfence/kfence.h | 107 +++++++ mm/kfence/report.c | 235 ++++++++++++++ 9 files changed, 1430 insertions(+) create mode 100644 include/linux/kfence.h create mode 100644 lib/Kconfig.kfence create mode 100644 mm/kfence/Makefile create mode 100644 mm/kfence/core.c create mode 100644 mm/kfence/kfence.h create mode 100644 mm/kfence/report.c diff --git a/include/linux/kfence.h b/include/linux/kfence.h new file mode 100644 index 0000000..ed2d48a --- /dev/null +++ b/include/linux/kfence.h @@ -0,0 +1,201 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KFENCE_H +#define _LINUX_KFENCE_H + +#include +#include +#include + +#ifdef CONFIG_KFENCE + +/* + * We allocate an even number of pages, as it simplifies calculations to map + * address to metadata indices; effectively, the very first page serves as an + * extended guard page, but otherwise has no special purpose. + */ +#define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE) +extern char *__kfence_pool; + +DECLARE_STATIC_KEY_FALSE(kfence_allocation_key); + +/** + * is_kfence_address() - check if an address belongs to KFENCE pool + * @addr: address to check + * + * Return: true or false depending on whether the address is within the KFENCE + * object range. + * + * KFENCE objects live in a separate page range and are not to be intermixed + * with regular heap objects (e.g. KFENCE objects must never be added to the + * allocator freelists). Failing to do so may and will result in heap + * corruptions, therefore is_kfence_address() must be used to check whether + * an object requires specific handling. + * + * Note: This function may be used in fast-paths, and is performance critical. + * Future changes should take this into account; for instance, we want to avoid + * introducing another load and therefore need to keep KFENCE_POOL_SIZE a + * constant (until immediate patching support is added to the kernel). + */ +static __always_inline bool is_kfence_address(const void *addr) +{ + /* + * The non-NULL check is required in case the __kfence_pool pointer was + * never initialized; keep it in the slow-path after the range-check. + */ + return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr); +} + +/** + * kfence_alloc_pool() - allocate the KFENCE pool via memblock + */ +void __init kfence_alloc_pool(void); + +/** + * kfence_init() - perform KFENCE initialization at boot time + * + * Requires that kfence_alloc_pool() was called before. This sets up the + * allocation gate timer, and requires that workqueues are available. + */ +void __init kfence_init(void); + +/** + * kfence_shutdown_cache() - handle shutdown_cache() for KFENCE objects + * @s: cache being shut down + * + * Before shutting down a cache, one must ensure there are no remaining objects + * allocated from it. Because KFENCE objects are not referenced from the cache + * directly, we need to check them here. 
+ * + * Note that shutdown_cache() is internal to SL*B, and kmem_cache_destroy() does + * not return if allocated objects still exist: it prints an error message and + * simply aborts destruction of a cache, leaking memory. + * + * If the only such objects are KFENCE objects, we will not leak the entire + * cache, but instead try to provide more useful debug info by making allocated + * objects "zombie allocations". Objects may then still be used or freed (which + * is handled gracefully), but usage will result in showing KFENCE error reports + * which include stack traces to the user of the object, the original allocation + * site, and caller to shutdown_cache(). + */ +void kfence_shutdown_cache(struct kmem_cache *s); + +/* + * Allocate a KFENCE object. Allocators must not call this function directly, + * use kfence_alloc() instead. + */ +void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags); + +/** + * kfence_alloc() - allocate a KFENCE object with a low probability + * @s: struct kmem_cache with object requirements + * @size: exact size of the object to allocate (can be less than @s->size + * e.g. for kmalloc caches) + * @flags: GFP flags + * + * Return: + * * NULL - must proceed with allocating as usual, + * * non-NULL - pointer to a KFENCE object. + * + * kfence_alloc() should be inserted into the heap allocation fast path, + * allowing it to transparently return KFENCE-allocated objects with a low + * probability using a static branch (the probability is controlled by the + * kfence.sample_interval boot parameter). + */ +static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) +{ + if (static_branch_unlikely(&kfence_allocation_key)) + return __kfence_alloc(s, size, flags); + return NULL; +} + +/** + * kfence_ksize() - get actual amount of memory allocated for a KFENCE object + * @addr: pointer to a heap object + * + * Return: + * * 0 - not a KFENCE object, must call __ksize() instead, + * * non-0 - this many bytes can be accessed without causing a memory error. + * + * kfence_ksize() returns the number of bytes requested for a KFENCE object at + * allocation time. This number may be less than the object size of the + * corresponding struct kmem_cache. + */ +size_t kfence_ksize(const void *addr); + +/** + * kfence_object_start() - find the beginning of a KFENCE object + * @addr - address within a KFENCE-allocated object + * + * Return: address of the beginning of the object. + * + * SL[AU]B-allocated objects are laid out within a page one by one, so it is + * easy to calculate the beginning of an object given a pointer inside it and + * the object size. The same is not true for KFENCE, which places a single + * object at either end of the page. This helper function is used to find the + * beginning of a KFENCE-allocated object. + */ +void *kfence_object_start(const void *addr); + +/** + * __kfence_free() - release a KFENCE heap object to KFENCE pool + * @addr: object to be freed + * + * Requires: is_kfence_address(addr) + * + * Release a KFENCE object and mark it as freed. + */ +void __kfence_free(void *addr); + +/** + * kfence_free() - try to release an arbitrary heap object to KFENCE pool + * @addr: object to be freed + * + * Return: + * * false - object doesn't belong to KFENCE pool and was ignored, + * * true - object was released to KFENCE pool. + * + * Release a KFENCE object and mark it as freed. May be called on any object, + * even non-KFENCE objects, to simplify integration of the hooks into the + * allocator's free codepath. 
The allocator must check the return value to + * determine if it was a KFENCE object or not. + */ +static __always_inline __must_check bool kfence_free(void *addr) +{ + if (!is_kfence_address(addr)) + return false; + __kfence_free(addr); + return true; +} + +/** + * kfence_handle_page_fault() - perform page fault handling for KFENCE pages + * @addr: faulting address + * + * Return: + * * false - address outside KFENCE pool, + * * true - page fault handled by KFENCE, no additional handling required. + * + * A page fault inside KFENCE pool indicates a memory error, such as an + * out-of-bounds access, a use-after-free or an invalid memory access. In these + * cases KFENCE prints an error message and marks the offending page as + * present, so that the kernel can proceed. + */ +bool __must_check kfence_handle_page_fault(unsigned long addr); + +#else /* CONFIG_KFENCE */ + +static inline bool is_kfence_address(const void *addr) { return false; } +static inline void kfence_alloc_pool(void) { } +static inline void kfence_init(void) { } +static inline void kfence_shutdown_cache(struct kmem_cache *s) { } +static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; } +static inline size_t kfence_ksize(const void *addr) { return 0; } +static inline void *kfence_object_start(const void *addr) { return NULL; } +static inline void __kfence_free(void *addr) { } +static inline bool __must_check kfence_free(void *addr) { return false; } +static inline bool __must_check kfence_handle_page_fault(unsigned long addr) { return false; } + +#endif + +#endif /* _LINUX_KFENCE_H */ diff --git a/init/main.c b/init/main.c index 4fe58ed..9fb295f 100644 --- a/init/main.c +++ b/init/main.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -827,6 +828,7 @@ static void __init mm_init(void) */ page_ext_init_flatmem(); init_debug_pagealloc(); + kfence_alloc_pool(); report_meminit(); mem_init(); kmem_cache_init(); @@ -952,6 +954,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) hrtimers_init(); softirq_init(); timekeeping_init(); + kfence_init(); /* * For best initial stack canary entropy, prepare it after: diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a601578..e1663cd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -878,6 +878,7 @@ config DEBUG_STACKOVERFLOW If in doubt, say "N". source "lib/Kconfig.kasan" +source "lib/Kconfig.kfence" endmenu # "Memory Debugging" diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence new file mode 100644 index 0000000..b209cd0 --- /dev/null +++ b/lib/Kconfig.kfence @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config HAVE_ARCH_KFENCE + bool + +menuconfig KFENCE + bool "KFENCE: low-overhead sampling-based memory safety error detector" + depends on HAVE_ARCH_KFENCE && !KASAN && (SLAB || SLUB) + depends on JUMP_LABEL # To ensure performance, require jump labels + select STACKTRACE + help + KFENCE is a low-overhead sampling-based detector of heap out-of-bounds + access, use-after-free, and invalid-free errors. KFENCE is designed + to have negligible cost to permit enabling it in production + environments. + + Note that, KFENCE is not a substitute for explicit testing with tools + such as KASAN. KFENCE can detect a subset of bugs that KASAN can + detect, albeit at very different performance profiles. If you can + afford to use KASAN, continue using KASAN, for example in test + environments. 
If your kernel targets production use, and cannot + enable KASAN due to its cost, consider using KFENCE. + +if KFENCE + +config KFENCE_SAMPLE_INTERVAL + int "Default sample interval in milliseconds" + default 100 + help + The KFENCE sample interval determines the frequency with which heap + allocations will be guarded by KFENCE. May be overridden via boot + parameter "kfence.sample_interval". + + Set this to 0 to disable KFENCE by default, in which case only + setting "kfence.sample_interval" to a non-zero value enables KFENCE. + +config KFENCE_NUM_OBJECTS + int "Number of guarded objects available" + range 1 65535 + default 255 + help + The number of guarded objects available. For each KFENCE object, 2 + pages are required; with one containing the object and two adjacent + ones used as guard pages. + +config KFENCE_STRESS_TEST_FAULTS + int "Stress testing of fault handling and error reporting" if EXPERT + default 0 + help + The inverse probability with which to randomly protect KFENCE object + pages, resulting in spurious use-after-frees. The main purpose of + this option is to stress test KFENCE with concurrent error reports + and allocations/frees. A value of 0 disables stress testing logic. + + Only for KFENCE testing; set to 0 if you are not a KFENCE developer. + +endif # KFENCE diff --git a/mm/Makefile b/mm/Makefile index de62f2c..1672c65 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -82,6 +82,7 @@ obj-$(CONFIG_PAGE_POISONING) += page_poison.o obj-$(CONFIG_SLAB) += slab.o obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_KASAN) += kasan/ +obj-$(CONFIG_KFENCE) += kfence/ obj-$(CONFIG_FAILSLAB) += failslab.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_MEMTEST) += memtest.o diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile new file mode 100644 index 0000000..d991e9a3 --- /dev/null +++ b/mm/kfence/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_KFENCE) := core.o report.o diff --git a/mm/kfence/core.c b/mm/kfence/core.c new file mode 100644 index 0000000..64f33b9 --- /dev/null +++ b/mm/kfence/core.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "kfence: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "kfence.h" + +/* Disables KFENCE on the first warning assuming an irrecoverable error. */ +#define KFENCE_WARN_ON(cond) \ + ({ \ + const bool __cond = WARN_ON(cond); \ + if (unlikely(__cond)) \ + WRITE_ONCE(kfence_enabled, false); \ + __cond; \ + }) + +/* === Data ================================================================= */ + +static bool kfence_enabled __read_mostly; + +static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL; + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "kfence." + +static int param_set_sample_interval(const char *val, const struct kernel_param *kp) +{ + unsigned long num; + int ret = kstrtoul(val, 0, &num); + + if (ret < 0) + return ret; + + if (!num) /* Using 0 to indicate KFENCE is disabled. */ + WRITE_ONCE(kfence_enabled, false); + else if (!READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING) + return -EINVAL; /* Cannot (re-)enable KFENCE on-the-fly. 
*/ + + *((unsigned long *)kp->arg) = num; + return 0; +} + +static int param_get_sample_interval(char *buffer, const struct kernel_param *kp) +{ + if (!READ_ONCE(kfence_enabled)) + return sprintf(buffer, "0\n"); + + return param_get_ulong(buffer, kp); +} + +static const struct kernel_param_ops sample_interval_param_ops = { + .set = param_set_sample_interval, + .get = param_get_sample_interval, +}; +module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600); + +/* The pool of pages used for guard pages and objects. */ +char *__kfence_pool __ro_after_init; +EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */ + +/* + * Per-object metadata, with one-to-one mapping of object metadata to + * backing pages (in __kfence_pool). + */ +static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0); +struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; + +/* Freelist with available objects. */ +static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist); +static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */ + +/* The static key to set up a KFENCE allocation. */ +DEFINE_STATIC_KEY_FALSE(kfence_allocation_key); + +/* Gates the allocation, ensuring only one succeeds in a given period. */ +static atomic_t allocation_gate = ATOMIC_INIT(1); + +/* Wait queue to wake up allocation-gate timer task. */ +static DECLARE_WAIT_QUEUE_HEAD(allocation_wait); + +/* Statistics counters for debugfs. */ +enum kfence_counter_id { + KFENCE_COUNTER_ALLOCATED, + KFENCE_COUNTER_ALLOCS, + KFENCE_COUNTER_FREES, + KFENCE_COUNTER_ZOMBIES, + KFENCE_COUNTER_BUGS, + KFENCE_COUNTER_COUNT, +}; +static atomic_long_t counters[KFENCE_COUNTER_COUNT]; +static const char *const counter_names[] = { + [KFENCE_COUNTER_ALLOCATED] = "currently allocated", + [KFENCE_COUNTER_ALLOCS] = "total allocations", + [KFENCE_COUNTER_FREES] = "total frees", + [KFENCE_COUNTER_ZOMBIES] = "zombie allocations", + [KFENCE_COUNTER_BUGS] = "total bugs", +}; +static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT); + +/* === Internals ============================================================ */ + +static bool kfence_protect(unsigned long addr) +{ + return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true)); +} + +static bool kfence_unprotect(unsigned long addr) +{ + return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false)); +} + +static inline struct kfence_metadata *addr_to_metadata(unsigned long addr) +{ + long index; + + /* The checks do not affect performance; only called from slow-paths. */ + + if (!is_kfence_address((void *)addr)) + return NULL; + + /* + * May be an invalid index if called with an address at the edge of + * __kfence_pool, in which case we would report an "invalid access" + * error. + */ + index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1; + if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS) + return NULL; + + return &kfence_metadata[index]; +} + +static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta) +{ + unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2; + unsigned long pageaddr = (unsigned long)&__kfence_pool[offset]; + + /* The checks do not affect performance; only called from slow-paths. */ + + /* Only call with a pointer into kfence_metadata. 
*/ + if (KFENCE_WARN_ON(meta < kfence_metadata || + meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS)) + return 0; + + /* + * This metadata object only ever maps to 1 page; verify that the stored + * address is in the expected range. + */ + if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr)) + return 0; + + return pageaddr; +} + +/* + * Update the object's metadata state, including updating the alloc/free stacks + * depending on the state transition. + */ +static noinline void metadata_update_state(struct kfence_metadata *meta, + enum kfence_object_state next) +{ + struct kfence_track *track = + next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track; + + lockdep_assert_held(&meta->lock); + + /* + * Skip over 1 (this) functions; noinline ensures we do not accidentally + * skip over the caller by never inlining. + */ + track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1); + track->pid = task_pid_nr(current); + + /* + * Pairs with READ_ONCE() in + * kfence_shutdown_cache(), + * kfence_handle_page_fault(). + */ + WRITE_ONCE(meta->state, next); +} + +/* Write canary byte to @addr. */ +static inline bool set_canary_byte(u8 *addr) +{ + *addr = KFENCE_CANARY_PATTERN(addr); + return true; +} + +/* Check canary byte at @addr. */ +static inline bool check_canary_byte(u8 *addr) +{ + if (likely(*addr == KFENCE_CANARY_PATTERN(addr))) + return true; + + atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); + kfence_report_error((unsigned long)addr, addr_to_metadata((unsigned long)addr), + KFENCE_ERROR_CORRUPTION); + return false; +} + +/* __always_inline this to ensure we won't do an indirect call to fn. */ +static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *)) +{ + const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE); + unsigned long addr; + + lockdep_assert_held(&meta->lock); + + /* + * We'll iterate over each canary byte per-side until fn() returns + * false. However, we'll still iterate over the canary bytes to the + * right of the object even if there was an error in the canary bytes to + * the left of the object. Specifically, if check_canary_byte() + * generates an error, showing both sides might give more clues as to + * what the error is about when displaying which bytes were corrupted. + */ + + /* Apply to left of object. */ + for (addr = pageaddr; addr < meta->addr; addr++) { + if (!fn((u8 *)addr)) + break; + } + + /* Apply to right of object. */ + for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) { + if (!fn((u8 *)addr)) + break; + } +} + +static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp) +{ + struct kfence_metadata *meta = NULL; + unsigned long flags; + struct page *page; + void *addr; + + /* Try to obtain a free object. */ + raw_spin_lock_irqsave(&kfence_freelist_lock, flags); + if (!list_empty(&kfence_freelist)) { + meta = list_entry(kfence_freelist.next, struct kfence_metadata, list); + list_del_init(&meta->list); + } + raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); + if (!meta) + return NULL; + + if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) { + /* + * This is extremely unlikely -- we are reporting on a + * use-after-free, which locked meta->lock, and the reporting + * code via printk calls kmalloc() which ends up in + * kfence_alloc() and tries to grab the same object that we're + * reporting on. 
While it has never been observed, lockdep does + * report that there is a possibility of deadlock. Fix it by + * using trylock and bailing out gracefully. + */ + raw_spin_lock_irqsave(&kfence_freelist_lock, flags); + /* Put the object back on the freelist. */ + list_add_tail(&meta->list, &kfence_freelist); + raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); + + return NULL; + } + + meta->addr = metadata_to_pageaddr(meta); + /* Unprotect if we're reusing this page. */ + if (meta->state == KFENCE_OBJECT_FREED) + kfence_unprotect(meta->addr); + + /* + * Note: for allocations made before RNG initialization, will always + * return zero. We still benefit from enabling KFENCE as early as + * possible, even when the RNG is not yet available, as this will allow + * KFENCE to detect bugs due to earlier allocations. The only downside + * is that the out-of-bounds accesses detected are deterministic for + * such allocations. + */ + if (prandom_u32_max(2)) { + /* Allocate on the "right" side, re-calculate address. */ + meta->addr += PAGE_SIZE - size; + meta->addr = ALIGN_DOWN(meta->addr, cache->align); + } + + addr = (void *)meta->addr; + + /* Update remaining metadata. */ + metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED); + /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */ + WRITE_ONCE(meta->cache, cache); + meta->size = size; + for_each_canary(meta, set_canary_byte); + + /* Set required struct page fields. */ + page = virt_to_page(meta->addr); + page->slab_cache = cache; + + raw_spin_unlock_irqrestore(&meta->lock, flags); + + /* Memory initialization. */ + + /* + * We check slab_want_init_on_alloc() ourselves, rather than letting + * SL*B do the initialization, as otherwise we might overwrite KFENCE's + * redzone. + */ + if (unlikely(slab_want_init_on_alloc(gfp, cache))) + memzero_explicit(addr, size); + if (cache->ctor) + cache->ctor(addr); + + if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS)) + kfence_protect(meta->addr); /* Random "faults" by protecting the object. */ + + atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]); + atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]); + + return addr; +} + +static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie) +{ + struct kcsan_scoped_access assert_page_exclusive; + unsigned long flags; + + raw_spin_lock_irqsave(&meta->lock, flags); + + if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) { + /* Invalid or double-free, bail out. */ + atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); + kfence_report_error((unsigned long)addr, meta, KFENCE_ERROR_INVALID_FREE); + raw_spin_unlock_irqrestore(&meta->lock, flags); + return; + } + + /* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */ + kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE, + KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, + &assert_page_exclusive); + + if (CONFIG_KFENCE_STRESS_TEST_FAULTS) + kfence_unprotect((unsigned long)addr); /* To check canary bytes. */ + + /* Restore page protection if there was an OOB access. */ + if (meta->unprotected_page) { + kfence_protect(meta->unprotected_page); + meta->unprotected_page = 0; + } + + /* Check canary bytes for memory corruption. */ + for_each_canary(meta, check_canary_byte); + + /* + * Clear memory if init-on-free is set. 
While we protect the page, the + * data is still there, and after a use-after-free is detected, we + * unprotect the page, so the data is still accessible. + */ + if (!zombie && unlikely(slab_want_init_on_free(meta->cache))) + memzero_explicit(addr, meta->size); + + /* Mark the object as freed. */ + metadata_update_state(meta, KFENCE_OBJECT_FREED); + + raw_spin_unlock_irqrestore(&meta->lock, flags); + + /* Protect to detect use-after-frees. */ + kfence_protect((unsigned long)addr); + + kcsan_end_scoped_access(&assert_page_exclusive); + if (!zombie) { + /* Add it to the tail of the freelist for reuse. */ + raw_spin_lock_irqsave(&kfence_freelist_lock, flags); + KFENCE_WARN_ON(!list_empty(&meta->list)); + list_add_tail(&meta->list, &kfence_freelist); + raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags); + + atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]); + atomic_long_inc(&counters[KFENCE_COUNTER_FREES]); + } else { + /* See kfence_shutdown_cache(). */ + atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]); + } +} + +static void rcu_guarded_free(struct rcu_head *h) +{ + struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head); + + kfence_guarded_free((void *)meta->addr, meta, false); +} + +static bool __init kfence_init_pool(void) +{ + unsigned long addr = (unsigned long)__kfence_pool; + struct page *pages; + int i; + + if (!__kfence_pool) + return false; + + if (!arch_kfence_init_pool()) + goto err; + + pages = virt_to_page(addr); + + /* + * Set up object pages: they must have PG_slab set, to avoid freeing + * these as real pages. + * + * We also want to avoid inserting kfence_free() in the kfree() + * fast-path in SLUB, and therefore need to ensure kfree() correctly + * enters __slab_free() slow-path. + */ + for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) { + if (!i || (i % 2)) + continue; + + /* Verify we do not have a compound head page. */ + if (WARN_ON(compound_head(&pages[i]) != &pages[i])) + goto err; + + __SetPageSlab(&pages[i]); + } + + /* + * Protect the first 2 pages. The first page is mostly unnecessary, and + * merely serves as an extended guard page. However, adding one + * additional page in the beginning gives us an even number of pages, + * which simplifies the mapping of address to metadata index. + */ + for (i = 0; i < 2; i++) { + if (unlikely(!kfence_protect(addr))) + goto err; + + addr += PAGE_SIZE; + } + + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + struct kfence_metadata *meta = &kfence_metadata[i]; + + /* Initialize metadata. */ + INIT_LIST_HEAD(&meta->list); + raw_spin_lock_init(&meta->lock); + meta->state = KFENCE_OBJECT_UNUSED; + meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */ + list_add_tail(&meta->list, &kfence_freelist); + + /* Protect the right redzone. */ + if (unlikely(!kfence_protect(addr + PAGE_SIZE))) + goto err; + + addr += 2 * PAGE_SIZE; + } + + return true; + +err: + /* + * Only release unprotected pages, and do not try to go back and change + * page attributes due to risk of failing to do so as well. If changing + * page attributes for some pages fails, it is very likely that it also + * fails for the first page, and therefore expect addr==__kfence_pool in + * most failure cases. 
+ */ + memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool)); + __kfence_pool = NULL; + return false; +} + +/* === DebugFS Interface ==================================================== */ + +static int stats_show(struct seq_file *seq, void *v) +{ + int i; + + seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled)); + for (i = 0; i < KFENCE_COUNTER_COUNT; i++) + seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i])); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(stats); + +/* + * debugfs seq_file operations for /sys/kernel/debug/kfence/objects. + * start_object() and next_object() return the object index + 1, because NULL is used + * to stop iteration. + */ +static void *start_object(struct seq_file *seq, loff_t *pos) +{ + if (*pos < CONFIG_KFENCE_NUM_OBJECTS) + return (void *)((long)*pos + 1); + return NULL; +} + +static void stop_object(struct seq_file *seq, void *v) +{ +} + +static void *next_object(struct seq_file *seq, void *v, loff_t *pos) +{ + ++*pos; + if (*pos < CONFIG_KFENCE_NUM_OBJECTS) + return (void *)((long)*pos + 1); + return NULL; +} + +static int show_object(struct seq_file *seq, void *v) +{ + struct kfence_metadata *meta = &kfence_metadata[(long)v - 1]; + unsigned long flags; + + raw_spin_lock_irqsave(&meta->lock, flags); + kfence_print_object(seq, meta); + raw_spin_unlock_irqrestore(&meta->lock, flags); + seq_puts(seq, "---------------------------------\n"); + + return 0; +} + +static const struct seq_operations object_seqops = { + .start = start_object, + .next = next_object, + .stop = stop_object, + .show = show_object, +}; + +static int open_objects(struct inode *inode, struct file *file) +{ + return seq_open(file, &object_seqops); +} + +static const struct file_operations objects_fops = { + .open = open_objects, + .read = seq_read, + .llseek = seq_lseek, +}; + +static int __init kfence_debugfs_init(void) +{ + struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL); + + debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops); + debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops); + return 0; +} + +late_initcall(kfence_debugfs_init); + +/* === Allocation Gate Timer ================================================ */ + +/* + * Set up delayed work, which will enable and disable the static key. We need to + * use a work queue (rather than a simple timer), since enabling and disabling a + * static key cannot be done from an interrupt. + * + * Note: Toggling a static branch currently causes IPIs, and here we'll end up + * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with + * more aggressive sampling intervals), we could get away with a variant that + * avoids IPIs, at the cost of not immediately capturing allocations if the + * instructions remain cached. + */ +static struct delayed_work kfence_timer; +static void toggle_allocation_gate(struct work_struct *work) +{ + if (!READ_ONCE(kfence_enabled)) + return; + + /* Enable static key, and await allocation to happen. */ + atomic_set(&allocation_gate, 0); + static_branch_enable(&kfence_allocation_key); + wait_event(allocation_wait, atomic_read(&allocation_gate) != 0); + + /* Disable static key and reset timer. 
*/ + static_branch_disable(&kfence_allocation_key); + schedule_delayed_work(&kfence_timer, msecs_to_jiffies(kfence_sample_interval)); +} +static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate); + +/* === Public interface ===================================================== */ + +void __init kfence_alloc_pool(void) +{ + if (!kfence_sample_interval) + return; + + __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE); + + if (!__kfence_pool) + pr_err("failed to allocate pool\n"); +} + +void __init kfence_init(void) +{ + /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */ + if (!kfence_sample_interval) + return; + + if (!kfence_init_pool()) { + pr_err("%s failed\n", __func__); + return; + } + + WRITE_ONCE(kfence_enabled, true); + schedule_delayed_work(&kfence_timer, 0); + pr_info("initialized - using %lu bytes for %d objects", KFENCE_POOL_SIZE, + CONFIG_KFENCE_NUM_OBJECTS); + if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) + pr_cont(" at 0x%px-0x%px\n", (void *)__kfence_pool, + (void *)(__kfence_pool + KFENCE_POOL_SIZE)); + else + pr_cont("\n"); +} + +void kfence_shutdown_cache(struct kmem_cache *s) +{ + unsigned long flags; + struct kfence_metadata *meta; + int i; + + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + bool in_use; + + meta = &kfence_metadata[i]; + + /* + * If we observe some inconsistent cache and state pair where we + * should have returned false here, cache destruction is racing + * with either kmem_cache_alloc() or kmem_cache_free(). Taking + * the lock will not help, as different critical section + * serialization will have the same outcome. + */ + if (READ_ONCE(meta->cache) != s || + READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED) + continue; + + raw_spin_lock_irqsave(&meta->lock, flags); + in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED; + raw_spin_unlock_irqrestore(&meta->lock, flags); + + if (in_use) { + /* + * This cache still has allocations, and we should not + * release them back into the freelist so they can still + * safely be used and retain the kernel's default + * behaviour of keeping the allocations alive (leak the + * cache); however, they effectively become "zombie + * allocations" as the KFENCE objects are the only ones + * still in use and the owning cache is being destroyed. + * + * We mark them freed, so that any subsequent use shows + * more useful error messages that will include stack + * traces of the user of the object, the original + * allocation, and caller to shutdown_cache(). + */ + kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true); + } + } + + for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) { + meta = &kfence_metadata[i]; + + /* See above. */ + if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED) + continue; + + raw_spin_lock_irqsave(&meta->lock, flags); + if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED) + meta->cache = NULL; + raw_spin_unlock_irqrestore(&meta->lock, flags); + } +} + +void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) +{ + /* + * allocation_gate only needs to become non-zero, so it doesn't make + * sense to continue writing to it and pay the associated contention + * cost, in case we have a large number of concurrent allocations. 
+ */ + if (atomic_read(&allocation_gate) || atomic_inc_return(&allocation_gate) > 1) + return NULL; + wake_up(&allocation_wait); + + if (!READ_ONCE(kfence_enabled)) + return NULL; + + if (size > PAGE_SIZE) + return NULL; + + return kfence_guarded_alloc(s, size, flags); +} + +size_t kfence_ksize(const void *addr) +{ + const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); + + /* + * Read locklessly -- if there is a race with __kfence_alloc(), this is + * either a use-after-free or invalid access. + */ + return meta ? meta->size : 0; +} + +void *kfence_object_start(const void *addr) +{ + const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); + + /* + * Read locklessly -- if there is a race with __kfence_alloc(), this is + * either a use-after-free or invalid access. + */ + return meta ? (void *)meta->addr : NULL; +} + +void __kfence_free(void *addr) +{ + struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr); + + /* + * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing + * the object, as the object page may be recycled for other-typed + * objects once it has been freed. meta->cache may be NULL if the cache + * was destroyed. + */ + if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU))) + call_rcu(&meta->rcu_head, rcu_guarded_free); + else + kfence_guarded_free(addr, meta, false); +} + +bool kfence_handle_page_fault(unsigned long addr) +{ + const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE; + struct kfence_metadata *to_report = NULL; + enum kfence_error_type error_type; + unsigned long flags; + + if (!is_kfence_address((void *)addr)) + return false; + + if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */ + return kfence_unprotect(addr); /* ... unprotect and proceed. */ + + atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]); + + if (page_index % 2) { + /* This is a redzone, report a buffer overflow. */ + struct kfence_metadata *meta; + int distance = 0; + + meta = addr_to_metadata(addr - PAGE_SIZE); + if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) { + to_report = meta; + /* Data race ok; distance calculation approximate. */ + distance = addr - data_race(meta->addr + meta->size); + } + + meta = addr_to_metadata(addr + PAGE_SIZE); + if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) { + /* Data race ok; distance calculation approximate. */ + if (!to_report || distance > data_race(meta->addr) - addr) + to_report = meta; + } + + if (!to_report) + goto out; + + raw_spin_lock_irqsave(&to_report->lock, flags); + to_report->unprotected_page = addr; + error_type = KFENCE_ERROR_OOB; + + /* + * If the object was freed before we took the lock we can still + * report this as an OOB -- the report will simply show the + * stacktrace of the free as well. + */ + } else { + to_report = addr_to_metadata(addr); + if (!to_report) + goto out; + + raw_spin_lock_irqsave(&to_report->lock, flags); + error_type = KFENCE_ERROR_UAF; + /* + * We may race with __kfence_alloc(), and it is possible that a + * freed object may be reallocated. We simply report this as a + * use-after-free, with the stack trace showing the place where + * the object was re-allocated. + */ + } + +out: + if (to_report) { + kfence_report_error(addr, to_report, error_type); + raw_spin_unlock_irqrestore(&to_report->lock, flags); + } else { + /* This may be a UAF or OOB access, but we can't be sure.
*/ + kfence_report_error(addr, NULL, KFENCE_ERROR_INVALID); + } + + return kfence_unprotect(addr); /* Unprotect and let access proceed. */ +} diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h new file mode 100644 index 0000000..f115aab --- /dev/null +++ b/mm/kfence/kfence.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef MM_KFENCE_KFENCE_H +#define MM_KFENCE_KFENCE_H + +#include +#include +#include +#include + +#include "../slab.h" /* for struct kmem_cache */ + +/* For non-debug builds, avoid leaking kernel pointers into dmesg. */ +#ifdef CONFIG_DEBUG_KERNEL +#define PTR_FMT "%px" +#else +#define PTR_FMT "%p" +#endif + +/* + * Get the canary byte pattern for @addr. Use a pattern that varies based on the + * lower 3 bits of the address, to detect memory corruptions with higher + * probability, where similar constants are used. + */ +#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7)) + +/* Maximum stack depth for reports. */ +#define KFENCE_STACK_DEPTH 64 + +/* KFENCE object states. */ +enum kfence_object_state { + KFENCE_OBJECT_UNUSED, /* Object is unused. */ + KFENCE_OBJECT_ALLOCATED, /* Object is currently allocated. */ + KFENCE_OBJECT_FREED, /* Object was allocated, and then freed. */ +}; + +/* Alloc/free tracking information. */ +struct kfence_track { + pid_t pid; + int num_stack_entries; + unsigned long stack_entries[KFENCE_STACK_DEPTH]; +}; + +/* KFENCE metadata per guarded allocation. */ +struct kfence_metadata { + struct list_head list; /* Freelist node; access under kfence_freelist_lock. */ + struct rcu_head rcu_head; /* For delayed freeing. */ + + /* + * Lock protecting below data; to ensure consistency of the below data, + * since the following may execute concurrently: __kfence_alloc(), + * __kfence_free(), kfence_handle_page_fault(). However, note that we + * cannot grab the same metadata off the freelist twice, and multiple + * __kfence_alloc() cannot run concurrently on the same metadata. + */ + raw_spinlock_t lock; + + /* The current state of the object; see above. */ + enum kfence_object_state state; + + /* + * Allocated object address; cannot be calculated from size, because of + * alignment requirements. + * + * Invariant: ALIGN_DOWN(addr, PAGE_SIZE) is constant. + */ + unsigned long addr; + + /* + * The size of the original allocation. + */ + size_t size; + + /* + * The kmem_cache cache of the last allocation; NULL if never allocated + * or the cache has already been destroyed. + */ + struct kmem_cache *cache; + + /* + * In case of an invalid access, the page that was unprotected; we + * optimistically only store one address. + */ + unsigned long unprotected_page; + + /* Allocation and free stack information. */ + struct kfence_track alloc_track; + struct kfence_track free_track; +}; + +extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS]; + +/* KFENCE error types for report generation. */ +enum kfence_error_type { + KFENCE_ERROR_OOB, /* Detected a out-of-bounds access. */ + KFENCE_ERROR_UAF, /* Detected a use-after-free access. */ + KFENCE_ERROR_CORRUPTION, /* Detected a memory corruption on free. */ + KFENCE_ERROR_INVALID, /* Invalid access of unknown type. */ + KFENCE_ERROR_INVALID_FREE, /* Invalid free. 
*/ +}; + +void kfence_report_error(unsigned long address, const struct kfence_metadata *meta, + enum kfence_error_type type); + +void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta); + +#endif /* MM_KFENCE_KFENCE_H */ diff --git a/mm/kfence/report.c b/mm/kfence/report.c new file mode 100644 index 0000000..0fdaa3d --- /dev/null +++ b/mm/kfence/report.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include "kfence.h" + +/* Helper function to either print to a seq_file or to console. */ +__printf(2, 3) +static void seq_con_printf(struct seq_file *seq, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + if (seq) + seq_vprintf(seq, fmt, args); + else + vprintk(fmt, args); + va_end(args); +} + +/* + * Get the number of stack entries to skip to get out of MM internals. @type is + * optional, and if set to NULL, assumes an allocation or free stack. + */ +static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries, + const enum kfence_error_type *type) +{ + char buf[64]; + int skipnr, fallback = 0; + bool is_access_fault = false; + + if (type) { + /* Depending on error type, find different stack entries. */ + switch (*type) { + case KFENCE_ERROR_UAF: + case KFENCE_ERROR_OOB: + case KFENCE_ERROR_INVALID: + is_access_fault = true; + break; + case KFENCE_ERROR_CORRUPTION: + case KFENCE_ERROR_INVALID_FREE: + break; + } + } + + for (skipnr = 0; skipnr < num_entries; skipnr++) { + int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]); + + if (is_access_fault) { + if (!strncmp(buf, KFENCE_SKIP_ARCH_FAULT_HANDLER, len)) + goto found; + } else { + if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") || + !strncmp(buf, "__slab_free", len)) { + /* + * In case of tail calls from any of the below + * to any of the above. + */ + fallback = skipnr + 1; + } + + /* Also the *_bulk() variants by only checking prefixes. */ + if (str_has_prefix(buf, "kfree") || + str_has_prefix(buf, "kmem_cache_free") || + str_has_prefix(buf, "__kmalloc") || + str_has_prefix(buf, "kmem_cache_alloc")) + goto found; + } + } + if (fallback < num_entries) + return fallback; +found: + skipnr++; + return skipnr < num_entries ? skipnr : 0; +} + +static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadata *meta, + bool show_alloc) +{ + const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track; + + if (track->num_stack_entries) { + /* Skip allocation/free internals stack. */ + int i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL); + + /* stack_trace_seq_print() does not exist; open code our own. */ + for (; i < track->num_stack_entries; i++) + seq_con_printf(seq, " %pS\n", (void *)track->stack_entries[i]); + } else { + seq_con_printf(seq, " no %s stack\n", show_alloc ? 
"allocation" : "deallocation"); + } +} + +void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta) +{ + const int size = abs(meta->size); + const unsigned long start = meta->addr; + const struct kmem_cache *const cache = meta->cache; + + lockdep_assert_held(&meta->lock); + + if (meta->state == KFENCE_OBJECT_UNUSED) { + seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata); + return; + } + + seq_con_printf(seq, + "kfence-#%zd [0x" PTR_FMT "-0x" PTR_FMT + ", size=%d, cache=%s] allocated by task %d:\n", + meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size, + (cache && cache->name) ? cache->name : "", meta->alloc_track.pid); + kfence_print_stack(seq, meta, true); + + if (meta->state == KFENCE_OBJECT_FREED) { + seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid); + kfence_print_stack(seq, meta, false); + } +} + +/* + * Show bytes at @addr that are different from the expected canary values, up to + * @max_bytes. + */ +static void print_diff_canary(unsigned long address, size_t bytes_to_show, + const struct kfence_metadata *meta) +{ + const unsigned long show_until_addr = address + bytes_to_show; + const u8 *cur, *end; + + /* Do not show contents of object nor read into following guard page. */ + end = (const u8 *)(address < meta->addr ? min(show_until_addr, meta->addr) + : min(show_until_addr, PAGE_ALIGN(address))); + + pr_cont("["); + for (cur = (const u8 *)address; cur < end; cur++) { + if (*cur == KFENCE_CANARY_PATTERN(cur)) + pr_cont(" ."); + else if (IS_ENABLED(CONFIG_DEBUG_KERNEL)) + pr_cont(" 0x%02x", *cur); + else /* Do not leak kernel memory in non-debug builds. */ + pr_cont(" !"); + } + pr_cont(" ]"); +} + +void kfence_report_error(unsigned long address, const struct kfence_metadata *meta, + enum kfence_error_type type) +{ + unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 }; + int num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1); + int skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type); + const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1; + + /* Require non-NULL meta, except if KFENCE_ERROR_INVALID. */ + if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta)) + return; + + if (meta) + lockdep_assert_held(&meta->lock); + /* + * Because we may generate reports in printk-unfriendly parts of the + * kernel, such as scheduler code, the use of printk() could deadlock. + * Until such time that all printing code here is safe in all parts of + * the kernel, accept the risk, and just get our message out (given the + * system might already behave unpredictably due to the memory error). + * As such, also disable lockdep to hide warnings, and avoid disabling + * lockdep for the rest of the kernel. + */ + lockdep_off(); + + pr_err("==================================================================\n"); + /* Print report header. */ + switch (type) { + case KFENCE_ERROR_OOB: { + const bool left_of_object = address < meta->addr; + + pr_err("BUG: KFENCE: out-of-bounds in %pS\n\n", (void *)stack_entries[skipnr]); + pr_err("Out-of-bounds access at 0x" PTR_FMT " (%luB %s of kfence-#%zd):\n", + (void *)address, + left_of_object ? meta->addr - address : address - meta->addr, + left_of_object ? 
"left" : "right", object_index); + break; + } + case KFENCE_ERROR_UAF: + pr_err("BUG: KFENCE: use-after-free in %pS\n\n", (void *)stack_entries[skipnr]); + pr_err("Use-after-free access at 0x" PTR_FMT " (in kfence-#%zd):\n", + (void *)address, object_index); + break; + case KFENCE_ERROR_CORRUPTION: + pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]); + pr_err("Corrupted memory at 0x" PTR_FMT " ", (void *)address); + print_diff_canary(address, 16, meta); + pr_cont(" (in kfence-#%zd):\n", object_index); + break; + case KFENCE_ERROR_INVALID: + pr_err("BUG: KFENCE: invalid access in %pS\n\n", (void *)stack_entries[skipnr]); + pr_err("Invalid access at 0x" PTR_FMT ":\n", (void *)address); + break; + case KFENCE_ERROR_INVALID_FREE: + pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]); + pr_err("Invalid free of 0x" PTR_FMT " (in kfence-#%zd):\n", (void *)address, + object_index); + break; + } + + /* Print stack trace and object info. */ + stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0); + + if (meta) { + pr_err("\n"); + kfence_print_object(NULL, meta); + } + + /* Print report footer. */ + pr_err("\n"); + dump_stack_print_info(KERN_ERR); + pr_err("==================================================================\n"); + + lockdep_on(); + + if (panic_on_warn) + panic("panic_on_warn set ...\n"); + + /* We encountered a memory unsafety error, taint the kernel! */ + add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK); +} -- 2.7.4 From 3992c7ab984555a01127e9ca48962b124436bd3e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 3 Nov 2020 18:58:34 +0100 Subject: [PATCH 13/16] x86, kfence: enable KFENCE for x86 Add architecture specific implementation details for KFENCE and enable KFENCE for the x86 architecture. In particular, this implements the required interface in for setting up the pool and providing helper functions for protecting and unprotecting pages. For x86, we need to ensure that the pool uses 4K pages, which is done using the set_memory_4k() helper function. Reviewed-by: Dmitry Vyukov Co-developed-by: Marco Elver Signed-off-by: Marco Elver Signed-off-by: Alexander Potapenko Reviewed-by: Jann Horn [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: Idfebe6e03d9bda5de4bd6d0084f65bd0feda8d8a --- arch/x86/Kconfig | 1 + arch/x86/include/asm/kfence.h | 65 +++++++++++++++++++++++++++++++++++++++++++ arch/x86/mm/fault.c | 5 ++++ 3 files changed, 71 insertions(+) create mode 100644 arch/x86/include/asm/kfence.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 38583e9..ce70abb 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -148,6 +148,7 @@ config X86 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if X86_64 select HAVE_ARCH_KASAN_VMALLOC if X86_64 + select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h new file mode 100644 index 0000000..beeac10 --- /dev/null +++ b/arch/x86/include/asm/kfence.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_X86_KFENCE_H +#define _ASM_X86_KFENCE_H + +#include +#include + +#include +#include +#include +#include + +/* + * The page fault handler entry function, up to which the stack trace is + * truncated in reports. 
+ */ +#define KFENCE_SKIP_ARCH_FAULT_HANDLER "asm_exc_page_fault" + +/* Force 4K pages for __kfence_pool. */ +static inline bool arch_kfence_init_pool(void) +{ + unsigned long addr; + + for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr); + addr += PAGE_SIZE) { + unsigned int level; + + if (!lookup_address(addr, &level)) + return false; + + if (level != PG_LEVEL_4K) + set_memory_4k(addr, 1); + } + + return true; +} + +/* Protect the given page and flush TLB. */ +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + unsigned int level; + pte_t *pte = lookup_address(addr, &level); + + if (WARN_ON(!pte || level != PG_LEVEL_4K)) + return false; + + /* + * We need to avoid IPIs, as we may get KFENCE allocations or faults + * with interrupts disabled. Therefore, the below is best-effort, and + * does not flush TLBs on all CPUs. We can tolerate some inaccuracy; + * lazy fault handling takes care of faults after the page is PRESENT. + */ + + if (protect) + set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); + else + set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); + + /* Flush this CPU's TLB. */ + flush_tlb_one_kernel(addr); + return true; +} + +#endif /* _ASM_X86_KFENCE_H */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9c1545c..c64e44f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -9,6 +9,7 @@ #include <linux/kdebug.h> /* oops_begin/end, ... */ #include <linux/extable.h> /* search_exception_tables */ #include <linux/memblock.h> /* max_low_pfn */ +#include <linux/kfence.h> /* kfence_handle_page_fault */ #include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */ #include <linux/mmiotrace.h> /* kmmio_handler, ... */ #include <linux/perf_event.h> /* perf_sw_event */ @@ -732,6 +733,10 @@ no_context(struct pt_regs *regs, unsigned long error_code, if (IS_ENABLED(CONFIG_EFI)) efi_recover_from_page_fault(address); + /* Only not-present faults should be handled by KFENCE. */ + if (!(error_code & X86_PF_PROT) && kfence_handle_page_fault(address)) + return; + oops: /* * Oops. The kernel tried to access some bad page. We'll have to -- 2.7.4 From 3b586d13502a89a53ff34752eb785e456657fe73 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 3 Nov 2020 18:58:35 +0100 Subject: [PATCH 14/16] arm64, kfence: enable KFENCE for ARM64 Add architecture-specific implementation details for KFENCE and enable KFENCE for the arm64 architecture. In particular, this implements the required interface in <asm/kfence.h>. KFENCE requires that attributes for pages from its memory pool can individually be set. Therefore, force the entire linear map to be mapped at page granularity. Doing so may result in extra memory allocated for page tables in case rodata=full is not set; however, currently CONFIG_RODATA_FULL_DEFAULT_ENABLED=y is the default, and the common case is therefore not affected by this change.
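To put a rough number on the page-table overhead mentioned above: with a 4 KiB granule, every 2 MiB block that would otherwise be one PMD entry needs its own last-level table. A back-of-the-envelope sketch, with an assumed 4 GiB linear region (illustrative figures, not measurements from this patch):

/* Back-of-the-envelope page-table cost of a page-granular linear map
 * (illustrative; assumes a 4 KiB granule and a 4 GiB linear region). */
#include <stdio.h>

int main(void)
{
	unsigned long linear_bytes = 4UL << 30; /* assumed 4 GiB linear map */
	unsigned long pmd_span = 2UL << 20;     /* 2 MiB covered per PTE table */
	unsigned long pte_tables = linear_bytes / pmd_span;

	/* 2048 tables * 4 KiB each = 8 MiB of extra page tables. */
	printf("extra PTE tables: %lu (%lu MiB)\n",
	       pte_tables, pte_tables * 4096 / (1024 * 1024));
	return 0;
}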
Reviewed-by: Dmitry Vyukov Co-developed-by: Alexander Potapenko Signed-off-by: Alexander Potapenko Signed-off-by: Marco Elver Reviewed-by: Jann Horn Reviewed-by: Mark Rutland [port kfence feature to rpi-5.10.95] Signed-off-by: Sung-hun Kim Signed-off-by: Marek Szyprowski Signed-off-by: Seung-Woo Kim Change-Id: I1cb00f8faea0324b850f4c5879a51b1721bc155f --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/kfence.h | 19 +++++++++++++++++++ arch/arm64/mm/fault.c | 4 ++++ arch/arm64/mm/mmu.c | 8 ++++++-- 4 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/kfence.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3ba7b56..29874f2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -137,6 +137,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN + select HAVE_ARCH_KFENCE select HAVE_ARCH_KGDB select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h new file mode 100644 index 0000000..5ac0f59 --- /dev/null +++ b/arch/arm64/include/asm/kfence.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_KFENCE_H +#define __ASM_KFENCE_H + +#include + +#define KFENCE_SKIP_ARCH_FAULT_HANDLER "el1_sync" + +static inline bool arch_kfence_init_pool(void) { return true; } + +static inline bool kfence_protect_page(unsigned long addr, bool protect) +{ + set_memory_valid(addr, 1, !protect); + + return true; +} + +#endif /* __ASM_KFENCE_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 795d224..d1a8b52 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -322,6 +323,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, } else if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; } else { + if (kfence_handle_page_fault(addr)) + return; + msg = "paging request"; } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c68e444..22098c5 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1493,8 +1493,12 @@ int arch_add_memory(int nid, u64 start, u64 size, { int ret, flags = 0; - VM_BUG_ON(!mhp_range_allowed(start, size, true)); - if (rodata_full || debug_pagealloc_enabled()) + /* + * KFENCE requires linear map to be mapped at page granularity, so that + * it is possible to protect/unprotect single pages in the KFENCE pool. + */ + if (rodata_full || debug_pagealloc_enabled() || + IS_ENABLED(CONFIG_KFENCE)) flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start), -- 2.7.4 From 80ec216383c2b697255eed5cc06ec8bd9c739604 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Tue, 3 Nov 2020 18:58:36 +0100 Subject: [PATCH 15/16] mm, kfence: insert KFENCE hooks for SLAB Inserts KFENCE hooks into the SLAB allocator. To pass the originally requested size to KFENCE, add an argument 'orig_size' to slab_alloc*(). The additional argument is required to preserve the requested original size for kmalloc() allocations, which uses size classes (e.g. an allocation of 272 bytes will return an object of size 512). Therefore, kmem_cache::size does not represent the kmalloc-caller's requested size, and we must introduce the argument 'orig_size' to propagate the originally requested size to KFENCE. 
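As a worked version of the 272-byte example above (a toy sketch, not kernel code; the kernel's real rounding goes through kmalloc_slab()/kmalloc_index(), which the power-of-two loop below merely approximates):

/* Toy illustration: kmalloc(272) is served from the kmalloc-512 class, so
 * without orig_size a KFENCE object would be sized 512 and overflows into
 * bytes 272..511 would go undetected. Simplified model only. */
#include <stdio.h>

static unsigned long toy_size_class(unsigned long size)
{
	unsigned long c = 32; /* assumed smallest class for this sketch */

	while (c < size)
		c <<= 1;
	return c;
}

int main(void)
{
	unsigned long req = 272;
	unsigned long cls = toy_size_class(req); /* 512 */

	printf("requested %lu -> class %lu; blind OOB window without orig_size: %lu bytes\n",
	       req, cls, cls - req); /* 240 bytes */
	return 0;
}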
Without the originally requested size, we would not be able to detect
out-of-bounds accesses for objects placed at the end of a KFENCE object
page if that object is not equal to the kmalloc-size class it was
bucketed into.

When KFENCE is disabled, there is no additional overhead, since
slab_alloc*() functions are __always_inline.

Reviewed-by: Dmitry Vyukov
Co-developed-by: Marco Elver
Signed-off-by: Marco Elver
Signed-off-by: Alexander Potapenko
[port kfence feature to rpi-5.10.95]
Signed-off-by: Sung-hun Kim
Signed-off-by: Marek Szyprowski
Signed-off-by: Seung-Woo Kim
Change-Id: I3a5660422a01623a21468f3ff62d506725fa11ed
---
 include/linux/slab_def.h |  3 +++
 mm/kfence/core.c         |  2 ++
 mm/slab.c                | 38 +++++++++++++++++++++++++++++---------
 mm/slab_common.c         |  5 ++++-
 4 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 9eb430c16..3aa5e1e 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -2,6 +2,7 @@
 #ifndef _LINUX_SLAB_DEF_H
 #define _LINUX_SLAB_DEF_H
 
+#include <linux/kfence.h>
 #include <linux/reciprocal_div.h>
 
 /*
@@ -114,6 +115,8 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 static inline int objs_per_slab_page(const struct kmem_cache *cache,
 				     const struct page *page)
 {
+	if (is_kfence_address(page_address(page)))
+		return 1;
 	return cache->num;
 }
 
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 64f33b9..721fd63 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -313,6 +313,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
 	/* Set required struct page fields. */
 	page = virt_to_page(meta->addr);
 	page->slab_cache = cache;
+	if (IS_ENABLED(CONFIG_SLAB))
+		page->s_mem = addr;
 
 	raw_spin_unlock_irqrestore(&meta->lock, flags);
 
diff --git a/mm/slab.c b/mm/slab.c
index 677c065..88e54dd 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -100,6 +100,7 @@
 #include <linux/seq_file.h>
 #include <linux/notifier.h>
 #include <linux/kallsyms.h>
+#include <linux/kfence.h>
 #include <linux/cpu.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
@@ -3207,7 +3208,7 @@ must_grow:
 }
 
 static __always_inline void *
-slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
 		unsigned long caller)
 {
 	unsigned long save_flags;
@@ -3220,6 +3221,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (unlikely(!cachep))
 		return NULL;
 
+	ptr = kfence_alloc(cachep, orig_size, flags);
+	if (unlikely(ptr))
+		goto out_hooks;
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
@@ -3252,6 +3257,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
 		memset(ptr, 0, cachep->object_size);
 
+out_hooks:
 	slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr);
 	return ptr;
 }
@@ -3289,7 +3295,7 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 #endif /* CONFIG_NUMA */
 
 static __always_inline void *
-slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
+slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
 {
 	unsigned long save_flags;
 	void *objp;
@@ -3300,6 +3306,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (unlikely(!cachep))
 		return NULL;
 
+	objp = kfence_alloc(cachep, orig_size, flags);
+	if (unlikely(objp))
+		goto out;
+
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 	objp = __do_cache_alloc(cachep, flags);
@@ -3310,6 +3320,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
 	if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
 		memset(objp, 0, cachep->object_size);
 
+out:
 	slab_post_alloc_hook(cachep, objcg, flags, 1, &objp);
 	return objp;
 }
@@ -3415,6 +3426,12 @@ free_done:
 static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
 					 unsigned long caller)
 {
+	if (is_kfence_address(objp)) {
+		kmemleak_free_recursive(objp, cachep->flags);
+		__kfence_free(objp);
+		return;
+	}
+
 	/* Put the object into the quarantine, don't touch it for now. */
 	if (kasan_slab_free(cachep, objp, _RET_IP_))
 		return;
@@ -3480,7 +3497,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-	void *ret = slab_alloc(cachep, flags, _RET_IP_);
+	void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
 
 	trace_kmem_cache_alloc(_RET_IP_, ret,
 			       cachep->object_size, cachep->size, flags);
@@ -3513,7 +3530,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 
 	local_irq_disable();
 	for (i = 0; i < size; i++) {
-		void *objp = __do_cache_alloc(s, flags);
+		void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags);
 
 		if (unlikely(!objp))
 			goto error;
@@ -3546,7 +3563,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 {
 	void *ret;
 
-	ret = slab_alloc(cachep, flags, _RET_IP_);
+	ret = slab_alloc(cachep, flags, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc(_RET_IP_, ret,
@@ -3572,7 +3589,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
  */
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
-	void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+	void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
 	trace_kmem_cache_alloc_node(_RET_IP_, ret,
 				    cachep->object_size, cachep->size,
@@ -3590,7 +3607,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 {
 	void *ret;
 
-	ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+	ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc_node(_RET_IP_, ret,
@@ -3651,7 +3668,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 	cachep = kmalloc_slab(size, flags);
 	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
 		return cachep;
-	ret = slab_alloc(cachep, flags, caller);
+	ret = slab_alloc(cachep, flags, size, caller);
 
 	ret = kasan_kmalloc(cachep, ret, size, flags);
 	trace_kmalloc(caller, ret,
@@ -4150,7 +4167,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 	BUG_ON(objnr >= cachep->num);
 
 	/* Find offset within object. */
-	offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+	if (is_kfence_address(ptr))
+		offset = ptr - kfence_object_start(ptr);
+	else
+		offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
 
 	/* Allow address range falling entirely within usercopy region. */
 	if (offset >= cachep->useroffset &&
diff --git a/mm/slab_common.c b/mm/slab_common.c
index ec83290..79c86cc 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -12,6 +12,7 @@
 #include <linux/memory.h>
 #include <linux/cache.h>
 #include <linux/compiler.h>
+#include <linux/kfence.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/uaccess.h>
@@ -434,6 +435,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
 	rcu_barrier();
 
 	list_for_each_entry_safe(s, s2, &to_destroy, list) {
+		kfence_shutdown_cache(s);
 #ifdef SLAB_SUPPORTS_SYSFS
 		sysfs_slab_release(s);
 #else
@@ -459,6 +461,7 @@ static int shutdown_cache(struct kmem_cache *s)
 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
 		schedule_work(&slab_caches_to_rcu_destroy_work);
 	} else {
+		kfence_shutdown_cache(s);
 #ifdef SLAB_SUPPORTS_SYSFS
 		sysfs_slab_unlink(s);
 		sysfs_slab_release(s);
@@ -1170,7 +1173,7 @@ size_t ksize(const void *objp)
 	if (unlikely(ZERO_OR_NULL_PTR(objp)) || !__kasan_check_read(objp, 1))
 		return 0;
 
-	size = __ksize(objp);
+	size = kfence_ksize(objp) ?: __ksize(objp);
 	/*
 	 * We assume that ksize callers could use whole allocated area,
 	 * so we need to unpoison this area.
-- 
2.7.4

From dc53d80a4c1a01a02b1e2d100d72fed4f6df7157 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko
Date: Tue, 3 Nov 2020 18:58:37 +0100
Subject: [PATCH 16/16] mm, kfence: insert KFENCE hooks for SLUB

Inserts KFENCE hooks into the SLUB allocator.

To pass the originally requested size to KFENCE, add an argument
'orig_size' to slab_alloc*(). The additional argument is required to
preserve the requested original size for kmalloc() allocations, which
use size classes (e.g. an allocation of 272 bytes will return an object
of size 512). Therefore, kmem_cache::size does not represent the
kmalloc-caller's requested size, and we must introduce the argument
'orig_size' to propagate the originally requested size to KFENCE.

Without the originally requested size, we would not be able to detect
out-of-bounds accesses for objects placed at the end of a KFENCE object
page if that object is not equal to the kmalloc-size class it was
bucketed into.

When KFENCE is disabled, there is no additional overhead, since
slab_alloc*() functions are __always_inline.

Reviewed-by: Dmitry Vyukov
Reviewed-by: Jann Horn
Co-developed-by: Marco Elver
Signed-off-by: Marco Elver
Signed-off-by: Alexander Potapenko
[port kfence feature to rpi-5.10.95]
Signed-off-by: Sung-hun Kim
Signed-off-by: Marek Szyprowski
Signed-off-by: Seung-Woo Kim
Change-Id: Id2b0d64b10cc68d176c935dea3b7135bf2190d1f
---
 include/linux/slub_def.h |  3 +++
 mm/kfence/core.c         |  2 ++
 mm/slub.c                | 60 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 51 insertions(+), 14 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1be0ed5..dcde82a 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -7,6 +7,7 @@
  *
  * (C) 2007 SGI, Christoph Lameter
  */
+#include <linux/kfence.h>
 #include <linux/kobject.h>
 #include <linux/reciprocal_div.h>
@@ -185,6 +186,8 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 					const struct page *page, void *obj)
 {
+	if (is_kfence_address(obj))
+		return 0;
 	return __obj_to_index(cache, page_address(page), obj);
 }
 
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 721fd63..9d59701 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -313,6 +313,8 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
 	/* Set required struct page fields. */
 	page = virt_to_page(meta->addr);
 	page->slab_cache = cache;
+	if (IS_ENABLED(CONFIG_SLUB))
+		page->objects = 1;
 	if (IS_ENABLED(CONFIG_SLAB))
 		page->s_mem = addr;
 
diff --git a/mm/slub.c b/mm/slub.c
index d90d8f9f..f2ae296 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -28,6 +28,7 @@
 #include <linux/ctype.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
+#include <linux/kfence.h>
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
@@ -1558,6 +1559,11 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
 	void *old_tail = *tail ? *tail : *head;
 	int rsize;
 
+	if (is_kfence_address(next)) {
+		slab_free_hook(s, next);
+		return true;
+	}
+
 	/* Head and tail of the reconstructed freelist */
 	*head = NULL;
 	*tail = NULL;
@@ -2870,7 +2876,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
 * Otherwise we can simply pick the next object from the lockless free list.
 */
 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
-		gfp_t gfpflags, int node, unsigned long addr)
+		gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 {
 	void *object;
 	struct kmem_cache_cpu *c;
@@ -2885,6 +2891,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
 	if (!s)
 		return NULL;
+
+	object = kfence_alloc(s, orig_size, gfpflags);
+	if (unlikely(object))
+		goto out;
+
 redo:
 	/*
 	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
@@ -2957,20 +2968,21 @@ redo:
 	if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
 		memset(object, 0, s->object_size);
 
+out:
 	slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
 
 	return object;
 }
 
 static __always_inline void *slab_alloc(struct kmem_cache *s,
-		gfp_t gfpflags, unsigned long addr)
+		gfp_t gfpflags, unsigned long addr, size_t orig_size)
 {
-	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
+	return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+	void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
 
 	trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
 				s->size, gfpflags);
@@ -2982,7 +2994,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
 #ifdef CONFIG_TRACING
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
-	void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+	void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
 	trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
 	ret = kasan_kmalloc(s, ret, size, gfpflags);
 	return ret;
@@ -2993,7 +3005,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
 
 	trace_kmem_cache_alloc_node(_RET_IP_, ret,
 				    s->object_size, s->size, gfpflags, node);
@@ -3007,7 +3019,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
 				    gfp_t gfpflags,
 				    int node, size_t size)
 {
-	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+	void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
 
 	trace_kmalloc_node(_RET_IP_, ret,
 			   size, s->size, gfpflags, node);
@@ -3041,6 +3053,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
 	stat(s, FREE_SLOWPATH);
 
+	if (kfence_free(head))
+		return;
+
 	if (kmem_cache_debug(s) &&
 	    !free_debug_processing(s, page, head, tail, cnt, addr))
 		return;
@@ -3287,6 +3302,13 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
 		df->s = cache_from_obj(s, object); /* Support for memcg */
 	}
 
+	if (is_kfence_address(object)) {
+		slab_free_hook(df->s, object);
+		__kfence_free(object);
+		p[size] = NULL; /* mark object processed */
+		return size;
+	}
+
 	/* Start new detached freelist */
 	df->page = page;
 	set_freepointer(df->s, object, NULL);
@@ -3367,8 +3389,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 	c = this_cpu_ptr(s->cpu_slab);
 
 	for (i = 0; i < size; i++) {
-		void *object = c->freelist;
+		void *object = kfence_alloc(s, s->object_size, flags);
 
+		if (unlikely(object)) {
+			p[i] = object;
+			continue;
+		}
+
+		object = c->freelist;
 		if (unlikely(!object)) {
 			/*
 			 * We may have removed an object from c->freelist using
@@ -4035,7 +4063,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc(s, flags, _RET_IP_);
+	ret = slab_alloc(s, flags, _RET_IP_, size);
 
 	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
@@ -4083,7 +4111,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc_node(s, flags, node, _RET_IP_);
+	ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
 
 	trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
@@ -4109,6 +4137,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 	struct kmem_cache *s;
 	unsigned int offset;
 	size_t object_size;
+	bool is_kfence = is_kfence_address(ptr);
 
 	ptr = kasan_reset_tag(ptr);
@@ -4121,10 +4150,13 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
 			       to_user, 0, n);
 
 	/* Find offset within object. */
-	offset = (ptr - page_address(page)) % s->size;
+	if (is_kfence)
+		offset = ptr - kfence_object_start(ptr);
+	else
+		offset = (ptr - page_address(page)) % s->size;
 
 	/* Adjust for redzone and reject if within the redzone. */
-	if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
+	if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
 		if (offset < s->red_left_pad)
 			usercopy_abort("SLUB object in left red zone",
 				       s->name, to_user, offset, n);
@@ -4539,7 +4571,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc(s, gfpflags, caller);
+	ret = slab_alloc(s, gfpflags, caller, size);
 
 	/* Honor the call site pointer we received. */
 	trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -4570,7 +4602,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
 		return s;
 
-	ret = slab_alloc_node(s, gfpflags, node, caller);
+	ret = slab_alloc_node(s, gfpflags, node, caller, size);
 
 	/* Honor the call site pointer we received. */
 	trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
-- 
2.7.4
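
Note on the hook pattern in patches 15 and 16: both follow the same control
flow -- each allocation is first offered to KFENCE, which only occasionally
returns a guarded object and otherwise declines with NULL so the regular slab
fast path runs. The user-space C sketch below models that flow under stated
simplifications: toy_kfence_alloc(), toy_kmalloc() and SAMPLE_EVERY are
illustrative names (not kernel API), and the allocation counter stands in for
KFENCE's timer-based sample interval. Only the kfence_alloc()/__kfence_free()
split and the 'orig_size' argument correspond to the patches above.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define SAMPLE_EVERY 100	/* crude stand-in for the KFENCE sample interval */

static unsigned long allocs, guarded;

/* Usually declines with NULL so the normal slab path is taken. */
static void *toy_kfence_alloc(size_t orig_size)
{
	if (++allocs % SAMPLE_EVERY)
		return NULL;
	guarded++;
	/*
	 * A real guarded object sits next to a protected page and is
	 * sized by orig_size, not by the kmalloc size class.
	 */
	return malloc(orig_size);
}

/* Mirrors the patched slab_alloc(): orig_size is the caller's size. */
static void *toy_kmalloc(size_t orig_size)
{
	void *obj = toy_kfence_alloc(orig_size);
	size_t bucket = 32;

	if (obj)
		return obj;		/* sampled: guarded allocation */

	while (bucket < orig_size)	/* size-class bucketing, e.g. 272 -> 512 */
		bucket <<= 1;
	return malloc(bucket);		/* normal fast path */
}

int main(void)
{
	for (int i = 0; i < 1000; i++) {
		void *p = toy_kmalloc(272);

		if (!p)
			return 1;
		memset(p, 0, 272);
		free(p);
	}
	printf("%lu allocations, %lu guarded\n", allocs, guarded);
	return 0;
}

This also shows why 'orig_size' has to be threaded through slab_alloc*(): the
guard page must border the caller's 272 requested bytes, since an object sized
by the 512-byte bucket would leave 240 slack bytes in which an out-of-bounds
access could go undetected.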