From 4b74cd8fdf66d4cb128d9249d9b2ada34dc98921 Mon Sep 17 00:00:00 2001
From: Dongwoo Lee <dwoo08.lee@samsung.com>
Date: Fri, 6 Mar 2020 14:04:13 +0900
Subject: [PATCH 01/16] usb: dwc2: gadget: Expand buffer size of control
 endpoint

We found a case where the buffer of the control endpoint, which was
previously allocated with 8 bytes, is corrupted when the host races to
set up interfaces. Even worse, it overwrites memory belonging to other
structures, such as the usb_request for the control endpoint, and this
causes a kernel panic. In Tizen in particular, it often happens when
the target is configured as a multi-function device: sdb + mtp.

In our empirical examination, the buffer can be corrupted up to a size
of 456 bytes. Based on this result, the buffer size is enlarged to
512 bytes to prevent the kernel panic even if the corruption happens.

Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
Signed-off-by: Dongwoo Lee <dwoo08.lee@samsung.com>
(cherry picked from commit 4039de56979d2fb4d1cd7acf99e3b0215337a08d)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I8e9b04a9f290523147cc93686cc9de95ceeb4c00
---
 drivers/usb/dwc2/core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index faf40c1..10f41ba 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -659,7 +659,7 @@ struct dwc2_hw_params {
 };
 
 /* Size of control and EP0 buffers */
-#define DWC2_CTRL_BUFF_SIZE 8
+#define DWC2_CTRL_BUFF_SIZE 512
 
 /**
  * struct dwc2_gregs_backup - Holds global registers state before
-- 
2.7.4


From 84c3c2a03ff89766f95f9ee6d37e885f715ad888 Mon Sep 17 00:00:00 2001
From: Dongwoo Lee <dwoo08.lee@samsung.com>
Date: Wed, 26 Feb 2020 19:39:04 +0900
Subject: [PATCH 02/16] usb: dwc2: Defer forcing peripheral mode

dwc2 forces the mode according to dr_mode once it is finally
determined. In the case of peripheral mode, however, this notifies the
host before the gadget driver has been prepared. On the host, the hcd
requests the device descriptor for enumeration, but it never gets a
response. See the log below:

 usb 2-2: new high-speed USB device number 6 using xhci_hcd
 usb 2-2: device descriptor read/64, error -110
 usb 2-2: device descriptor read/64, error -110
 usb 2-2: new high-speed USB device number 7 using xhci_hcd
 usb 2-2: device descriptor read/64, error -110
 usb 2-2: device descriptor read/64, error -110
 usb usb2-port2: attempt power cycle
 usb 2-2: new high-speed USB device number 8 using xhci_hcd
 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command
 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command
 usb 2-2: device not accepting address 8, error -62
 usb 2-2: new high-speed USB device number 9 using xhci_hcd
 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command
 xhci_hcd 0000:00:14.0: Timeout while waiting for setup device command
 usb 2-2: device not accepting address 9, error -62
 usb usb2-port2: unable to enumerate USB device

Even worse, all ports on the host can get disabled in this situation,
at least in the xhci case. To prevent this, forcing peripheral mode is
deferred until the gadget driver is prepared.

Signed-off-by: Dongwoo Lee <dwoo08.lee@samsung.com>
(cherry picked from commit 9507385cb797a12f278e73b9cb298ba73f120fd5)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: Ic24f1a36c9ccfd9b571cd4d6eff5cbc3da9082b1
---
 drivers/usb/dwc2/core.c   | 7 ++++++-
 drivers/usb/dwc2/gadget.c | 1 +
 drivers/usb/dwc2/params.c | 3 ++-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc2/core.c b/drivers/usb/dwc2/core.c
index 81f6843..f755d73 100644
--- a/drivers/usb/dwc2/core.c
+++ b/drivers/usb/dwc2/core.c
@@ -581,7 +581,12 @@ void dwc2_force_dr_mode(struct dwc2_hsotg *hsotg)
 
 		break;
 	case USB_DR_MODE_PERIPHERAL:
-		dwc2_force_mode(hsotg, false);
+		/*
+		 * To prevent early notification to host without any
+		 * preparation about device descriptor, forcing mode
+		 * is defered until gadget driver is ready.
+		 */
+		/* dwc2_force_mode(hsotg, false); */
 		break;
 	case USB_DR_MODE_OTG:
 		dwc2_clear_force_mode(hsotg);
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 68292c2..dc999fd 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -4550,6 +4550,7 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget,
 	hsotg->gadget.speed = USB_SPEED_UNKNOWN;
 
 	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) {
+		dwc2_force_mode(hsotg, false);
 		ret = dwc2_lowlevel_hw_enable(hsotg);
 		if (ret)
 			goto err;
diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index d0508a0..5e0f85c 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -864,7 +864,8 @@ static void dwc2_get_dev_hwparams(struct dwc2_hsotg *hsotg)
 	if (hsotg->dr_mode == USB_DR_MODE_HOST)
 		return;
 
-	dwc2_force_mode(hsotg, false);
+	if (hsotg->dr_mode != USB_DR_MODE_PERIPHERAL)
+		dwc2_force_mode(hsotg, false);
 
 	gnptxfsiz = dwc2_readl(hsotg, GNPTXFSIZ);
 
-- 
2.7.4


From 661b0cac861aa7c6d49130f8a243f80e9ff00dfe Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 22 Oct 2018 13:44:11 +0900
Subject: [PATCH 03/16] usb: dwc2: gadget: set the quirk_ep_out_alinged_size as
 true

Set quirk_ep_out_aligned_size to true.
This patch fixes a kernel panic that occurs after a failed memory
allocation.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
[dwoo08.lee: bring commit from linux-artik7 of public tizen repository]
Signed-off-by: Dongwoo Lee <dwoo08.lee@samsung.com>
(cherry picked from commit 377d8707f528fe5e318debd4e282db6ae8713244)

Change-Id: Ia058acdaac0b9d39554e9eb1a69be57255961186
---
 drivers/usb/dwc2/gadget.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index dc999fd..cb89afc 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -5060,6 +5060,7 @@ int dwc2_gadget_init(struct dwc2_hsotg *hsotg)
 					  epnum, 0);
 	}
 
+	hsotg->gadget.quirk_ep_out_aligned_size = true;
 	dwc2_hsotg_dump(hsotg);
 
 #if IS_ENABLED(CONFIG_EXTCON)
-- 
2.7.4


From ff2eecaa68140cb58507fd8eebc15099ccebebf8 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Fri, 27 Mar 2020 18:07:28 +0900
Subject: [PATCH 04/16] brcmfamc: add the feature-disable property

Add the feature-disable property.
It is parsed when brcmfmac is probed.
If someone wants to disable some features by default, they can use
this property.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
(cherry picked from commit 4a12cde7ed3e962f146676cb51fb3a6bc6380bd1)

Change-Id: Ie06f2e896d37dd75657779dedb68f7c683cb6d53
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
index e406e11..89c5f0a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/of.c
@@ -125,6 +125,11 @@ void brcmf_of_probe(struct device *dev, enum brcmf_bus_type bus_type,
 	if (bus_type != BRCMF_BUSTYPE_SDIO)
 		return;
 
+	if (of_property_read_u32(np, "brcm,feature-disable", &val) == 0) {
+		settings->feature_disable |= val;
+		brcmf_info("Disabled feature 0x%x\n", settings->feature_disable);
+	}
+
 	if (of_property_read_u32(np, "brcm,drive-strength", &val) == 0)
 		sdio->drive_strength = val;
 
-- 
2.7.4


From bbbe8f5741a6ba313abbce8c6053cc8dde57266d Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 25 Mar 2024 20:53:30 +0900
Subject: [PATCH 05/16] ARM: dts: bcm2711-rpi-4-b: add a brcmf wifi node

Add a brcmf wifi node in order to use the property below.
- brcm,feature-disable

"0x2000" is the bit that disables the "sup_wpa" feature.
Tizen doesn't need this feature, so disable it by default.

Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Hoegeun Kwon <hoegeun.kwon@samsung.com>
(cherry-picked from commit 23eaf8308aa4cc22a4002ae67dd5736f183e0b7d)

Change-Id: Ifc39b6ce7d71caca0efe48a9659d41e6da5081f5
---
 arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dts | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dts
index d3a3a1e..5555f56 100644
--- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dts
+++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dts
@@ -273,10 +273,17 @@
 };
 
 &mmcnr {
+	#address-cells = <1>;
+	#size-cells = <0>;
 	pinctrl-names = "default";
 	pinctrl-0 = <&sdio_pins>;
 	bus-width = <4>;
 	status = "okay";
+	brcmf: wifi@1 {
+		reg = <1>;
+		compatible = "brcm,bcm4329-fmac";
+		brcm,feature-disable = <0x2000>; /* BIT[13] : sup-wpa */
+	};
 };
 
 &uart0 {
-- 
2.7.4


From 4e9b8d262125285f873cf4a5af3824a46882aa00 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Fri, 23 Feb 2024 16:13:15 +0900
Subject: [PATCH 06/16] Add a build script for building tizen kernel on local

Add a build script for building the tizen kernel.
This script creates:
     - kernel image
     - dtb file
     - modules image

Change-Id: Id2a63588ac5288cf20e96145feb5b27184ed25e4
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
---
 build-rpi4.sh | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 150 insertions(+)
 create mode 100755 build-rpi4.sh

diff --git a/build-rpi4.sh b/build-rpi4.sh
new file mode 100755
index 0000000..8c2d3ce
--- /dev/null
+++ b/build-rpi4.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+COLOR_RED_BG=$(tput setab 1)
+COLOR_RED_BOLD=$(tput bold)$(tput setaf 1)
+COLOR_RESET=$(tput sgr0)
+
+MOD_DIR="usr/tmp_mod"
+MOD_IMG="usr/modules.img"
+MOD_SIZE=20
+NCPUS=`cat /proc/cpuinfo | grep processor | wc -l`
+NCPUS=$(($NCPUS * 2))
+
+if [ $# == 0 ]; then
+	echo "Usage : $0 <architecture> [rt]"
+	echo "	architecture : arm or arm64"
+	echo "	e.g) $0 arm"
+	echo "	e.g) $0 arm64 rt"
+	exit
+fi
+
+# Check this system has ccache
+check_ccache()
+{
+	type ccache
+	if [ "$?" -eq "0" ]; then
+		CCACHE=ccache
+	fi
+}
+
+function mk_modules() {
+	[ -e /usr/bin/make_ext4fs ] && USE_MAKE_EXT4FS=1
+	if [ "$USE_MAKE_EXT4FS" != "1" ]; then
+		sudo ls > /dev/null
+	fi
+
+	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules_prepare
+	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules -j ${NCPUS}
+
+	if [ "$?" != "0" ]; then
+		echo "Failed to make modules"
+		exit 1
+	fi
+
+	[ -d ${MOD_DIR} ] || mkdir ${MOD_DIR}
+
+	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules_install INSTALL_MOD_PATH=${MOD_DIR} INSTALL_MOD_STRIP=1
+
+	if [ "$USE_MAKE_EXT4FS" == "1" ]; then
+		/usr/bin/make_ext4fs -b 4096 -L modules -l ${MOD_SIZE}M $MOD_IMG ${MOD_DIR}/lib/modules/
+	else
+		dd if=/dev/zero of=${MOD_IMG} bs=1M count=${MOD_SIZE}
+		mkfs.ext4 -F -b 4096 -L modules ${MOD_IMG}
+		[ -d ${MOD_DIR}/mnt ] || mkdir ${MOD_DIR}/mnt
+		sudo mount -o loop ${MOD_IMG} ${MOD_DIR}/mnt
+		sudo cp -rf ${MOD_DIR}/lib/modules/* ${MOD_DIR}/mnt
+		sync
+		sudo umount ${MOD_DIR}/mnt
+	fi
+	rm -rf ${MOD_DIR}
+	ls -al ${MOD_IMG}
+}
+
+check_ccache
+
+function cleanup_localversion()
+{
+	rm -f localversion-rt
+}
+
+function create_tar()
+{
+
+	if [ ! -d output ] ; then
+		mkdir ./output
+	fi
+
+	cp usr/modules.img ./output
+
+	DATE=`date +%Y%m%d%H%M`
+	if [ $ARM_ARCH = "arm" ]; then
+		cp ./arch/arm/boot/zImage ./output
+		cp ./arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dtb ./output
+		tar cvf tizen-local-${DATE}-boot-armv7l-rpi4.tar -C output zImage bcm2711-rpi-4-b.dtb modules.img
+		mv tizen-local-${DATE}-boot-armv7l-rpi4.tar ./output/
+	elif [ $ARM_ARCH = "arm64" ]; then
+		cp ./arch/arm64/boot/Image ./output
+		cp ./arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dtb ./output
+		tar cvf tizen-local-${DATE}-boot-arm64-rpi4.tar -C output Image bcm2711-rpi-4-b.dtb modules.img
+		mv tizen-local-${DATE}-boot-arm64-rpi4.tar ./output/
+	else
+		echo "There is no proper architecture agrument!"
+		exit 1;
+	fi
+}
+
+DEFCONFIG=tizen_bcm2711_defconfig
+touch .kernel_buildenv
+if [ "$2" = "rt" ]; then
+	echo "Build RT-Kernel"
+	BUILD_VERSION=`cat .kernel_buildenv`
+	if [ "$BUILD_VERSION" != "RT" ]; then
+		echo "Previous .config is for Normal Kernel, so remove"
+		rm -f .config
+	fi
+	DEFCONFIG=tizen_bcm2711_rt_defconfig
+	cp _localversion-rt localversion-rt
+	trap cleanup_localversion EXIT
+	echo "RT" > ./.kernel_buildenv
+else
+	echo "Build Normal-Kernel"
+	BUILD_VERSION=`cat .kernel_buildenv`
+	if [ "$BUILD_VERSION" == "RT" ]; then
+		echo "Previous .config is for RT Kernel, so remove"
+		rm -f .config
+	fi
+	echo "NORMAL" > ./.kernel_buildenv
+fi
+
+
+if [ $1 = "arm" ]; then
+	ARM_ARCH="arm"
+	CROSS_COMPILER="${CCACHE} arm-linux-gnueabi-"
+	echo "ARM"
+elif [ $1 = "arm64" ]; then
+	ARM_ARCH="arm64"
+	CROSS_COMPILER="${CCACHE} aarch64-linux-gnu-"
+	echo "ARM64"
+else
+	echo "There is no arcitecture type..."
+	exit 1;
+fi
+
+if ! [ -e .config ] ; then
+	make ARCH="${ARM_ARCH}" CROSS_COMPILE="${CROSS_COMPILER}" ${DEFCONFIG}
+fi
+
+make ARCH="${ARM_ARCH}" CROSS_COMPILE="${CROSS_COMPILER}" -j ${NCPUS} 2>&1 | \
+sed -e "/error:.*/{s/^.*error:/$(echo -e "${COLOR_RED_BG}&${COLOR_RESET}")/g;q1}" \
+	-e "/Error.*/{s/^.*Error:/$(echo -e "${COLOR_RED_BG}&${COLOR_RESET}")/g;q1}" \
+	-e "/ERROR.*/{s/^.*ERROR/$(echo -e "${COLOR_RED_BG}&${COLOR_RESET}")/g;q1}"
+if [ $? -ne 0 ]; then
+	echo -e "${COLOR_RED_BOLD}Failed to build${COLOR_RESET}"
+	exit 1
+fi
+
+# Make module.img file
+mk_modules
+
+# Create Tar file
+create_tar
-- 
2.7.4


From 3a6545dd346807f5902130404a07cf70c85578bf Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim <sw0312.kim@samsung.com>
Date: Tue, 26 Mar 2024 15:06:03 +0900
Subject: [PATCH 07/16] script: build: Add to build linux-tizen-modules

Build and install linux-tizen-modules in the local build script.

To build linux-tizen-modules, it is required to clone the tizen git
repositories platform/kernel/linux-tizen-modules-source and
platform/kernel/linux-tizen-modules in the parent path.

Change-Id: If54d188765c4c46ca21c8db15fce77038fca6755
Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
---
 build-rpi4.sh | 45 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/build-rpi4.sh b/build-rpi4.sh
index 8c2d3ce..d6711b0 100755
--- a/build-rpi4.sh
+++ b/build-rpi4.sh
@@ -10,6 +10,9 @@ MOD_SIZE=20
 NCPUS=`cat /proc/cpuinfo | grep processor | wc -l`
 NCPUS=$(($NCPUS * 2))
 
+BUILD_ROOT=$PWD
+KERNEL_PATH=`basename ${BUILD_ROOT}`
+
 if [ $# == 0 ]; then
 	echo "Usage : $0 <architecture> [rt]"
 	echo "	architecture : arm or arm64"
@@ -27,6 +30,45 @@ check_ccache()
 	fi
 }
 
+function mk_tizen_modules()
+{
+	if [ -e ${BUILD_ROOT}/../linux-tizen-modules-source ]; then
+		pushd ${BUILD_ROOT}/../linux-tizen-modules-source
+
+		ln -s ${PWD}/include kernel
+
+		export BUILD_logger=m
+		export LOGGER_MAIN_BUFFER_SIZE=256
+		export LOGGER_EVENTS_BUFFER_SIZE=256
+		export LOGGER_RADIO_BUFFER_SIZE=256
+		export LOGGER_SYSTEM_BUFFER_SIZE=256
+
+		export BUILD_proc_tsm=m
+		export BUILD_kdbus=m
+		export BUILD_zlogger=m
+
+		ARCH="${ARM_ARCH}" CROSS_COMPILE="${CROSS_COMPILER}" make -C kernel KERNELDIR=${BUILD_ROOT}
+
+		rm -f kernel/include
+
+		ARCH="${ARM_ARCH}" CROSS_COMPILE="${CROSS_COMPILER}" make -C kernel modules_install KERNELDIR=${BUILD_ROOT} INSTALL_MOD_PATH=../${KERNEL_PATH}/${MOD_DIR} INSTALL_MOD_STRIP=1 INSTALL_MOD_DIR=extra
+
+		popd
+
+		if [ -e ${BUILD_ROOT}/../linux-tizen-modules ]; then
+			mkdir -p ${MOD_DIR}/lib/modules/modprobe.d
+			mkdir -p ${MOD_DIR}/lib/modules/modules-load.d
+			cp ${BUILD_ROOT}/../linux-tizen-modules/conf/tizen-modprobe-rpi4.conf ${MOD_DIR}/lib/modules/modprobe.d
+			cp ${BUILD_ROOT}/../linux-tizen-modules/conf/tizen-modules-rpi4.conf ${MOD_DIR}/lib/modules/modules-load.d
+		fi
+	else
+		echo "linux-tizen-modules-source/linux-tizen-modules are not cloned in parent path, so skip building linux-tizen-modules."
+		echo "To build linux-tizen-modules, please use below command:"
+		echo "   git clone git://git.tizen.org/platform/kernel/linux-tizen-modules-source -b tizen ../linux-tizen-modules-source"
+		echo "   git clone git://git.tizen.org/platform/kernel/linux-tizen-modules -b tizen ../linux-tizen-modules"
+	fi
+}
+
 function mk_modules() {
 	[ -e /usr/bin/make_ext4fs ] && USE_MAKE_EXT4FS=1
 	if [ "$USE_MAKE_EXT4FS" != "1" ]; then
@@ -45,6 +87,9 @@ function mk_modules() {
 
 	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules_install INSTALL_MOD_PATH=${MOD_DIR} INSTALL_MOD_STRIP=1
 
+	# build and install linux-tizen-modules if cloned in parent path
+	mk_tizen_modules
+
 	if [ "$USE_MAKE_EXT4FS" == "1" ]; then
 		/usr/bin/make_ext4fs -b 4096 -L modules -l ${MOD_SIZE}M $MOD_IMG ${MOD_DIR}/lib/modules/
 	else
-- 
2.7.4


From 661d188897c0d964205e30b1378d90982c98be3a Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim <sw0312.kim@samsung.com>
Date: Tue, 26 Mar 2024 16:03:11 +0900
Subject: [PATCH 08/16] script: build: Remove local version temporary file

As in the error exit case, also remove the local version temporary
file when the build succeeds.

Change-Id: I51235dec2b1fa479e7fdff730ee4d859dbdbdb24
Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
---
 build-rpi4.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build-rpi4.sh b/build-rpi4.sh
index d6711b0..7e4a6e1 100755
--- a/build-rpi4.sh
+++ b/build-rpi4.sh
@@ -193,3 +193,5 @@ mk_modules
 
 # Create Tar file
 create_tar
+
+cleanup_localversion
-- 
2.7.4


From 301a853e585a257b986c355f79449121a4e8f9f7 Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim <sw0312.kim@samsung.com>
Date: Tue, 26 Mar 2024 17:24:56 +0900
Subject: [PATCH 09/16] script: build: Add option to create boot.img

Add option 'bootimg' to create boot.img vfat image having boot
firmwares, u-boot, boot script, kernel image, dt binaries and
dt overlays.

Change-Id: I62043417125129ca9cdd697a921da1ac1046d0d7
Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
---
 build-rpi4.sh | 112 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 99 insertions(+), 13 deletions(-)

diff --git a/build-rpi4.sh b/build-rpi4.sh
index 7e4a6e1..d7066bb 100755
--- a/build-rpi4.sh
+++ b/build-rpi4.sh
@@ -14,10 +14,12 @@ BUILD_ROOT=$PWD
 KERNEL_PATH=`basename ${BUILD_ROOT}`
 
 if [ $# == 0 ]; then
-	echo "Usage : $0 <architecture> [rt]"
+	echo "Usage : $0 <architecture> [rt] [bootimg]"
 	echo "	architecture : arm or arm64"
 	echo "	e.g) $0 arm"
 	echo "	e.g) $0 arm64 rt"
+	echo "	e.g) $0 arm64 bootimg"
+	echo "	e.g) $0 arm rt bootimg"
 	exit
 fi
 
@@ -30,6 +32,72 @@ check_ccache()
 	fi
 }
 
+check_sudo()
+{
+	[ -e /usr/bin/make_ext4fs ] && USE_MAKE_EXT4FS=1
+	if [ "$USE_MAKE_EXT4FS" != "1" ] || [ "$IS_BOOTIMG" = "1" ]; then
+		sudo ls > /dev/null
+	fi
+
+}
+
+function mk_bootimg()
+{
+	TMP_PATH=./tmp_bootimg
+	TMP_UBOOT_PATH=./tmp_uboot
+	BOOT_PATH="rpi4/boot"
+	USER_ID=`id -u`
+	GROUP_ID=`id -g`
+
+	rm -f boot.img
+	rm -rf ${TMP_PATH}
+	mkdir ${TMP_PATH}
+
+	# Create boot.img
+	mkfs.vfat -F 16 -C -n BOOT boot.img 65536
+	sudo mount -o loop,uid=$USER_ID,gid=$GROUP_ID,showexec boot.img ${TMP_PATH}
+
+	cp -a $BOOT_PATH/LICENCE.broadcom ${TMP_PATH}
+	cp -a $BOOT_PATH/start*.elf ${TMP_PATH}
+	cp -a $BOOT_PATH/fixup*.dat ${TMP_PATH}
+	if [ "$ARM_ARCH" = "arm64" ]; then
+		echo "Create 64bit boot image"
+		cp -a $BOOT_PATH/config_64bit.txt ${TMP_PATH}/config.txt
+		cp -a arch/arm64/boot/Image ${TMP_PATH}
+		cp -a arch/arm64/boot/dts/broadcom/bcm*.dtb ${TMP_PATH}
+	else
+		echo "Create 32bit boot image"
+		cp -a $BOOT_PATH/config.txt ${TMP_PATH}
+		cp -a arch/arm/boot/zImage ${TMP_PATH}
+		cp -a arch/arm/boot/dts/bcm*.dtb ${TMP_PATH}
+	fi
+	mkdir -p ${TMP_PATH}/overlays
+	cp -a arch/arm/boot/dts/overlays/*.dtbo ${TMP_PATH}/overlays
+
+	# install u-boot files extracted from u-boot-rpi4 rpm package in download.tizen.org.
+	rm -rf ${TMP_UBOOT_PATH}
+	mkdir -p ${TMP_UBOOT_PATH}
+	pushd ${TMP_UBOOT_PATH}
+	if [ "$ARM_ARCH" = "arm64" ]; then
+		REPO_URL=http://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Unified/latest/repos/standard/packages/aarch64/
+	else
+		REPO_URL=http://download.tizen.org/snapshots/TIZEN/Tizen/Tizen-Unified/latest/repos/standard/packages/armv7l/
+	fi
+	rm -f index.html*
+	wget ${REPO_URL}
+	UBOOT=`awk -F\" '{ print $2 }' index.html | grep u-boot-rpi4`
+	wget ${REPO_URL}${UBOOT}
+	unrpm ${UBOOT}
+	popd
+
+	cp -a ${TMP_UBOOT_PATH}/boot/* ${TMP_PATH}
+	sync
+	sudo umount ${TMP_PATH}
+
+	rm -rf ${TMP_UBOOT_PATH}
+	rm -rf ${TMP_PATH}
+}
+
 function mk_tizen_modules()
 {
 	if [ -e ${BUILD_ROOT}/../linux-tizen-modules-source ]; then
@@ -70,11 +138,6 @@ function mk_tizen_modules()
 }
 
 function mk_modules() {
-	[ -e /usr/bin/make_ext4fs ] && USE_MAKE_EXT4FS=1
-	if [ "$USE_MAKE_EXT4FS" != "1" ]; then
-		sudo ls > /dev/null
-	fi
-
 	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules_prepare
 	make ARCH=${ARM_ARCH} CROSS_COMPILE="$CROSS_COMPILER" modules -j ${NCPUS}
 
@@ -119,18 +182,18 @@ function create_tar()
 		mkdir ./output
 	fi
 
+	rm -f ./output/*
+
+	cp $BOOTFILES ./output
 	cp usr/modules.img ./output
 
+	FILES=`ls output`
 	DATE=`date +%Y%m%d%H%M`
 	if [ $ARM_ARCH = "arm" ]; then
-		cp ./arch/arm/boot/zImage ./output
-		cp ./arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dtb ./output
-		tar cvf tizen-local-${DATE}-boot-armv7l-rpi4.tar -C output zImage bcm2711-rpi-4-b.dtb modules.img
+		tar cvf tizen-local-${DATE}-boot-armv7l-rpi4.tar -C output $FILES
 		mv tizen-local-${DATE}-boot-armv7l-rpi4.tar ./output/
 	elif [ $ARM_ARCH = "arm64" ]; then
-		cp ./arch/arm64/boot/Image ./output
-		cp ./arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dtb ./output
-		tar cvf tizen-local-${DATE}-boot-arm64-rpi4.tar -C output Image bcm2711-rpi-4-b.dtb modules.img
+		tar cvf tizen-local-${DATE}-boot-arm64-rpi4.tar -C output $FILES
 		mv tizen-local-${DATE}-boot-arm64-rpi4.tar ./output/
 	else
 		echo "There is no proper architecture agrument!"
@@ -140,7 +203,18 @@ function create_tar()
 
 DEFCONFIG=tizen_bcm2711_defconfig
 touch .kernel_buildenv
-if [ "$2" = "rt" ]; then
+
+IS_RT=0
+if [ "$2" = "rt" ] || [ "$3" = "rt" ]; then
+	IS_RT=1
+fi
+
+IS_BOOTIMG=0
+if [ "$2" = "bootimg" ] || [ "$3" = "bootimg" ]; then
+	IS_BOOTIMG=1
+fi
+
+if [ "$IS_RT" = "1" ]; then
 	echo "Build RT-Kernel"
 	BUILD_VERSION=`cat .kernel_buildenv`
 	if [ "$BUILD_VERSION" != "RT" ]; then
@@ -161,6 +235,7 @@ else
 	echo "NORMAL" > ./.kernel_buildenv
 fi
 
+check_sudo
 
 if [ $1 = "arm" ]; then
 	ARM_ARCH="arm"
@@ -188,6 +263,17 @@ if [ $? -ne 0 ]; then
 	exit 1
 fi
 
+if [ "$IS_BOOTIMG" = "1" ]; then
+	mk_bootimg
+	BOOTFILES="boot.img"
+else
+	if [ $ARM_ARCH = "arm" ]; then
+		BOOTFILES="./arch/arm/boot/zImage  ./arch/arm/boot/dts/broadcom/bcm2711-rpi-4-b.dtb"
+	else
+		BOOTFILES="./arch/arm64/boot/Image ./arch/arm64/boot/dts/broadcom/bcm2711-rpi-4-b.dtb"
+	fi
+fi
+
 # Make module.img file
 mk_modules
 
-- 
2.7.4


From 00b1553bfef2822db5333175f001d5425ab3d9b8 Mon Sep 17 00:00:00 2001
From: Seung-Woo Kim <sw0312.kim@samsung.com>
Date: Thu, 11 Apr 2024 20:15:22 +0900
Subject: [PATCH 10/16] script: build: Fix wrong arm 32bit dtb path

In the arm 32bit case, the Raspberry Pi dtb files are also in the
boot/dts/broadcom directory. Fix the path for the arm 32bit dtb.

Change-Id: Id8831c4a8fde26e394e8cd712c10dd97262022ff
Fixes: commit 301a853e585a ("script: build: Add option to create boot.img")
Signed-off-by: Seung-Woo Kim <sw0312.kim@samsung.com>
---
 build-rpi4.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build-rpi4.sh b/build-rpi4.sh
index d7066bb..a453b29 100755
--- a/build-rpi4.sh
+++ b/build-rpi4.sh
@@ -69,7 +69,7 @@ function mk_bootimg()
 		echo "Create 32bit boot image"
 		cp -a $BOOT_PATH/config.txt ${TMP_PATH}
 		cp -a arch/arm/boot/zImage ${TMP_PATH}
-		cp -a arch/arm/boot/dts/bcm*.dtb ${TMP_PATH}
+		cp -a arch/arm/boot/dts/broadcom/bcm*.dtb ${TMP_PATH}
 	fi
 	mkdir -p ${TMP_PATH}/overlays
 	cp -a arch/arm/boot/dts/overlays/*.dtbo ${TMP_PATH}/overlays
-- 
2.7.4


From ce215a3a795e5338ae28ce422bb24df544d9d539 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 13 Oct 2023 14:48:21 +0800
Subject: [PATCH 11/16] page_pool: fragment API support for 32-bit arch with
 64-bit DMA

Currently page_pool_alloc_frag() is not supported in 32-bit
arch with 64-bit DMA because of the overlap issue between
pp_frag_count and dma_addr_upper in 'struct page' for those
arches, which seems to be quite common, see [1], which means
driver may need to handle it when using fragment API.

It is assumed that the combination of the above arch with an
address space >16TB does not exist, as all those arches have
64b equivalent, it seems logical to use the 64b version for a
system with a large address space. It is also assumed that dma
address is page aligned when we are dma mapping a page aligned
buffer, see [2].

That means we're storing 12 bits of 0 at the lower end for a
dma address, we can reuse those bits for the above arches to
support 32b+12b, which is 16TB of memory.

If we make a wrong assumption, a warning is emitted so that
user can report to us.

1. https://lore.kernel.org/all/20211117075652.58299-1-linyunsheng@huawei.com/
2. https://lore.kernel.org/all/20230818145145.4b357c89@kernel.org/

Tested-by: Alexander Lobakin <aleksander.lobakin@intel.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Guillaume Tucker <guillaume.tucker@collabora.com>
CC: Matthew Wilcox <willy@infradead.org>
CC: Linux-MM <linux-mm@kvack.org>
Link: https://lore.kernel.org/r/20231013064827.61135-2-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 90de47f020db086f7929e09f64efd0cf627d6869)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I9fdea00f780aac6bd06c2099a590790ed66aea54
---
 include/linux/mm_types.h        | 13 +------------
 include/net/page_pool/helpers.h | 20 ++++++++++++++------
 net/core/page_pool.c            | 14 +++++++++-----
 3 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 36c5b43..74b49c4 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -125,18 +125,7 @@ struct page {
 			struct page_pool *pp;
 			unsigned long _pp_mapping_pad;
 			unsigned long dma_addr;
-			union {
-				/**
-				 * dma_addr_upper: might require a 64-bit
-				 * value on 32-bit architectures.
-				 */
-				unsigned long dma_addr_upper;
-				/**
-				 * For frag page support, not supported in
-				 * 32-bit architectures with 64-bit DMA.
-				 */
-				atomic_long_t pp_frag_count;
-			};
+			atomic_long_t pp_frag_count;
 		};
 		struct {	/* Tail pages of compound page */
 			unsigned long compound_head;	/* Bit zero is set */
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 8e77514..8f64adf 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -197,7 +197,7 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 	page_pool_put_full_page(pool, page, true);
 }
 
-#define PAGE_POOL_DMA_USE_PP_FRAG_COUNT	\
+#define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA	\
 		(sizeof(dma_addr_t) > sizeof(unsigned long))
 
 /**
@@ -211,17 +211,25 @@ static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
 {
 	dma_addr_t ret = page->dma_addr;
 
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
-		ret |= (dma_addr_t)page->dma_addr_upper << 16 << 16;
+	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
+		ret <<= PAGE_SHIFT;
 
 	return ret;
 }
 
-static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
 {
+	if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) {
+		page->dma_addr = addr >> PAGE_SHIFT;
+
+		/* We assume page alignment to shave off bottom bits,
+		 * if this "compression" doesn't work we need to drop.
+		 */
+		return addr != (dma_addr_t)page->dma_addr << PAGE_SHIFT;
+	}
+
 	page->dma_addr = addr;
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT)
-		page->dma_addr_upper = upper_32_bits(addr);
+	return false;
 }
 
 static inline bool page_pool_put(struct page_pool *pool)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 31f923e..278332e 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -211,10 +211,6 @@ static int page_pool_init(struct page_pool *pool,
 		 */
 	}
 
-	if (PAGE_POOL_DMA_USE_PP_FRAG_COUNT &&
-	    pool->p.flags & PP_FLAG_PAGE_FRAG)
-		return -EINVAL;
-
 #ifdef CONFIG_PAGE_POOL_STATS
 	pool->recycle_stats = alloc_percpu(struct page_pool_recycle_stats);
 	if (!pool->recycle_stats)
@@ -363,12 +359,20 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
 	if (dma_mapping_error(pool->p.dev, dma))
 		return false;
 
-	page_pool_set_dma_addr(page, dma);
+	if (page_pool_set_dma_addr(page, dma))
+		goto unmap_failed;
 
 	if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
 		page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
 
 	return true;
+
+unmap_failed:
+	WARN_ON_ONCE("unexpected DMA address, please report to netdev@");
+	dma_unmap_page_attrs(pool->p.dev, dma,
+			     PAGE_SIZE << pool->p.order, pool->p.dma_dir,
+			     DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING);
+	return false;
 }
 
 static void page_pool_set_pp_info(struct page_pool *pool,
-- 
2.7.4


From b64bd10d99215cebb935563d04773b9609b2a214 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 20 Oct 2023 17:59:48 +0800
Subject: [PATCH 12/16] page_pool: unify frag_count handling in
 page_pool_is_last_frag()

Currently when page_pool_create() is called with
PP_FLAG_PAGE_FRAG flag, page_pool_alloc_pages() is only
allowed to be called under the below constraints:
1. page_pool_fragment_page() need to be called to setup
   page->pp_frag_count immediately.
2. page_pool_defrag_page() often need to be called to drain
   the page->pp_frag_count when there is no more user will
   be holding on to that page.

Those constraints exist in order to support a page to be
split into multi fragments.

And those constraints have some overhead because of the
cache line dirtying/bouncing and atomic update.

Those constraints are unavoidable for case when we need a
page to be split into more than one fragment, but there is
also case that we want to avoid the above constraints and
their overhead when a page can't be split as it can only
hold a fragment as requested by user, depending on different
use cases:
use case 1: allocate page without page splitting.
use case 2: allocate page with page splitting.
use case 3: allocate page with or without page splitting
            depending on the fragment size.

Currently page pool only provides the page_pool_alloc_pages() and
page_pool_alloc_frag() APIs to enable use cases 1 & 2 separately,
so we can not use a combination of 1 & 2 to enable 3; it is
not possible yet because of the per page_pool flag
PP_FLAG_PAGE_FRAG.

So in order to allow allocating an unsplit page without the
overhead of a split page while still allowing allocating a split
page, we need to remove the per page_pool flag in
page_pool_is_last_frag(). As best as I can think of, there seem
to be two methods, as below:
1. Add per page flag/bit to indicate a page is split or
   not, which means we might need to update that flag/bit
   everytime the page is recycled, dirtying the cache line
   of 'struct page' for use case 1.
2. Unify the page->pp_frag_count handling for both split and
   unsplit pages by assuming all pages in the page pool are split
   into one big fragment initially.

As page pool already supports use case 1 without dirtying the
cache line of 'struct page' whenever a page is recyclable, we
need to support the above use case 3 with minimal overhead,
especially not adding any noticeable overhead for use case 1,
and we are already doing an optimization by not updating
pp_frag_count in page_pool_defrag_page() for the last fragment
user, this patch chooses to unify the pp_frag_count handling
to support the above use case 3.

With this patch applied there is no noticeable performance
degradation; the micro-benchmark testing in [1] provides some
justification for unifying the frag_count handling.

1. https://lore.kernel.org/all/bf2591f8-7b3c-4480-bb2c-31dc9da1d6ac@huawei.com/

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20231020095952.11055-2-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 58d53d8f7da63dd13903bec0a40b3009a841b61b)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I4020fab404d9c362383a10f8b596a767b1b091a8
---
 include/net/page_pool/helpers.h | 47 +++++++++++++++++++++++++++++------------
 net/core/page_pool.c            | 10 ++++++++-
 2 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 8f64adf..759489c 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -115,28 +115,49 @@ static inline long page_pool_defrag_page(struct page *page, long nr)
 	long ret;
 
 	/* If nr == pp_frag_count then we have cleared all remaining
-	 * references to the page. No need to actually overwrite it, instead
-	 * we can leave this to be overwritten by the calling function.
+	 * references to the page:
+	 * 1. 'nr == 1': no need to actually overwrite it.
+	 * 2. 'nr != 1': overwrite it with one, which is the rare case
+	 *               for pp_frag_count draining.
 	 *
-	 * The main advantage to doing this is that an atomic_read is
-	 * generally a much cheaper operation than an atomic update,
-	 * especially when dealing with a page that may be partitioned
-	 * into only 2 or 3 pieces.
+	 * The main advantage to doing this is that not only do we avoid an
+	 * atomic update, as an atomic_read is generally a much cheaper
+	 * operation than an atomic update, especially when dealing with a page
+	 * that may be partitioned into only 2 or 3 pieces; but we also unify
+	 * the pp_frag_count handling by ensuring all pages have been
+	 * partitioned into only 1 piece initially, and only overwrite it when
+	 * the page is partitioned into more than one piece.
 	 */
-	if (atomic_long_read(&page->pp_frag_count) == nr)
+	if (atomic_long_read(&page->pp_frag_count) == nr) {
+		/* As we have ensured nr is always one for constant case using
+		 * the BUILD_BUG_ON(), only need to handle the non-constant case
+		 * here for pp_frag_count draining, which is a rare case.
+		 */
+		BUILD_BUG_ON(__builtin_constant_p(nr) && nr != 1);
+		if (!__builtin_constant_p(nr))
+			atomic_long_set(&page->pp_frag_count, 1);
+
 		return 0;
+	}
 
 	ret = atomic_long_sub_return(nr, &page->pp_frag_count);
 	WARN_ON(ret < 0);
+
+	/* We are the last user here too, reset pp_frag_count back to 1 to
+	 * ensure all pages have been partitioned into 1 piece initially,
+	 * this should be the rare case when the last two fragment users call
+	 * page_pool_defrag_page() concurrently.
+	 */
+	if (unlikely(!ret))
+		atomic_long_set(&page->pp_frag_count, 1);
+
 	return ret;
 }
 
-static inline bool page_pool_is_last_frag(struct page_pool *pool,
-					  struct page *page)
+static inline bool page_pool_is_last_frag(struct page *page)
 {
-	/* If fragments aren't enabled or count is 0 we were the last user */
-	return !(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
-	       (page_pool_defrag_page(page, 1) == 0);
+	/* If page_pool_defrag_page() returns 0, we were the last user */
+	return page_pool_defrag_page(page, 1) == 0;
 }
 
 /**
@@ -161,7 +182,7 @@ static inline void page_pool_put_page(struct page_pool *pool,
 	 * allow registering MEM_TYPE_PAGE_POOL, but shield linker.
 	 */
 #ifdef CONFIG_PAGE_POOL
-	if (!page_pool_is_last_frag(pool, page))
+	if (!page_pool_is_last_frag(page))
 		return;
 
 	page_pool_put_defragged_page(pool, page, dma_sync_size, allow_direct);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 278332e..59bad96 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -380,6 +380,14 @@ static void page_pool_set_pp_info(struct page_pool *pool,
 {
 	page->pp = pool;
 	page->pp_magic |= PP_SIGNATURE;
+
+	/* Ensuring all pages have been split into one fragment initially:
+	 * page_pool_set_pp_info() is only called once for every page when it
+	 * is allocated from the page allocator and page_pool_fragment_page()
+	 * is dirtying the same cache line as the page->pp_magic above, so
+	 * the overhead is negligible.
+	 */
+	page_pool_fragment_page(page, 1);
 	if (pool->p.init_callback)
 		pool->p.init_callback(page, pool->p.init_arg);
 }
@@ -676,7 +684,7 @@ void page_pool_put_page_bulk(struct page_pool *pool, void **data,
 		struct page *page = virt_to_head_page(data[i]);
 
 		/* It is not the last user for the page frag case */
-		if (!page_pool_is_last_frag(pool, page))
+		if (!page_pool_is_last_frag(page))
 			continue;
 
 		page = __page_pool_put_page(pool, page, -1, false);
-- 
2.7.4


From e662056480461dcb37c588f708f00191e0be3222 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 20 Oct 2023 17:59:49 +0800
Subject: [PATCH 13/16] page_pool: remove PP_FLAG_PAGE_FRAG

PP_FLAG_PAGE_FRAG is not really needed after pp_frag_count
handling is unified and page_pool_alloc_frag() is supported
in 32-bit arch with 64-bit DMA, so remove it.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20231020095952.11055-3-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 09d96ee5674a0eaa800c664353756ecc45c4a87f)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I5d7223d1cb9baefea5c7e86f9a4de3e972c04a4a
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c                | 2 --
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c          | 3 +--
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c        | 2 +-
 drivers/net/wireless/mediatek/mt76/mac80211.c            | 2 +-
 include/net/page_pool/types.h                            | 6 ++----
 net/core/page_pool.c                                     | 3 +--
 net/core/skbuff.c                                        | 2 +-
 8 files changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index dac4f95..06b3789 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -3214,8 +3214,6 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
 	pp.dma_dir = bp->rx_dir;
 	pp.max_len = PAGE_SIZE;
 	pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
-	if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
-		pp.flags |= PP_FLAG_PAGE_FRAG;
 
 	rxr->page_pool = page_pool_create(&pp);
 	if (IS_ERR(rxr->page_pool)) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 677cfaa..b618797 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -4940,8 +4940,7 @@ static void hns3_put_ring_config(struct hns3_nic_priv *priv)
 static void hns3_alloc_page_pool(struct hns3_enet_ring *ring)
 {
 	struct page_pool_params pp_params = {
-		.flags = PP_FLAG_DMA_MAP | PP_FLAG_PAGE_FRAG |
-				PP_FLAG_DMA_SYNC_DEV,
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
 		.order = hns3_page_order(ring),
 		.pool_size = ring->desc_num * hns3_buf_size(ring) /
 				(PAGE_SIZE << hns3_page_order(ring)),
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index e6df4e6..02d0b70 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -1409,7 +1409,7 @@ int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 	}
 
 	pp_params.order = get_order(buf_size);
-	pp_params.flags = PP_FLAG_PAGE_FRAG | PP_FLAG_DMA_MAP;
+	pp_params.flags = PP_FLAG_DMA_MAP;
 	pp_params.pool_size = min(OTX2_PAGE_POOL_SZ, numptrs);
 	pp_params.nid = NUMA_NO_NODE;
 	pp_params.dev = pfvf->dev;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index c3961c2..8707691 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -834,7 +834,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
 		struct page_pool_params pp_params = { 0 };
 
 		pp_params.order     = 0;
-		pp_params.flags     = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | PP_FLAG_PAGE_FRAG;
+		pp_params.flags     = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV;
 		pp_params.pool_size = pool_size;
 		pp_params.nid       = node;
 		pp_params.dev       = rq->pdev;
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index dbab400..8d66825 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -566,7 +566,7 @@ int mt76_create_page_pool(struct mt76_dev *dev, struct mt76_queue *q)
 {
 	struct page_pool_params pp_params = {
 		.order = 0,
-		.flags = PP_FLAG_PAGE_FRAG,
+		.flags = 0,
 		.nid = NUMA_NO_NODE,
 		.dev = dev->dma_dev,
 	};
diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h
index 887e794..6fc5134 100644
--- a/include/net/page_pool/types.h
+++ b/include/net/page_pool/types.h
@@ -17,10 +17,8 @@
 					* Please note DMA-sync-for-CPU is still
 					* device driver responsibility
 					*/
-#define PP_FLAG_PAGE_FRAG	BIT(2) /* for page frag feature */
 #define PP_FLAG_ALL		(PP_FLAG_DMA_MAP |\
-				 PP_FLAG_DMA_SYNC_DEV |\
-				 PP_FLAG_PAGE_FRAG)
+				 PP_FLAG_DMA_SYNC_DEV)
 
 /*
  * Fast allocation side cache array/stack
@@ -45,7 +43,7 @@ struct pp_alloc_cache {
 
 /**
  * struct page_pool_params - page pool parameters
- * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV, PP_FLAG_PAGE_FRAG
+ * @flags:	PP_FLAG_DMA_MAP, PP_FLAG_DMA_SYNC_DEV
  * @order:	2^order pages on allocation
  * @pool_size:	size of the ptr_ring
  * @nid:	NUMA node id to allocate from pages from
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 59bad96..11ce15a 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -760,8 +760,7 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
 	unsigned int max_size = PAGE_SIZE << pool->p.order;
 	struct page *page = pool->frag_page;
 
-	if (WARN_ON(!(pool->p.flags & PP_FLAG_PAGE_FRAG) ||
-		    size > max_size))
+	if (WARN_ON(size > max_size))
 		return NULL;
 
 	size = ALIGN(size, dma_get_cache_alignment());
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9a9fbe1..8c1dd63 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -5752,7 +5752,7 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
 	/* In general, avoid mixing page_pool and non-page_pool allocated
 	 * pages within the same SKB. Additionally avoid dealing with clones
 	 * with page_pool pages, in case the SKB is using page_pool fragment
-	 * references (PP_FLAG_PAGE_FRAG). Since we only take full page
+	 * references (page_pool_alloc_frag()). Since we only take full page
 	 * references for cloned SKBs at the moment that would result in
 	 * inconsistent reference counts.
 	 * In theory we could take full references if @from is cloned and
-- 
2.7.4


From 564415c41bb7f7d7c8c76a1944432836959c799c Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 20 Oct 2023 17:59:50 +0800
Subject: [PATCH 14/16] page_pool: introduce page_pool_alloc() API

Currently page pool supports the below use cases:
use case 1: allocate page without page splitting using
            page_pool_alloc_pages() API if the driver knows
            that the memory it need is always bigger than
            half of the page allocated from page pool.
use case 2: allocate page frag with page splitting using
            page_pool_alloc_frag() API if the driver knows
            that the memory it need is always smaller than
            or equal to the half of the page allocated from
            page pool.

There are emerging use cases [1] & [2] that are a mix of the
above two cases: the driver doesn't know the size of memory it
needs beforehand, so the driver may use something like below to
allocate memory with the least memory utilization and performance
penalty:

if (size << 1 > max_size)
	page = page_pool_alloc_pages();
else
	page = page_pool_alloc_frag();

To avoid the driver doing something like above, add the
page_pool_alloc() API to support the above use case, and report
the true size of the memory that is actually allocated by writing
it back to '*size' for the driver, in order to avoid exacerbating
the truesize underestimate problem.

Rename page_pool_free() which is used in the destroy process to
__page_pool_destroy() to avoid confusion with the newly added
API.

1. https://lore.kernel.org/all/d3ae6bd3537fbce379382ac6a42f67e22f27ece2.1683896626.git.lorenzo@kernel.org/
2. https://lore.kernel.org/all/20230526054621.18371-3-liangchen.linux@gmail.com/

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20231020095952.11055-4-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit de97502e16fc406a74edee8359612e518986cf59)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I56b80851e1e18980521292771e13177698fee12e
---
 include/net/page_pool/helpers.h | 66 +++++++++++++++++++++++++++++++++++++++++
 net/core/page_pool.c            |  4 +--
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 759489c..1b76e05 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -82,6 +82,66 @@ static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
 	return page_pool_alloc_frag(pool, offset, size, gfp);
 }
 
+static inline struct page *page_pool_alloc(struct page_pool *pool,
+					   unsigned int *offset,
+					   unsigned int *size, gfp_t gfp)
+{
+	unsigned int max_size = PAGE_SIZE << pool->p.order;
+	struct page *page;
+
+	if ((*size << 1) > max_size) {
+		*size = max_size;
+		*offset = 0;
+		return page_pool_alloc_pages(pool, gfp);
+	}
+
+	page = page_pool_alloc_frag(pool, offset, *size, gfp);
+	if (unlikely(!page))
+		return NULL;
+
+	/* There is very likely not enough space for another fragment, so append
+	 * the remaining size to the current fragment to avoid truesize
+	 * underestimate problem.
+	 */
+	if (pool->frag_offset + *size > max_size) {
+		*size = max_size - *offset;
+		pool->frag_offset = max_size;
+	}
+
+	return page;
+}
+
+static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
+					       unsigned int *offset,
+					       unsigned int *size)
+{
+	gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+	return page_pool_alloc(pool, offset, size, gfp);
+}
+
+static inline void *page_pool_alloc_va(struct page_pool *pool,
+				       unsigned int *size, gfp_t gfp)
+{
+	unsigned int offset;
+	struct page *page;
+
+	/* Mask off __GFP_HIGHMEM to ensure we can use page_address() */
+	page = page_pool_alloc(pool, &offset, size, gfp & ~__GFP_HIGHMEM);
+	if (unlikely(!page))
+		return NULL;
+
+	return page_address(page) + offset;
+}
+
+static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
+					   unsigned int *size)
+{
+	gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN);
+
+	return page_pool_alloc_va(pool, size, gfp);
+}
+
 /**
  * page_pool_get_dma_dir() - Retrieve the stored DMA direction.
  * @pool:	pool from which page was allocated
@@ -221,6 +281,12 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA	\
 		(sizeof(dma_addr_t) > sizeof(unsigned long))
 
+static inline void page_pool_free_va(struct page_pool *pool, void *va,
+				     bool allow_direct)
+{
+	page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
+}
+
 /**
  * page_pool_get_dma_addr() - Retrieve the stored DMA address.
  * @page:	page allocated from a page pool
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 11ce15a..dec5443 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -813,7 +813,7 @@ static void page_pool_empty_ring(struct page_pool *pool)
 	}
 }
 
-static void page_pool_free(struct page_pool *pool)
+static void __page_pool_destroy(struct page_pool *pool)
 {
 	if (pool->disconnect)
 		pool->disconnect(pool);
@@ -864,7 +864,7 @@ static int page_pool_release(struct page_pool *pool)
 	page_pool_scrub(pool);
 	inflight = page_pool_inflight(pool);
 	if (!inflight)
-		page_pool_free(pool);
+		__page_pool_destroy(pool);
 
 	return inflight;
 }
-- 
2.7.4


From f3a5bc15db5d814d64b91c619ba320cf15eb59a1 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 20 Oct 2023 17:59:51 +0800
Subject: [PATCH 15/16] page_pool: update document about fragment API

As more drivers begin to use the fragment API, update the
document about how to decide which API to use for the
driver author.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Alexander Lobakin <aleksander.lobakin@intel.com>
CC: Dima Tisnek <dimaqq@gmail.com>
Link: https://lore.kernel.org/r/20231020095952.11055-5-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 8ab32fa1c7947f4807b1d98af2d411a2587bb841)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I03e5460b08e7607bd85a44b37930d770a0c56c9d
---
 Documentation/networking/page_pool.rst |  4 +-
 include/net/page_pool/helpers.h        | 93 +++++++++++++++++++++++++++++-----
 2 files changed, 83 insertions(+), 14 deletions(-)

diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
index 215ebc9..60993cb 100644
--- a/Documentation/networking/page_pool.rst
+++ b/Documentation/networking/page_pool.rst
@@ -58,7 +58,9 @@ a page will cause no race conditions is enough.
 
 .. kernel-doc:: include/net/page_pool/helpers.h
    :identifiers: page_pool_put_page page_pool_put_full_page
-		 page_pool_recycle_direct page_pool_dev_alloc_pages
+		 page_pool_recycle_direct page_pool_free_va
+		 page_pool_dev_alloc_pages page_pool_dev_alloc_frag
+		 page_pool_dev_alloc page_pool_dev_alloc_va
 		 page_pool_get_dma_addr page_pool_get_dma_dir
 
 .. kernel-doc:: net/core/page_pool.c
diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
index 1b76e05..4ebd544 100644
--- a/include/net/page_pool/helpers.h
+++ b/include/net/page_pool/helpers.h
@@ -8,23 +8,46 @@
 /**
  * DOC: page_pool allocator
  *
- * The page_pool allocator is optimized for the XDP mode that
- * uses one frame per-page, but it can fallback on the
- * regular page allocator APIs.
+ * The page_pool allocator is optimized for recycling page or page fragment used
+ * by skb packet and xdp frame.
  *
- * Basic use involves replacing alloc_pages() calls with the
- * page_pool_alloc_pages() call.  Drivers should use
- * page_pool_dev_alloc_pages() replacing dev_alloc_pages().
+ * Basic use involves replacing any alloc_pages() calls with page_pool_alloc(),
+ * which allocates memory with or without page splitting depending on the
+ * requested memory size.
  *
- * The API keeps track of in-flight pages, in order to let API users know
- * when it is safe to free a page_pool object.  Thus, API users
- * must call page_pool_put_page() to free the page, or attach
- * the page to a page_pool-aware object like skbs marked with
+ * If the driver knows that it always requires full pages or its allocations are
+ * always smaller than half a page, it can use one of the more specific API
+ * calls:
+ *
+ * 1. page_pool_alloc_pages(): allocate memory without page splitting when
+ * driver knows that the memory it needs is always bigger than half of the page
+ * allocated from page pool. There is no cache line dirtying for 'struct page'
+ * when a page is recycled back to the page pool.
+ *
+ * 2. page_pool_alloc_frag(): allocate memory with page splitting when driver
+ * knows that the memory it needs is always smaller than or equal to half of the
+ * page allocated from page pool. Page splitting enables memory saving and thus
+ * avoids TLB/cache miss for data access, but there also is some cost to
+ * implement page splitting, mainly some cache line dirtying/bouncing for
+ * 'struct page' and atomic operation for page->pp_frag_count.
+ *
+ * The API keeps track of in-flight pages, in order to let API users know when
+ * it is safe to free a page_pool object, the API users must call
+ * page_pool_put_page() or page_pool_free_va() to free the page_pool object, or
+ * attach the page_pool object to a page_pool-aware object like skbs marked with
  * skb_mark_for_recycle().
  *
- * API users must call page_pool_put_page() once on a page, as it
- * will either recycle the page, or in case of refcnt > 1, it will
- * release the DMA mapping and in-flight state accounting.
+ * page_pool_put_page() may be called multiple times on the same page if a page
+ * is split into multiple fragments. For the last fragment, it will either
+ * recycle the page, or in case of page->_refcount > 1, it will release the DMA
+ * mapping and in-flight state accounting.
+ *
+ * dma_sync_single_range_for_device() is only called for the last fragment when
+ * page_pool is created with PP_FLAG_DMA_SYNC_DEV flag, so it depends on the
+ * last freed fragment to do the sync_for_device operation for all fragments in
+ * the same page when a page is split. The API user must set up pool->p.max_len
+ * and pool->p.offset correctly and ensure that page_pool_put_page() is called
+ * with dma_sync_size being -1 for fragment API.
  */
 #ifndef _NET_PAGE_POOL_HELPERS_H
 #define _NET_PAGE_POOL_HELPERS_H
@@ -73,6 +96,17 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool)
 	return page_pool_alloc_pages(pool, gfp);
 }
 
+/**
+ * page_pool_dev_alloc_frag() - allocate a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: requested size
+ *
+ * Get a page fragment from the page allocator or page_pool caches.
+ *
+ * Return:
+ * Return allocated page fragment, otherwise return NULL.
+ */
 static inline struct page *page_pool_dev_alloc_frag(struct page_pool *pool,
 						    unsigned int *offset,
 						    unsigned int size)
@@ -111,6 +145,19 @@ static inline struct page *page_pool_alloc(struct page_pool *pool,
 	return page;
 }
 
+/**
+ * page_pool_dev_alloc() - allocate a page or a page fragment.
+ * @pool: pool from which to allocate
+ * @offset: offset to the allocated page
+ * @size: in as the requested size, out as the allocated size
+ *
+ * Get a page or a page fragment from the page allocator or page_pool caches
+ * depending on the requested size in order to allocate memory with least memory
+ * utilization and performance penalty.
+ *
+ * Return:
+ * Return allocated page or page fragment, otherwise return NULL.
+ */
 static inline struct page *page_pool_dev_alloc(struct page_pool *pool,
 					       unsigned int *offset,
 					       unsigned int *size)
@@ -134,6 +181,18 @@ static inline void *page_pool_alloc_va(struct page_pool *pool,
 	return page_address(page) + offset;
 }
 
+/**
+ * page_pool_dev_alloc_va() - allocate a page or a page fragment and return its
+ *			      va.
+ * @pool: pool from which to allocate
+ * @size: in as the requested size, out as the allocated size
+ *
+ * This is just a thin wrapper around the page_pool_alloc() API, and
+ * it returns va of the allocated page or page fragment.
+ *
+ * Return:
+ * Return the va for the allocated page or page fragment, otherwise return NULL.
+ */
 static inline void *page_pool_dev_alloc_va(struct page_pool *pool,
 					   unsigned int *size)
 {
@@ -281,6 +340,14 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
 #define PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA	\
 		(sizeof(dma_addr_t) > sizeof(unsigned long))
 
+/**
+ * page_pool_free_va() - free a va into the page_pool
+ * @pool: pool from which va was allocated
+ * @va: va to be freed
+ * @allow_direct: freed by the consumer, allow lockless caching
+ *
+ * Free a va allocated from page_pool_alloc_va().
+ */
 static inline void page_pool_free_va(struct page_pool *pool, void *va,
 				     bool allow_direct)
 {
-- 
2.7.4


From 9d5e58f624b23fde16c0635269a64babc5e0dfe7 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 20 Oct 2023 17:59:52 +0800
Subject: [PATCH 16/16] net: veth: use newly added page pool API for veth with
 xdp

Use page_pool_alloc() API to allocate memory with least
memory utilization and performance penalty.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Lorenzo Bianconi <lorenzo@kernel.org>
CC: Alexander Duyck <alexander.duyck@gmail.com>
CC: Liang Chen <liangchen.linux@gmail.com>
CC: Alexander Lobakin <aleksander.lobakin@intel.com>
Link: https://lore.kernel.org/r/20231020095952.11055-6-linyunsheng@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
(cherry picked from commit 2d0de67da51a90c6acf7bf08d7b0501f45408002)
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>

Change-Id: I7fd0ea80d818122ed66642c98bbd33705c348e46
---
 drivers/net/veth.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 0f798bc..57efb34 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -729,10 +729,11 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
 	    skb_shinfo(skb)->nr_frags ||
 	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
-		u32 size, len, max_head_size, off;
+		u32 size, len, max_head_size, off, truesize, page_offset;
 		struct sk_buff *nskb;
 		struct page *page;
 		int i, head_off;
+		void *va;
 
 		/* We need a private copy of the skb and data buffers since
 		 * the ebpf program can modify it. We segment the original skb
@@ -745,14 +746,17 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 			goto drop;
 
+		size = min_t(u32, skb->len, max_head_size);
+		truesize = SKB_HEAD_ALIGN(size) + VETH_XDP_HEADROOM;
+
 		/* Allocate skb head */
-		page = page_pool_dev_alloc_pages(rq->page_pool);
-		if (!page)
+		va = page_pool_dev_alloc_va(rq->page_pool, &truesize);
+		if (!va)
 			goto drop;
 
-		nskb = napi_build_skb(page_address(page), PAGE_SIZE);
+		nskb = napi_build_skb(va, truesize);
 		if (!nskb) {
-			page_pool_put_full_page(rq->page_pool, page, true);
+			page_pool_free_va(rq->page_pool, va, true);
 			goto drop;
 		}
 
@@ -760,7 +764,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		skb_copy_header(nskb, skb);
 		skb_mark_for_recycle(nskb);
 
-		size = min_t(u32, skb->len, max_head_size);
 		if (skb_copy_bits(skb, 0, nskb->data, size)) {
 			consume_skb(nskb);
 			goto drop;
@@ -775,14 +778,18 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 		len = skb->len - off;
 
 		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
-			page = page_pool_dev_alloc_pages(rq->page_pool);
+			size = min_t(u32, len, PAGE_SIZE);
+			truesize = size;
+
+			page = page_pool_dev_alloc(rq->page_pool, &page_offset,
+						   &truesize);
 			if (!page) {
 				consume_skb(nskb);
 				goto drop;
 			}
 
-			size = min_t(u32, len, PAGE_SIZE);
-			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+			skb_add_rx_frag(nskb, i, page, page_offset, size,
+					truesize);
 			if (skb_copy_bits(skb, off, page_address(page),
 					  size)) {
 				consume_skb(nskb);
-- 
2.7.4