From 30f158740984f9949765f6112456d62d2ca6deba Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 18 Nov 2022 14:27:29 -0800
Subject: [PATCH] ice: fix handling of burst Tx timestamps

Commit 1229b33973c7 ("ice: Add low latency Tx timestamp read") refactored
PTP timestamping logic to use a threaded IRQ instead of a separate kthread.

This implementation introduced ice_misc_intr_thread_fn and redefined the
ice_ptp_process_ts function interface to return a value of whether or not
the timestamp processing was complete.

ice_misc_intr_thread_fn would take the return value from ice_ptp_process_ts
and convert it into either IRQ_HANDLED if there were no more timestamps to
be processed, or IRQ_WAKE_THREAD if the thread should continue processing.

This is not correct, as the kernel does not re-schedule threaded IRQ
functions automatically. IRQ_WAKE_THREAD can only be used by the main IRQ
function.

This results in the ice_ptp_process_ts function (and in turn the
ice_ptp_tx_tstamp function) from only being called exactly once per
interrupt.

If an application sends a burst of Tx timestamps without waiting for a
response, the interrupt will trigger for the first timestamp. However,
later timestamps may not have arrived yet. This can result in dropped or
discarded timestamps. Worse, on E822 hardware this results in the interrupt
logic getting stuck such that no future interrupts will be triggered. The
result is complete loss of Tx timestamp functionality.

Fix this by modifying the ice_misc_intr_thread_fn to perform its own
polling of the ice_ptp_process_ts function. We sleep for a few microseconds
between attempts to avoid wasting significant CPU time. The value was
chosen to allow time for the Tx timestamps to complete without wasting so
much time that we overrun application wait budgets in the worst case.

The ice_ptp_process_ts function also currently returns false in the event
that the Tx tracker is not initialized. This would result in the threaded
IRQ handler never exiting if it gets started while the tracker is not
initialized.

Fix the function to appropriately return true when the tracker is not
initialized.

Note that this will not reproduce with default ptp4l behavior, as the
program always synchronously waits for a timestamp response before sending
another timestamp request.

Reported-by: Siddaraju DH <siddaraju.dh@intel.com>
Fixes: 1229b33973c7 ("ice: Add low latency Tx timestamp read")
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Gurucharan G <gurucharanx.g@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20221118222729.1565317-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 12 ++++++------
 drivers/net/ethernet/intel/ice/ice_ptp.c  | 20 ++++++++++----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 0f67187..ca28984 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3145,15 +3145,15 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
  */
 static irqreturn_t ice_misc_intr_thread_fn(int __always_unused irq, void *data)
 {
-	irqreturn_t ret = IRQ_HANDLED;
 	struct ice_pf *pf = data;
-	bool irq_handled;
 
-	irq_handled = ice_ptp_process_ts(pf);
-	if (!irq_handled)
-		ret = IRQ_WAKE_THREAD;
+	if (ice_is_reset_in_progress(pf->state))
+		return IRQ_HANDLED;
 
-	return ret;
+	while (!ice_ptp_process_ts(pf))
+		usleep_range(50, 100);
+
+	return IRQ_HANDLED;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index 011b727..0f66846 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -614,11 +614,14 @@ static u64 ice_ptp_extend_40b_ts(struct ice_pf *pf, u64 in_tstamp)
  * 2) extend the 40b timestamp value to get a 64bit timestamp
  * 3) send that timestamp to the stack
  *
- * After looping, if we still have waiting SKBs, return true. This may cause us
- * effectively poll even when not strictly necessary. We do this because it's
- * possible a new timestamp was requested around the same time as the interrupt.
- * In some cases hardware might not interrupt us again when the timestamp is
- * captured.
+ * Returns true if all timestamps were handled, and false if any slots remain
+ * without a timestamp.
+ *
+ * After looping, if we still have waiting SKBs, return false. This may cause
+ * us effectively poll even when not strictly necessary. We do this because
+ * it's possible a new timestamp was requested around the same time as the
+ * interrupt. In some cases hardware might not interrupt us again when the
+ * timestamp is captured.
  *
  * Note that we only take the tracking lock when clearing the bit and when
  * checking if we need to re-queue this task. The only place where bits can be
@@ -641,7 +644,7 @@ static bool ice_ptp_tx_tstamp(struct ice_ptp_tx *tx)
 	u8 idx;
 
 	if (!tx->init)
-		return false;
+		return true;
 
 	ptp_port = container_of(tx, struct ice_ptp_port, tx);
 	pf = ptp_port_to_pf(ptp_port);
@@ -2381,10 +2384,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
  */
 bool ice_ptp_process_ts(struct ice_pf *pf)
 {
-	if (pf->ptp.port.tx.init)
-		return ice_ptp_tx_tstamp(&pf->ptp.port.tx);
-
-	return false;
+	return ice_ptp_tx_tstamp(&pf->ptp.port.tx);
 }
 
 static void ice_ptp_periodic_work(struct kthread_work *work)
-- 
2.7.4