scsi: mpt3sas: Sync time periodically between driver and firmware
authorSuganath Prabu S <suganath-prabu.subramani@broadcom.com>
Thu, 26 Nov 2020 09:43:04 +0000 (15:13 +0530)
committerMartin K. Petersen <martin.petersen@oracle.com>
Wed, 9 Dec 2020 16:34:18 +0000 (11:34 -0500)
The controller time currently gets updated with host time during driver
load or when a controller reset is issued. I.e. when host issues the
IOCInit request message to the HBA firmware. This IOCInit message has a
field named 'TimeStamp' with which the host updates the controller time.

Sometimes controller time drifts with respect to the host and it is
difficult to correlate host logs with controller logs. Issuing a controller
reset to sync the time would impact in-flight I/O and is not a viable
option.

Instead the driver now sends an IO_UNIT_CONTROL Request to sync the time
periodically. This is done from the watchdog thread which gets invoked
every second.

The time synchronization interval is specified in the 'TimeSyncInterval'
field in Manufacturing Page11 by the controller:

    TimeSyncInterval - 8 bits
bits  0-6: Time stamp Synchronization interval value
bit 7: Time stamp Synchronization interval unit,
   (if this bit is one then Timestamp Synchronization
   interval value is specified in terms of hours else
   Timestamp Synchronization interval value is
   specified in terms of minutes).

The driver keeps track of the timer using IOC's timestamp_update_count
field. This field value gets incremented whenever the watchdog thread gets
invoked. And whenever this field value is greater than or equal to the Time
Stamp Synchronization interval value, the driver sends the IO_UNIT_CONTROL
Request message to controller to update the time and then it resets the
timestamp_update_count field to zero.

Link: https://lore.kernel.org/r/20201126094311.8686-2-suganath-prabu.subramani@broadcom.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Suganath Prabu S <suganath-prabu.subramani@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h

index 93230cd..8538c2d 100644 (file)
@@ -597,6 +597,71 @@ static int mpt3sas_remove_dead_ioc_func(void *arg)
 }
 
 /**
+ * _base_sync_drv_fw_timestamp - Sync Drive-Fw TimeStamp.
+ * @ioc: Per Adapter Object
+ *
+ * Return nothing.
+ */
+static void _base_sync_drv_fw_timestamp(struct MPT3SAS_ADAPTER *ioc)
+{
+       Mpi26IoUnitControlRequest_t *mpi_request;
+       Mpi26IoUnitControlReply_t *mpi_reply;
+       u16 smid;
+       ktime_t current_time;
+       u64 TimeStamp = 0;
+       u8 issue_reset = 0;
+
+       mutex_lock(&ioc->scsih_cmds.mutex);
+       if (ioc->scsih_cmds.status != MPT3_CMD_NOT_USED) {
+               ioc_err(ioc, "scsih_cmd in use %s\n", __func__);
+               goto out;
+       }
+       ioc->scsih_cmds.status = MPT3_CMD_PENDING;
+       smid = mpt3sas_base_get_smid(ioc, ioc->scsih_cb_idx);
+       if (!smid) {
+               ioc_err(ioc, "Failed obtaining a smid %s\n", __func__);
+               ioc->scsih_cmds.status = MPT3_CMD_NOT_USED;
+               goto out;
+       }
+       mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
+       ioc->scsih_cmds.smid = smid;
+       memset(mpi_request, 0, sizeof(Mpi26IoUnitControlRequest_t));
+       mpi_request->Function = MPI2_FUNCTION_IO_UNIT_CONTROL;
+       mpi_request->Operation = MPI26_CTRL_OP_SET_IOC_PARAMETER;
+       mpi_request->IOCParameter = MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP;
+       current_time = ktime_get_real();
+       TimeStamp = ktime_to_ms(current_time);
+       mpi_request->Reserved7 = cpu_to_le32(TimeStamp & 0xFFFFFFFF);
+       mpi_request->IOCParameterValue = cpu_to_le32(TimeStamp >> 32);
+       init_completion(&ioc->scsih_cmds.done);
+       ioc->put_smid_default(ioc, smid);
+       dinitprintk(ioc, ioc_info(ioc,
+           "Io Unit Control Sync TimeStamp (sending), @time %lld ms\n",
+           TimeStamp));
+       wait_for_completion_timeout(&ioc->scsih_cmds.done,
+               MPT3SAS_TIMESYNC_TIMEOUT_SECONDS*HZ);
+       if (!(ioc->scsih_cmds.status & MPT3_CMD_COMPLETE)) {
+               mpt3sas_check_cmd_timeout(ioc,
+                   ioc->scsih_cmds.status, mpi_request,
+                   sizeof(Mpi2SasIoUnitControlRequest_t)/4, issue_reset);
+               goto issue_host_reset;
+       }
+       if (ioc->scsih_cmds.status & MPT3_CMD_REPLY_VALID) {
+               mpi_reply = ioc->scsih_cmds.reply;
+               dinitprintk(ioc, ioc_info(ioc,
+                   "Io Unit Control sync timestamp (complete): ioc_status(0x%04x), loginfo(0x%08x)\n",
+                   le16_to_cpu(mpi_reply->IOCStatus),
+                   le32_to_cpu(mpi_reply->IOCLogInfo)));
+       }
+issue_host_reset:
+       if (issue_reset)
+               mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
+       ioc->scsih_cmds.status = MPT3_CMD_NOT_USED;
+out:
+       mutex_unlock(&ioc->scsih_cmds.mutex);
+}
+
+/**
  * _base_fault_reset_work - workq handling ioc fault conditions
  * @work: input argument, used to derive ioc
  *
@@ -720,7 +785,11 @@ _base_fault_reset_work(struct work_struct *work)
                        return; /* don't rearm timer */
        }
        ioc->ioc_coredump_loop = 0;
-
+       if (ioc->time_sync_interval &&
+           ++ioc->timestamp_update_count >= ioc->time_sync_interval) {
+               ioc->timestamp_update_count = 0;
+               _base_sync_drv_fw_timestamp(ioc);
+       }
        spin_lock_irqsave(&ioc->ioc_reset_in_progress_lock, flags);
  rearm_timer:
        if (ioc->fault_reset_work_q)
@@ -744,6 +813,7 @@ mpt3sas_base_start_watchdog(struct MPT3SAS_ADAPTER *ioc)
        if (ioc->fault_reset_work_q)
                return;
 
+       ioc->timestamp_update_count = 0;
        /* initialize fault polling */
 
        INIT_DELAYED_WORK(&ioc->fault_reset_work, _base_fault_reset_work);
@@ -4754,7 +4824,24 @@ _base_static_config_pages(struct MPT3SAS_ADAPTER *ioc)
                else
                        ioc->nvme_abort_timeout = ioc->manu_pg11.NVMeAbortTO;
        }
-
+       ioc->time_sync_interval =
+           ioc->manu_pg11.TimeSyncInterval & MPT3SAS_TIMESYNC_MASK;
+       if (ioc->time_sync_interval) {
+               if (ioc->manu_pg11.TimeSyncInterval & MPT3SAS_TIMESYNC_UNIT_MASK)
+                       ioc->time_sync_interval =
+                           ioc->time_sync_interval * SECONDS_PER_HOUR;
+               else
+                       ioc->time_sync_interval =
+                           ioc->time_sync_interval * SECONDS_PER_MIN;
+               dinitprintk(ioc, ioc_info(ioc,
+                   "Driver-FW TimeSync interval is %d seconds. ManuPg11 TimeSync Unit is in %s\n",
+                   ioc->time_sync_interval, (ioc->manu_pg11.TimeSyncInterval &
+                   MPT3SAS_TIMESYNC_UNIT_MASK) ? "Hour" : "Minute"));
+       } else {
+               if (ioc->is_gen35_ioc)
+                       ioc_warn(ioc,
+                           "TimeSync Interval in Manuf page-11 is not enabled. Periodic Time-Sync will be disabled\n");
+       }
        mpt3sas_config_get_bios_pg2(ioc, &mpi_reply, &ioc->bios_pg2);
        mpt3sas_config_get_bios_pg3(ioc, &mpi_reply, &ioc->bios_pg3);
        mpt3sas_config_get_ioc_pg8(ioc, &mpi_reply, &ioc->ioc_pg8);
@@ -6466,6 +6553,8 @@ _base_send_ioc_init(struct MPT3SAS_ADAPTER *ioc)
                r = -EIO;
        }
 
+       /* Reset TimeSync Counter*/
+       ioc->timestamp_update_count = 0;
        return r;
 }
 
index 7dab579..cc4815c 100644 (file)
 /* CoreDump: Default timeout */
 #define MPT3SAS_DEFAULT_COREDUMP_TIMEOUT_SECONDS       (15) /*15 seconds*/
 #define MPT3SAS_COREDUMP_LOOP_DONE                     (0xFF)
+#define MPT3SAS_TIMESYNC_TIMEOUT_SECONDS               (10) /* 10 seconds */
+#define MPT3SAS_TIMESYNC_UPDATE_INTERVAL               (900) /* 15 minutes */
+#define MPT3SAS_TIMESYNC_UNIT_MASK                     (0x80) /* bit 7 */
+#define MPT3SAS_TIMESYNC_MASK                          (0x7F) /* 0 - 6 bits */
+#define SECONDS_PER_MIN                                        (60)
+#define SECONDS_PER_HOUR                               (3600)
+#define MPT3SAS_COREDUMP_LOOP_DONE                     (0xFF)
+#define MPI26_SET_IOC_PARAMETER_SYNC_TIMESTAMP         (0x81)
 
 /*
  * Set MPT3SAS_SG_DEPTH value based on user input.
@@ -405,7 +413,7 @@ struct Mpi2ManufacturingPage11_t {
        u16     HostTraceBufferMaxSizeKB;       /* 50h */
        u16     HostTraceBufferMinSizeKB;       /* 52h */
        u8      CoreDumpTOSec;                  /* 54h */
-       u8      Reserved8;                      /* 55h */
+       u8      TimeSyncInterval;               /* 55h */
        u16     Reserved9;                      /* 56h */
        __le32  Reserved10;                     /* 58h */
 };
@@ -1113,6 +1121,8 @@ typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
  * @cpu_msix_table_sz: table size
  * @total_io_cnt: Gives total IO count, used to load balance the interrupts
  * @ioc_coredump_loop: will have non-zero value when FW is in CoreDump state
+ * @timestamp_update_count: Counter to fire timeSync command
+ * time_sync_interval: Time sync interval read from man page 11
  * @high_iops_outstanding: used to load balance the interrupts
  *                             within high iops reply queues
  * @msix_load_balance: Enables load balancing of interrupts across
@@ -1308,6 +1318,8 @@ struct MPT3SAS_ADAPTER {
        MPT3SAS_FLUSH_RUNNING_CMDS schedule_dead_ioc_flush_running_cmds;
        u32             non_operational_loop;
        u8              ioc_coredump_loop;
+       u32             timestamp_update_count;
+       u32             time_sync_interval;
        atomic64_t      total_io_cnt;
        atomic64_t      high_iops_outstanding;
        bool            msix_load_balance;