hwmon (occ): Retry for checksum failure
author Eddie James <eajames@linux.ibm.com>
Tue, 26 Apr 2022 15:49:56 +0000 (10:49 -0500)
committer Joel Stanley <joel@jms.id.au>
Wed, 28 Sep 2022 11:40:57 +0000 (21:10 +0930)
Due to the OCC communication design with a shared SRAM area,
checksum errors are expected due to corrupted buffer from OCC
communications with other system components. Therefore, retry
the command twice in the event of a checksum failure.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20220426154956.27205-3-eajames@linux.ibm.com
Signed-off-by: Joel Stanley <joel@jms.id.au>
drivers/hwmon/occ/p9_sbe.c

index c1e0a1d..f3791a5 100644 (file)
@@ -14,6 +14,8 @@
 
 #include "common.h"
 
+#define OCC_CHECKSUM_RETRIES   3
+
 struct p9_sbe_occ {
        struct occ occ;
        bool sbe_error;
@@ -80,18 +82,23 @@ done:
 static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len,
                               void *resp, size_t resp_len)
 {
+       size_t original_resp_len = resp_len;
        struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
-       int rc;
+       int rc, i;
 
-       rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
-       if (rc < 0) {
+       for (i = 0; i < OCC_CHECKSUM_RETRIES; ++i) {
+               rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
+               if (rc >= 0)
+                       break;
                if (resp_len) {
                        if (p9_sbe_occ_save_ffdc(ctx, resp, resp_len))
                                sysfs_notify(&occ->bus_dev->kobj, NULL,
                                             bin_attr_ffdc.attr.name);
+                       return rc;
                }
-
-               return rc;
+               if (rc != -EBADE)
+                       return rc;
+               resp_len = original_resp_len;
        }
 
        switch (((struct occ_response *)resp)->return_status) {