net/mlx5: Avoid recovery in probe flows
drivers/net/ethernet/mellanox/mlx5/core/health.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/kernel.h>
34 #include <linux/random.h>
35 #include <linux/vmalloc.h>
36 #include <linux/hardirq.h>
37 #include <linux/mlx5/driver.h>
38 #include <linux/kern_levels.h>
39 #include "mlx5_core.h"
40 #include "lib/eq.h"
41 #include "lib/mlx5.h"
42 #include "lib/pci_vsc.h"
43 #include "lib/tout.h"
44 #include "diag/fw_tracer.h"
45
46 enum {
47         MAX_MISSES                      = 3,
48 };
49
50 enum {
51         MLX5_HEALTH_SYNDR_FW_ERR                = 0x1,
52         MLX5_HEALTH_SYNDR_IRISC_ERR             = 0x7,
53         MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR  = 0x8,
54         MLX5_HEALTH_SYNDR_CRC_ERR               = 0x9,
55         MLX5_HEALTH_SYNDR_FETCH_PCI_ERR         = 0xa,
56         MLX5_HEALTH_SYNDR_HW_FTL_ERR            = 0xb,
57         MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR  = 0xc,
58         MLX5_HEALTH_SYNDR_EQ_ERR                = 0xd,
59         MLX5_HEALTH_SYNDR_EQ_INV                = 0xe,
60         MLX5_HEALTH_SYNDR_FFSER_ERR             = 0xf,
61         MLX5_HEALTH_SYNDR_HIGH_TEMP             = 0x10
62 };
63
64 enum {
65         MLX5_DROP_NEW_HEALTH_WORK,
66 };
67
68 enum  {
69         MLX5_SENSOR_NO_ERR              = 0,
70         MLX5_SENSOR_PCI_COMM_ERR        = 1,
71         MLX5_SENSOR_PCI_ERR             = 2,
72         MLX5_SENSOR_NIC_DISABLED        = 3,
73         MLX5_SENSOR_NIC_SW_RESET        = 4,
74         MLX5_SENSOR_FW_SYND_RFR         = 5,
75 };
76
77 enum {
78         MLX5_SEVERITY_MASK              = 0x7,
79         MLX5_SEVERITY_VALID_MASK        = 0x8,
80 };
81
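/* The NIC interface (IFC) state is a 3-bit field that shares the
 * iseg->cmdq_addr_l_sz register with the command-queue address, so the
 * setter below must keep the address bits intact (hence the mask).
 */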
82 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
83 {
84         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
85 }
86
87 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
88 {
89         u32 cur_cmdq_addr_l_sz;
90
91         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
92         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
93                     state << MLX5_NIC_IFC_OFFSET,
94                     &dev->iseg->cmdq_addr_l_sz);
95 }
96
97 static bool sensor_pci_not_working(struct mlx5_core_dev *dev)
98 {
99         struct mlx5_core_health *health = &dev->priv.health;
100         struct health_buffer __iomem *h = health->health;
101
102         /* Offline PCI reads return 0xffffffff */
103         return (ioread32be(&h->fw_ver) == 0xffffffff);
104 }
105
106 static int mlx5_health_get_rfr(u8 rfr_severity)
107 {
108         return rfr_severity >> MLX5_RFR_BIT_OFFSET;
109 }
110
111 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
112 {
113         struct mlx5_core_health *health = &dev->priv.health;
114         struct health_buffer __iomem *h = health->health;
115         u8 synd = ioread8(&h->synd);
116         u8 rfr;
117
118         rfr = mlx5_health_get_rfr(ioread8(&h->rfr_severity));
119
120         if (rfr && synd)
121                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
122         return rfr && synd;
123 }
124
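/* Check the fatal-error sensors from most to least severe and report the
 * first one that fires. The PCI sensors come first: if PCI reads fail,
 * none of the other sensors, which also rely on reads from the device,
 * can be trusted.
 */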
125 u32 mlx5_health_check_fatal_sensors(struct mlx5_core_dev *dev)
126 {
127         if (sensor_pci_not_working(dev))
128                 return MLX5_SENSOR_PCI_COMM_ERR;
129         if (pci_channel_offline(dev->pdev))
130                 return MLX5_SENSOR_PCI_ERR;
131         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
132                 return MLX5_SENSOR_NIC_DISABLED;
133         if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET)
134                 return MLX5_SENSOR_NIC_SW_RESET;
135         if (sensor_fw_synd_rfr(dev))
136                 return MLX5_SENSOR_FW_SYND_RFR;
137
138         return MLX5_SENSOR_NO_ERR;
139 }
140
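/* Take (lock == true) or release (lock == false) the SW reset semaphore
 * through the vendor-specific capability (VSC) gateway. Only a PF may own
 * it; a busy semaphore means another PF already owns the reset flow.
 */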
141 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, bool lock)
142 {
143         enum mlx5_vsc_state state;
144         int ret;
145
146         if (!mlx5_core_is_pf(dev))
147                 return -EBUSY;
148
149         /* Try to lock GW access. This stage doesn't return
150          * EBUSY because a locked GW does not mean that another PF
151          * has already started the reset.
152          */
153         ret = mlx5_vsc_gw_lock(dev);
154         if (ret == -EBUSY)
155                 return -EINVAL;
156         if (ret)
157                 return ret;
158
159         state = lock ? MLX5_VSC_LOCK : MLX5_VSC_UNLOCK;
160         /* At this stage, if the return status == EBUSY, then we know
161          * for sure that another PF started the reset, so don't allow
162          * another reset.
163          */
164         ret = mlx5_vsc_sem_set_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
165         if (ret)
166                 mlx5_core_warn(dev, "Failed to lock SW reset semaphore\n");
167
168         /* Unlock GW access */
169         mlx5_vsc_gw_unlock(dev);
170
171         return ret;
172 }
173
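/* Issue a FW-level SW reset if the device advertises support for it in
 * the initialization segment. Returns true only if a reset was actually
 * triggered by writing MLX5_NIC_IFC_SW_RESET to the NIC interface field.
 */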
174 static bool reset_fw_if_needed(struct mlx5_core_dev *dev)
175 {
176         bool supported = (ioread32be(&dev->iseg->initializing) >>
177                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
178         u32 fatal_error;
179
180         if (!supported)
181                 return false;
182
183         /* The reset only needs to be issued by one PF. The health buffer is
184          * shared between all functions, and will be cleared during a reset.
185          * Check again to avoid a redundant second reset. If the fatal error was
186          * PCI related, a reset won't help.
187          */
188         fatal_error = mlx5_health_check_fatal_sensors(dev);
189         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
190             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
191             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
192                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
193                 return false;
194         }
195
196         mlx5_core_warn(dev, "Issuing FW Reset\n");
197         /* Write the NIC interface field to initiate the reset, the command
198          * interface address also resides here, don't overwrite it.
199          */
200         mlx5_set_nic_state(dev, MLX5_NIC_IFC_SW_RESET);
201
202         return true;
203 }
204
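/* If a fatal sensor fired (or force is set), move the device to the
 * internal-error state and flush the command interface so outstanding
 * commands complete with an error. In any case, raise
 * MLX5_DEV_EVENT_SYS_ERROR towards registered listeners.
 */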
205 static void enter_error_state(struct mlx5_core_dev *dev, bool force)
206 {
207         if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */
208                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
209                 mlx5_cmd_flush(dev);
210         }
211
212         mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1);
213 }
214
215 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
216 {
217         bool err_detected = false;
218
219         /* Mark the device as fatal in order to abort FW commands */
220         if ((mlx5_health_check_fatal_sensors(dev) || force) &&
221             dev->state == MLX5_DEVICE_STATE_UP) {
222                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
223                 err_detected = true;
224         }
225         mutex_lock(&dev->intf_state_mutex);
226         if (!err_detected && dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
227                 goto unlock; /* a previous error is still being handled */
228
229         enter_error_state(dev, force);
230 unlock:
231         mutex_unlock(&dev->intf_state_mutex);
232 }
233
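/* Error-time SW reset flow: if FW requested a reset (RFR), try to take
 * the SW reset semaphore and issue the reset from here. If another PF
 * beat us to it, just wait (with the longer, crdump-sized timeout) for
 * the NIC interface to report DISABLED.
 */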
234 void mlx5_error_sw_reset(struct mlx5_core_dev *dev)
235 {
236         unsigned long end, delay_ms = mlx5_tout_ms(dev, PCI_TOGGLE);
237         int lock = -EBUSY;
238
239         mutex_lock(&dev->intf_state_mutex);
240         if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
241                 goto unlock;
242
243         mlx5_core_err(dev, "start\n");
244
245         if (mlx5_health_check_fatal_sensors(dev) == MLX5_SENSOR_FW_SYND_RFR) {
246                 /* Get cr-dump and reset FW semaphore */
247                 lock = lock_sem_sw_reset(dev, true);
248
249                 if (lock == -EBUSY) {
250                         delay_ms = mlx5_tout_ms(dev, FULL_CRDUMP);
251                         goto recover_from_sw_reset;
252                 }
253                 /* Execute SW reset */
254                 reset_fw_if_needed(dev);
255         }
256
257 recover_from_sw_reset:
258         /* Recover from SW reset */
259         end = jiffies + msecs_to_jiffies(delay_ms);
260         do {
261                 if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED)
262                         break;
263
264                 msleep(20);
265         } while (!time_after(jiffies, end));
266
267         if (mlx5_get_nic_state(dev) != MLX5_NIC_IFC_DISABLED) {
268                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
269                         mlx5_get_nic_state(dev), delay_ms);
270         }
271
272         /* Release the FW semaphore if we are the lock owner */
273         if (!lock)
274                 lock_sem_sw_reset(dev, false);
275
276         mlx5_core_err(dev, "end\n");
277
278 unlock:
279         mutex_unlock(&dev->intf_state_mutex);
280 }
281
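/* Log why the NIC is not in the expected DISABLED state, then disable
 * the device so the recovery flow can bring it back up.
 */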
282 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
283 {
284         u8 nic_interface = mlx5_get_nic_state(dev);
285
286         switch (nic_interface) {
287         case MLX5_NIC_IFC_FULL:
288                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is full driver\n");
289                 break;
290
291         case MLX5_NIC_IFC_DISABLED:
292                 mlx5_core_warn(dev, "starting teardown\n");
293                 break;
294
295         case MLX5_NIC_IFC_NO_DRAM_NIC:
296                 mlx5_core_warn(dev, "Expected to see disabled NIC but it is no dram nic\n");
297                 break;
298
299         case MLX5_NIC_IFC_SW_RESET:
300                 /* The IFC mode field is 3 bits, so it will read 0x7 in 2 cases:
301                  * 1. PCI has been disabled (i.e. PCI-AER, or the PF driver was
302                  *    unloaded and this is a VF); this is not recoverable by SW reset.
303                  *    Logging of this is handled elsewhere.
304                  * 2. FW reset has been issued by another function; the driver can
305                  *    be reloaded to recover after the mode switches to
306                  *    MLX5_NIC_IFC_DISABLED.
307                  */
308                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
309                         mlx5_core_warn(dev, "NIC SW reset in progress\n");
310                 break;
311
312         default:
313                 mlx5_core_warn(dev, "Expected to see disabled NIC but it has an invalid value %d\n",
314                                nic_interface);
315         }
316
317         mlx5_disable_device(dev);
318 }
319
320 int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev)
321 {
322         unsigned long end;
323
324         end = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, FW_RESET));
325         while (sensor_pci_not_working(dev)) {
326                 if (time_after(jiffies, end))
327                         return -ETIMEDOUT;
328                 msleep(100);
329         }
330         return 0;
331 }
332
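/* Full recovery flow: tear the device down according to the current NIC
 * interface state, wait for PCI reads to work again (FW_RESET timeout),
 * then try to recover the device and re-check the fatal sensors.
 */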
333 static int mlx5_health_try_recover(struct mlx5_core_dev *dev)
334 {
335         mlx5_core_warn(dev, "handling bad device here\n");
336         mlx5_handle_bad_state(dev);
337         if (mlx5_health_wait_pci_up(dev)) {
338                 mlx5_core_err(dev, "health recovery flow aborted, PCI reads still not working\n");
339                 return -EIO;
340         }
341         mlx5_core_err(dev, "starting health recovery flow\n");
342         if (mlx5_recover_device(dev) || mlx5_health_check_fatal_sensors(dev)) {
343                 mlx5_core_err(dev, "health recovery failed\n");
344                 return -EIO;
345         }
346
347         mlx5_core_info(dev, "health recovery succeeded\n");
348         return 0;
349 }
350
351 static const char *hsynd_str(u8 synd)
352 {
353         switch (synd) {
354         case MLX5_HEALTH_SYNDR_FW_ERR:
355                 return "firmware internal error";
356         case MLX5_HEALTH_SYNDR_IRISC_ERR:
357                 return "irisc not responding";
358         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
359                 return "unrecoverable hardware error";
360         case MLX5_HEALTH_SYNDR_CRC_ERR:
361                 return "firmware CRC error";
362         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
363                 return "ICM fetch PCI error";
364         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
365                 return "HW fatal error";
366         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
367                 return "async EQ buffer overrun";
368         case MLX5_HEALTH_SYNDR_EQ_ERR:
369                 return "EQ error";
370         case MLX5_HEALTH_SYNDR_EQ_INV:
371                 return "Invalid EQ referenced";
372         case MLX5_HEALTH_SYNDR_FFSER_ERR:
373                 return "FFSER error";
374         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
375                 return "High temperature";
376         default:
377                 return "unrecognized error";
378         }
379 }
380
381 static const char *mlx5_loglevel_str(int level)
382 {
383         switch (level) {
384         case LOGLEVEL_EMERG:
385                 return "EMERGENCY";
386         case LOGLEVEL_ALERT:
387                 return "ALERT";
388         case LOGLEVEL_CRIT:
389                 return "CRITICAL";
390         case LOGLEVEL_ERR:
391                 return "ERROR";
392         case LOGLEVEL_WARNING:
393                 return "WARNING";
394         case LOGLEVEL_NOTICE:
395                 return "NOTICE";
396         case LOGLEVEL_INFO:
397                 return "INFO";
398         case LOGLEVEL_DEBUG:
399                 return "DEBUG";
400         }
401         return "Unknown log level";
402 }
403
404 static int mlx5_health_get_severity(u8 rfr_severity)
405 {
406         return rfr_severity & MLX5_SEVERITY_VALID_MASK ?
407                rfr_severity & MLX5_SEVERITY_MASK : LOGLEVEL_ERR;
408 }
409
410 static void print_health_info(struct mlx5_core_dev *dev)
411 {
412         struct mlx5_core_health *health = &dev->priv.health;
413         struct health_buffer __iomem *h = health->health;
414         u8 rfr_severity;
415         int severity;
416         int i;
417
418         /* If the syndrome is 0, the device is OK; no need to print the buffer */
419         if (!ioread8(&h->synd))
420                 return;
421
422         if (ioread32be(&h->fw_ver) == 0xFFFFFFFF) {
423                 mlx5_log(dev, LOGLEVEL_ERR, "PCI slot is unavailable\n");
424                 return;
425         }
426
427         rfr_severity = ioread8(&h->rfr_severity);
428         severity  = mlx5_health_get_severity(rfr_severity);
429         mlx5_log(dev, severity, "Health issue observed, %s, severity(%d) %s:\n",
430                  hsynd_str(ioread8(&h->synd)), severity, mlx5_loglevel_str(severity));
431
432         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
433                 mlx5_log(dev, severity, "assert_var[%d] 0x%08x\n", i,
434                          ioread32be(h->assert_var + i));
435
436         mlx5_log(dev, severity, "assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
437         mlx5_log(dev, severity, "assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
438         mlx5_log(dev, severity, "fw_ver %d.%d.%d\n", fw_rev_maj(dev), fw_rev_min(dev),
439                  fw_rev_sub(dev));
440         mlx5_log(dev, severity, "time %u\n", ioread32be(&h->time));
441         mlx5_log(dev, severity, "hw_id 0x%08x\n", ioread32be(&h->hw_id));
442         mlx5_log(dev, severity, "rfr %d\n", mlx5_health_get_rfr(rfr_severity));
443         mlx5_log(dev, severity, "severity %d (%s)\n", severity, mlx5_loglevel_str(severity));
444         mlx5_log(dev, severity, "irisc_index %d\n", ioread8(&h->irisc_index));
445         mlx5_log(dev, severity, "synd 0x%x: %s\n", ioread8(&h->synd),
446                  hsynd_str(ioread8(&h->synd)));
447         mlx5_log(dev, severity, "ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
448         mlx5_log(dev, severity, "raw fw_ver 0x%08x\n", ioread32be(&h->fw_ver));
449 }
450
451 static int
452 mlx5_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
453                           struct devlink_fmsg *fmsg,
454                           struct netlink_ext_ack *extack)
455 {
456         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
457         struct mlx5_core_health *health = &dev->priv.health;
458         struct health_buffer __iomem *h = health->health;
459         u8 synd;
460         int err;
461
462         synd = ioread8(&h->synd);
463         err = devlink_fmsg_u8_pair_put(fmsg, "Syndrome", synd);
464         if (err || !synd)
465                 return err;
466         return devlink_fmsg_string_pair_put(fmsg, "Description", hsynd_str(synd));
467 }
468
469 struct mlx5_fw_reporter_ctx {
470         u8 err_synd;
471         int miss_counter;
472 };
473
474 static int
475 mlx5_fw_reporter_ctx_pairs_put(struct devlink_fmsg *fmsg,
476                                struct mlx5_fw_reporter_ctx *fw_reporter_ctx)
477 {
478         int err;
479
480         err = devlink_fmsg_u8_pair_put(fmsg, "syndrome",
481                                        fw_reporter_ctx->err_synd);
482         if (err)
483                 return err;
484         err = devlink_fmsg_u32_pair_put(fmsg, "fw_miss_counter",
485                                         fw_reporter_ctx->miss_counter);
486         if (err)
487                 return err;
488         return 0;
489 }
490
491 static int
492 mlx5_fw_reporter_health_buffer_data_put(struct mlx5_core_dev *dev,
493                                         struct devlink_fmsg *fmsg)
494 {
495         struct mlx5_core_health *health = &dev->priv.health;
496         struct health_buffer __iomem *h = health->health;
497         u8 rfr_severity;
498         int err;
499         int i;
500
501         if (!ioread8(&h->synd))
502                 return 0;
503
504         err = devlink_fmsg_pair_nest_start(fmsg, "health buffer");
505         if (err)
506                 return err;
507         err = devlink_fmsg_obj_nest_start(fmsg);
508         if (err)
509                 return err;
510         err = devlink_fmsg_arr_pair_nest_start(fmsg, "assert_var");
511         if (err)
512                 return err;
513
514         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++) {
515                 err = devlink_fmsg_u32_put(fmsg, ioread32be(h->assert_var + i));
516                 if (err)
517                         return err;
518         }
519         err = devlink_fmsg_arr_pair_nest_end(fmsg);
520         if (err)
521                 return err;
522         err = devlink_fmsg_u32_pair_put(fmsg, "assert_exit_ptr",
523                                         ioread32be(&h->assert_exit_ptr));
524         if (err)
525                 return err;
526         err = devlink_fmsg_u32_pair_put(fmsg, "assert_callra",
527                                         ioread32be(&h->assert_callra));
528         if (err)
529                 return err;
530         err = devlink_fmsg_u32_pair_put(fmsg, "time", ioread32be(&h->time));
531         if (err)
532                 return err;
533         err = devlink_fmsg_u32_pair_put(fmsg, "hw_id", ioread32be(&h->hw_id));
534         if (err)
535                 return err;
536         rfr_severity = ioread8(&h->rfr_severity);
537         err = devlink_fmsg_u8_pair_put(fmsg, "rfr", mlx5_health_get_rfr(rfr_severity));
538         if (err)
539                 return err;
540         err = devlink_fmsg_u8_pair_put(fmsg, "severity", mlx5_health_get_severity(rfr_severity));
541         if (err)
542                 return err;
543         err = devlink_fmsg_u8_pair_put(fmsg, "irisc_index",
544                                        ioread8(&h->irisc_index));
545         if (err)
546                 return err;
547         err = devlink_fmsg_u8_pair_put(fmsg, "synd", ioread8(&h->synd));
548         if (err)
549                 return err;
550         err = devlink_fmsg_u32_pair_put(fmsg, "ext_synd",
551                                         ioread16be(&h->ext_synd));
552         if (err)
553                 return err;
554         err = devlink_fmsg_u32_pair_put(fmsg, "raw_fw_ver",
555                                         ioread32be(&h->fw_ver));
556         if (err)
557                 return err;
558         err = devlink_fmsg_obj_nest_end(fmsg);
559         if (err)
560                 return err;
561         return devlink_fmsg_pair_nest_end(fmsg);
562 }
563
564 static int
565 mlx5_fw_reporter_dump(struct devlink_health_reporter *reporter,
566                       struct devlink_fmsg *fmsg, void *priv_ctx,
567                       struct netlink_ext_ack *extack)
568 {
569         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
570         int err;
571
572         err = mlx5_fw_tracer_trigger_core_dump_general(dev);
573         if (err)
574                 return err;
575
576         if (priv_ctx) {
577                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
578
579                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
580                 if (err)
581                         return err;
582         }
583
584         err = mlx5_fw_reporter_health_buffer_data_put(dev, fmsg);
585         if (err)
586                 return err;
587         return mlx5_fw_tracer_get_saved_traces_objects(dev->tracer, fmsg);
588 }
589
590 static void mlx5_fw_reporter_err_work(struct work_struct *work)
591 {
592         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
593         struct mlx5_core_health *health;
594
595         health = container_of(work, struct mlx5_core_health, report_work);
596
597         if (IS_ERR_OR_NULL(health->fw_reporter))
598                 return;
599
600         fw_reporter_ctx.err_synd = health->synd;
601         fw_reporter_ctx.miss_counter = health->miss_counter;
602         if (fw_reporter_ctx.err_synd) {
603                 devlink_health_report(health->fw_reporter,
604                                       "FW syndrome reported", &fw_reporter_ctx);
605                 return;
606         }
607         if (fw_reporter_ctx.miss_counter)
608                 devlink_health_report(health->fw_reporter,
609                                       "FW miss counter reported",
610                                       &fw_reporter_ctx);
611 }
612
613 static const struct devlink_health_reporter_ops mlx5_fw_reporter_ops = {
614                 .name = "fw",
615                 .diagnose = mlx5_fw_reporter_diagnose,
616                 .dump = mlx5_fw_reporter_dump,
617 };
618
619 static int
620 mlx5_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter,
621                                void *priv_ctx,
622                                struct netlink_ext_ack *extack)
623 {
624         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
625
626         return mlx5_health_try_recover(dev);
627 }
628
629 static int
630 mlx5_fw_fatal_reporter_dump(struct devlink_health_reporter *reporter,
631                             struct devlink_fmsg *fmsg, void *priv_ctx,
632                             struct netlink_ext_ack *extack)
633 {
634         struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
635         u32 crdump_size = dev->priv.health.crdump_size;
636         u32 *cr_data;
637         int err;
638
639         if (!mlx5_core_is_pf(dev))
640                 return -EPERM;
641
642         cr_data = kvmalloc(crdump_size, GFP_KERNEL);
643         if (!cr_data)
644                 return -ENOMEM;
645         err = mlx5_crdump_collect(dev, cr_data);
646         if (err)
647                 goto free_data;
648
649         if (priv_ctx) {
650                 struct mlx5_fw_reporter_ctx *fw_reporter_ctx = priv_ctx;
651
652                 err = mlx5_fw_reporter_ctx_pairs_put(fmsg, fw_reporter_ctx);
653                 if (err)
654                         goto free_data;
655         }
656
657         err = devlink_fmsg_binary_pair_put(fmsg, "crdump_data", cr_data, crdump_size);
658
659 free_data:
660         kvfree(cr_data);
661         return err;
662 }
663
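/* Fatal-error work: enter the error state and hand the event to the
 * devlink "fw_fatal" health reporter. If the report is rejected because
 * the grace period has not elapsed (-ECANCELED), unload the driver
 * instead of leaving it half-alive.
 */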
664 static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
665 {
666         struct mlx5_fw_reporter_ctx fw_reporter_ctx;
667         struct mlx5_core_health *health;
668         struct mlx5_core_dev *dev;
669         struct devlink *devlink;
670         struct mlx5_priv *priv;
671
672         health = container_of(work, struct mlx5_core_health, fatal_report_work);
673         priv = container_of(health, struct mlx5_priv, health);
674         dev = container_of(priv, struct mlx5_core_dev, priv);
675         devlink = priv_to_devlink(dev);
676
677         mutex_lock(&dev->intf_state_mutex);
678         if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
679                 mlx5_core_err(dev, "health works are not permitted at this stage\n");
680                 mutex_unlock(&dev->intf_state_mutex);
                    return;
681         }
682         mutex_unlock(&dev->intf_state_mutex);
683         enter_error_state(dev, false);
684         if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
685                 devl_lock(devlink);
686                 if (mlx5_health_try_recover(dev))
687                         mlx5_core_err(dev, "health recovery failed\n");
688                 devl_unlock(devlink);
689                 return;
690         }
691         fw_reporter_ctx.err_synd = health->synd;
692         fw_reporter_ctx.miss_counter = health->miss_counter;
693         if (devlink_health_report(health->fw_fatal_reporter,
694                                   "FW fatal error reported", &fw_reporter_ctx) == -ECANCELED) {
695                 /* If recovery wasn't performed, due to grace period,
696                  * unload the driver. This ensures that the driver
697                  * closes all its resources and it is not subjected to
698                  * requests from the kernel.
699                  */
700                 mlx5_core_err(dev, "Driver is in error state. Unloading\n");
701                 mlx5_unload_one(dev);
702         }
703 }
704
705 static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = {
706                 .name = "fw_fatal",
707                 .recover = mlx5_fw_fatal_reporter_recover,
708                 .dump = mlx5_fw_fatal_reporter_dump,
709 };
710
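/* Auto-recovery grace periods handed to devlink_health_reporter_create(),
 * in milliseconds: embedded CPU PFs get the longest period, then PFs,
 * then VFs/SFs.
 */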
711 #define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000
712 #define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000
713 #define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000
714 #define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD
715
716 static void mlx5_fw_reporters_create(struct mlx5_core_dev *dev)
717 {
718         struct mlx5_core_health *health = &dev->priv.health;
719         struct devlink *devlink = priv_to_devlink(dev);
720         u64 grace_period;
721
722         if (mlx5_core_is_ecpf(dev)) {
723                 grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD;
724         } else if (mlx5_core_is_pf(dev)) {
725                 grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD;
726         } else {
727                 /* VF or SF */
728                 grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD;
729         }
730
731         health->fw_reporter =
732                 devlink_health_reporter_create(devlink, &mlx5_fw_reporter_ops,
733                                                0, dev);
734         if (IS_ERR(health->fw_reporter))
735                 mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n",
736                                PTR_ERR(health->fw_reporter));
737
738         health->fw_fatal_reporter =
739                 devlink_health_reporter_create(devlink,
740                                                &mlx5_fw_fatal_reporter_ops,
741                                                grace_period,
742                                                dev);
743         if (IS_ERR(health->fw_fatal_reporter))
744                 mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n",
745                                PTR_ERR(health->fw_fatal_reporter));
746 }
747
748 static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev)
749 {
750         struct mlx5_core_health *health = &dev->priv.health;
751
752         if (!IS_ERR_OR_NULL(health->fw_reporter))
753                 devlink_health_reporter_destroy(health->fw_reporter);
754
755         if (!IS_ERR_OR_NULL(health->fw_fatal_reporter))
756                 devlink_health_reporter_destroy(health->fw_fatal_reporter);
757 }
758
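/* Next poll time: the configured HEALTH_POLL_INTERVAL plus up to one
 * second of random jitter, presumably so that health polls of multiple
 * functions do not all fire in the same tick.
 */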
759 static unsigned long get_next_poll_jiffies(struct mlx5_core_dev *dev)
760 {
761         unsigned long next;
762
763         get_random_bytes(&next, sizeof(next));
764         next %= HZ;
765         next += jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL));
766
767         return next;
768 }
769
770 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
771 {
772         struct mlx5_core_health *health = &dev->priv.health;
773         unsigned long flags;
774
775         spin_lock_irqsave(&health->wq_lock, flags);
776         if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
777                 queue_work(health->wq, &health->fatal_report_work);
778         else
779                 mlx5_core_err(dev, "new health works are not permitted at this stage\n");
780         spin_unlock_irqrestore(&health->wq_lock, flags);
781 }
782
783 #define MLX5_MSEC_PER_HOUR (MSEC_PER_SEC * 60 * 60)
784 static void mlx5_health_log_ts_update(struct work_struct *work)
785 {
786         struct delayed_work *dwork = to_delayed_work(work);
787         u32 out[MLX5_ST_SZ_DW(mrtc_reg)] = {};
788         u32 in[MLX5_ST_SZ_DW(mrtc_reg)] = {};
789         struct mlx5_core_health *health;
790         struct mlx5_core_dev *dev;
791         struct mlx5_priv *priv;
792         u64 now_us;
793
794         health = container_of(dwork, struct mlx5_core_health, update_fw_log_ts_work);
795         priv = container_of(health, struct mlx5_priv, health);
796         dev = container_of(priv, struct mlx5_core_dev, priv);
797
798         now_us =  ktime_to_us(ktime_get_real());
799
800         MLX5_SET(mrtc_reg, in, time_h, now_us >> 32);
801         MLX5_SET(mrtc_reg, in, time_l, now_us & 0xFFFFFFFF);
802         mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MRTC, 0, 1);
803
804         queue_delayed_work(health->wq, &health->update_fw_log_ts_work,
805                            msecs_to_jiffies(MLX5_MSEC_PER_HOUR));
806 }
807
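/* Periodic health poll: check the fatal sensors, watch the FW health
 * counter for a stuck firmware (MAX_MISSES identical reads in a row),
 * and report new syndromes through the "fw" reporter work.
 */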
808 static void poll_health(struct timer_list *t)
809 {
810         struct mlx5_core_dev *dev = from_timer(dev, t, priv.health.timer);
811         struct mlx5_core_health *health = &dev->priv.health;
812         struct health_buffer __iomem *h = health->health;
813         u32 fatal_error;
814         u8 prev_synd;
815         u32 count;
816
817         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
818                 goto out;
819
820         fatal_error = mlx5_health_check_fatal_sensors(dev);
821
822         if (fatal_error && !health->fatal_error) {
823                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
824                 dev->priv.health.fatal_error = fatal_error;
825                 print_health_info(dev);
826                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
827                 mlx5_trigger_health_work(dev);
828                 return;
829         }
830
831         count = ioread32be(health->health_counter);
832         if (count == health->prev)
833                 ++health->miss_counter;
834         else
835                 health->miss_counter = 0;
836
837         health->prev = count;
838         if (health->miss_counter == MAX_MISSES) {
839                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
840                 print_health_info(dev);
841                 queue_work(health->wq, &health->report_work);
842         }
843
844         prev_synd = health->synd;
845         health->synd = ioread8(&h->synd);
846         if (health->synd && health->synd != prev_synd)
847                 queue_work(health->wq, &health->report_work);
848
849 out:
850         mod_timer(&health->timer, get_next_poll_jiffies(dev));
851 }
852
853 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
854 {
855         u64 poll_interval_ms =  mlx5_tout_ms(dev, HEALTH_POLL_INTERVAL);
856         struct mlx5_core_health *health = &dev->priv.health;
857
858         timer_setup(&health->timer, poll_health, 0);
859         health->fatal_error = MLX5_SENSOR_NO_ERR;
860         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
861         health->health = &dev->iseg->health;
862         health->health_counter = &dev->iseg->health_counter;
863
864         health->timer.expires = jiffies + msecs_to_jiffies(poll_interval_ms);
865         add_timer(&health->timer);
866 }
867
868 void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
869 {
870         struct mlx5_core_health *health = &dev->priv.health;
871         unsigned long flags;
872
873         if (disable_health) {
874                 spin_lock_irqsave(&health->wq_lock, flags);
875                 set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
876                 spin_unlock_irqrestore(&health->wq_lock, flags);
877         }
878
879         del_timer_sync(&health->timer);
880 }
881
882 void mlx5_start_health_fw_log_up(struct mlx5_core_dev *dev)
883 {
884         struct mlx5_core_health *health = &dev->priv.health;
885
886         if (mlx5_core_is_pf(dev) && MLX5_CAP_MCAM_REG(dev, mrtc))
887                 queue_delayed_work(health->wq, &health->update_fw_log_ts_work, 0);
888 }
889
890 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
891 {
892         struct mlx5_core_health *health = &dev->priv.health;
893         unsigned long flags;
894
895         spin_lock_irqsave(&health->wq_lock, flags);
896         set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
897         spin_unlock_irqrestore(&health->wq_lock, flags);
898         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
899         cancel_work_sync(&health->report_work);
900         cancel_work_sync(&health->fatal_report_work);
901 }
902
903 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
904 {
905         struct mlx5_core_health *health = &dev->priv.health;
906
907         cancel_delayed_work_sync(&health->update_fw_log_ts_work);
908         destroy_workqueue(health->wq);
909         mlx5_fw_reporters_destroy(dev);
910 }
911
912 int mlx5_health_init(struct mlx5_core_dev *dev)
913 {
914         struct mlx5_core_health *health;
915         char *name;
916
917         mlx5_fw_reporters_create(dev);
918
919         health = &dev->priv.health;
920         name = kmalloc(64, GFP_KERNEL);
921         if (!name)
922                 goto out_err;
923
924         strcpy(name, "mlx5_health");
925         strcat(name, dev_name(dev->device));
926         health->wq = create_singlethread_workqueue(name);
927         kfree(name);
928         if (!health->wq)
929                 goto out_err;
930         spin_lock_init(&health->wq_lock);
931         INIT_WORK(&health->fatal_report_work, mlx5_fw_fatal_reporter_err_work);
932         INIT_WORK(&health->report_work, mlx5_fw_reporter_err_work);
933         INIT_DELAYED_WORK(&health->update_fw_log_ts_work, mlx5_health_log_ts_update);
934
935         return 0;
936
937 out_err:
938         mlx5_fw_reporters_destroy(dev);
939         return -ENOMEM;
940 }