30f23363465039971f9880f35cd7577ae182f4a5
[platform/adaptation/renesas_rcar/renesas_kernel.git] / drivers / block / mtip32xx / mtip32xx.c
1 /*
2  * Driver for the Micron P320 SSD
3  *   Copyright (C) 2011 Micron Technology, Inc.
4  *
5  * Portions of this code were derived from works subjected to the
6  * following copyright:
7  *    Copyright (C) 2009 Integrated Device Technology, Inc.
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  */
20
21 #include <linux/pci.h>
22 #include <linux/interrupt.h>
23 #include <linux/ata.h>
24 #include <linux/delay.h>
25 #include <linux/hdreg.h>
26 #include <linux/uaccess.h>
27 #include <linux/random.h>
28 #include <linux/smp.h>
29 #include <linux/compat.h>
30 #include <linux/fs.h>
31 #include <linux/module.h>
32 #include <linux/genhd.h>
33 #include <linux/blkdev.h>
34 #include <linux/bio.h>
35 #include <linux/dma-mapping.h>
36 #include <linux/idr.h>
37 #include <linux/kthread.h>
38 #include <../drivers/ata/ahci.h>
39 #include <linux/export.h>
40 #include <linux/debugfs.h>
41 #include "mtip32xx.h"
42
43 #define HW_CMD_SLOT_SZ          (MTIP_MAX_COMMAND_SLOTS * 32)
44 #define HW_CMD_TBL_SZ           (AHCI_CMD_TBL_HDR_SZ + (MTIP_MAX_SG * 16))
45 #define HW_CMD_TBL_AR_SZ        (HW_CMD_TBL_SZ * MTIP_MAX_COMMAND_SLOTS)
46 #define HW_PORT_PRIV_DMA_SZ \
47                 (HW_CMD_SLOT_SZ + HW_CMD_TBL_AR_SZ + AHCI_RX_FIS_SZ)
48
49 #define HOST_CAP_NZDMA          (1 << 19)
50 #define HOST_HSORG              0xFC
51 #define HSORG_DISABLE_SLOTGRP_INTR (1<<24)
52 #define HSORG_DISABLE_SLOTGRP_PXIS (1<<16)
53 #define HSORG_HWREV             0xFF00
54 #define HSORG_STYLE             0x8
55 #define HSORG_SLOTGROUPS        0x7
56
57 #define PORT_COMMAND_ISSUE      0x38
58 #define PORT_SDBV               0x7C
59
60 #define PORT_OFFSET             0x100
61 #define PORT_MEM_SIZE           0x80
62
63 #define PORT_IRQ_ERR \
64         (PORT_IRQ_HBUS_ERR | PORT_IRQ_IF_ERR | PORT_IRQ_CONNECT | \
65          PORT_IRQ_PHYRDY | PORT_IRQ_UNK_FIS | PORT_IRQ_BAD_PMP | \
66          PORT_IRQ_TF_ERR | PORT_IRQ_HBUS_DATA_ERR | PORT_IRQ_IF_NONFATAL | \
67          PORT_IRQ_OVERFLOW)
68 #define PORT_IRQ_LEGACY \
69         (PORT_IRQ_PIOS_FIS | PORT_IRQ_D2H_REG_FIS)
70 #define PORT_IRQ_HANDLED \
71         (PORT_IRQ_SDB_FIS | PORT_IRQ_LEGACY | \
72          PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR | \
73          PORT_IRQ_CONNECT | PORT_IRQ_PHYRDY)
74 #define DEF_PORT_IRQ \
75         (PORT_IRQ_ERR | PORT_IRQ_LEGACY | PORT_IRQ_SDB_FIS)
76
77 /* product numbers */
78 #define MTIP_PRODUCT_UNKNOWN    0x00
79 #define MTIP_PRODUCT_ASICFPGA   0x11
80
81 /* Device instance number, incremented each time a device is probed. */
82 static int instance;
83
84 /*
85  * Global variable used to hold the major block device number
86  * allocated in mtip_init().
87  */
88 static int mtip_major;
89 static struct dentry *dfs_parent;
90
91 static u32 cpu_use[NR_CPUS];
92
93 static DEFINE_SPINLOCK(rssd_index_lock);
94 static DEFINE_IDA(rssd_index_ida);
95
96 static int mtip_block_initialize(struct driver_data *dd);
97
98 #ifdef CONFIG_COMPAT
99 struct mtip_compat_ide_task_request_s {
100         __u8            io_ports[8];
101         __u8            hob_ports[8];
102         ide_reg_valid_t out_flags;
103         ide_reg_valid_t in_flags;
104         int             data_phase;
105         int             req_cmd;
106         compat_ulong_t  out_size;
107         compat_ulong_t  in_size;
108 };
109 #endif
110
111 /*
112  * This function check_for_surprise_removal is called
113  * while card is removed from the system and it will
114  * read the vendor id from the configration space
115  *
116  * @pdev Pointer to the pci_dev structure.
117  *
118  * return value
119  *       true if device removed, else false
120  */
121 static bool mtip_check_surprise_removal(struct pci_dev *pdev)
122 {
123         u16 vendor_id = 0;
124
125        /* Read the vendorID from the configuration space */
126         pci_read_config_word(pdev, 0x00, &vendor_id);
127         if (vendor_id == 0xFFFF)
128                 return true; /* device removed */
129
130         return false; /* device present */
131 }
132
/*
 * Fail all outstanding commands after a surprise removal of the device.
 *
 * Walks every allocated command slot, completes any active command with
 * -ENODEV via its async callback, unmaps its DMA scatter list, then
 * releases one slot waiter and marks the driver as cleaned up.
 *
 * @dd Pointer to the DRIVER_DATA structure.
 *
 * return value
 *      None
 */
static void mtip_command_cleanup(struct driver_data *dd)
{
	int group = 0, commandslot = 0, commandindex = 0;
	struct mtip_cmd *command;
	struct mtip_port *port = dd->port;
	/*
	 * NOTE(review): non-atomic re-entry guard; two CPUs could both
	 * observe in_progress == 0 and run the cleanup concurrently.
	 * Presumably tolerable on the removal path - confirm.
	 */
	static int in_progress;

	if (in_progress)
		return;

	in_progress = 1;

	/* Scan 4 slot groups x 32 slots for allocated commands. */
	for (group = 0; group < 4; group++) {
		for (commandslot = 0; commandslot < 32; commandslot++) {
			if (!(port->allocated[group] & (1 << commandslot)))
				continue;

			/* Tag is (group * 32) + slot. */
			commandindex = group << 5 | commandslot;
			command = &port->commands[commandindex];

			/* Report the device loss to the upper layer. */
			if (atomic_read(&command->active)
			    && (command->async_callback)) {
				command->async_callback(command->async_data,
					-ENODEV);
				command->async_callback = NULL;
				command->async_data = NULL;
			}

			dma_unmap_sg(&port->dd->pdev->dev,
				command->sg,
				command->scatter_ents,
				command->direction);
		}
	}

	/* Wake one waiter blocked on a free command slot. */
	up(&port->cmd_slot);

	set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
	in_progress = 0;
}
183
184 /*
185  * Obtain an empty command slot.
186  *
187  * This function needs to be reentrant since it could be called
188  * at the same time on multiple CPUs. The allocation of the
189  * command slot must be atomic.
190  *
191  * @port Pointer to the port data structure.
192  *
193  * return value
194  *      >= 0    Index of command slot obtained.
195  *      -1      No command slots available.
196  */
197 static int get_slot(struct mtip_port *port)
198 {
199         int slot, i;
200         unsigned int num_command_slots = port->dd->slot_groups * 32;
201
202         /*
203          * Try 10 times, because there is a small race here.
204          *  that's ok, because it's still cheaper than a lock.
205          *
206          * Race: Since this section is not protected by lock, same bit
207          * could be chosen by different process contexts running in
208          * different processor. So instead of costly lock, we are going
209          * with loop.
210          */
211         for (i = 0; i < 10; i++) {
212                 slot = find_next_zero_bit(port->allocated,
213                                          num_command_slots, 1);
214                 if ((slot < num_command_slots) &&
215                     (!test_and_set_bit(slot, port->allocated)))
216                         return slot;
217         }
218         dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n");
219
220         if (mtip_check_surprise_removal(port->dd->pdev)) {
221                 /* Device not present, clean outstanding commands */
222                 mtip_command_cleanup(port->dd);
223         }
224         return -1;
225 }
226
/*
 * Release a command slot.
 *
 * Clears the tag's bit in the port's allocated bitmap so get_slot()
 * can hand the slot out again.
 *
 * @port Pointer to the port data structure.
 * @tag  Tag of command to release
 *
 * return value
 *      None
 */
static inline void release_slot(struct mtip_port *port, int tag)
{
	/*
	 * Barriers on both sides of the clear so writes to the command
	 * structure are ordered against the slot becoming visible as
	 * free on other CPUs.
	 */
	smp_mb__before_clear_bit();
	clear_bit(tag, port->allocated);
	smp_mb__after_clear_bit();
}
242
243 /*
244  * Reset the HBA (without sleeping)
245  *
246  * @dd Pointer to the driver data structure.
247  *
248  * return value
249  *      0       The reset was successful.
250  *      -1      The HBA Reset bit did not clear.
251  */
252 static int mtip_hba_reset(struct driver_data *dd)
253 {
254         unsigned long timeout;
255
256         /* Set the reset bit */
257         writel(HOST_RESET, dd->mmio + HOST_CTL);
258
259         /* Flush */
260         readl(dd->mmio + HOST_CTL);
261
262         /* Spin for up to 2 seconds, waiting for reset acknowledgement */
263         timeout = jiffies + msecs_to_jiffies(2000);
264         do {
265                 mdelay(10);
266                 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))
267                         return -1;
268
269         } while ((readl(dd->mmio + HOST_CTL) & HOST_RESET)
270                  && time_before(jiffies, timeout));
271
272         if (readl(dd->mmio + HOST_CTL) & HOST_RESET)
273                 return -1;
274
275         return 0;
276 }
277
/*
 * Issue a command to the hardware.
 *
 * Set the appropriate bit in the s_active and Command Issue hardware
 * registers, causing hardware command processing to begin.
 *
 * @port Pointer to the port structure.
 * @tag  The tag of the command to be issued.
 *
 * return value
 *      None
 */
static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag)
{
	/* 32 tags per slot group. */
	int group = tag >> 5;

	atomic_set(&port->commands[tag].active, 1);

	/*
	 * Guard SACT and CI registers: both writes must appear atomic
	 * with respect to other issuers on the same group, and SACT
	 * must be set before CI.
	 */
	spin_lock(&port->cmd_issue_lock[group]);
	writel((1 << MTIP_TAG_BIT(tag)),
			port->s_active[MTIP_TAG_INDEX(tag)]);
	writel((1 << MTIP_TAG_BIT(tag)),
			port->cmd_issue[MTIP_TAG_INDEX(tag)]);
	spin_unlock(&port->cmd_issue_lock[group]);

	/* Arm the per-command timeout used by mtip_timeout_function(). */
	port->commands[tag].comp_time = jiffies + msecs_to_jiffies(
					MTIP_NCQ_COMMAND_TIMEOUT_MS);
}
308
309 /*
310  * Enable/disable the reception of FIS
311  *
312  * @port   Pointer to the port data structure
313  * @enable 1 to enable, 0 to disable
314  *
315  * return value
316  *      Previous state: 1 enabled, 0 disabled
317  */
318 static int mtip_enable_fis(struct mtip_port *port, int enable)
319 {
320         u32 tmp;
321
322         /* enable FIS reception */
323         tmp = readl(port->mmio + PORT_CMD);
324         if (enable)
325                 writel(tmp | PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
326         else
327                 writel(tmp & ~PORT_CMD_FIS_RX, port->mmio + PORT_CMD);
328
329         /* Flush */
330         readl(port->mmio + PORT_CMD);
331
332         return (((tmp & PORT_CMD_FIS_RX) == PORT_CMD_FIS_RX));
333 }
334
/*
 * Enable/disable the DMA engine
 *
 * @port   Pointer to the port data structure
 * @enable 1 to enable, 0 to disable
 *
 * return value
 *      Previous state: 1 enabled, 0 disabled.
 */
static int mtip_enable_engine(struct mtip_port *port, int enable)
{
	u32 tmp;

	/* Toggle the DMA engine start bit in PxCMD. */
	tmp = readl(port->mmio + PORT_CMD);
	if (enable)
		writel(tmp | PORT_CMD_START, port->mmio + PORT_CMD);
	else
		writel(tmp & ~PORT_CMD_START, port->mmio + PORT_CMD);

	/* Flush the posted write; return the prior state of the bit. */
	readl(port->mmio + PORT_CMD);
	return (((tmp & PORT_CMD_START) == PORT_CMD_START));
}
358
/*
 * Enables the port DMA engine and FIS reception.
 *
 * @port Pointer to the port data structure.
 *
 * return value
 *      None
 */
static inline void mtip_start_port(struct mtip_port *port)
{
	/* Enable FIS reception first (AHCI expects FRE before ST). */
	mtip_enable_fis(port, 1);

	/* Enable the DMA engine */
	mtip_enable_engine(port, 1);
}
373
/*
 * Deinitialize a port by disabling port interrupts, the DMA engine,
 * and FIS reception.
 *
 * @port Pointer to the port structure
 *
 * return value
 *      None
 */
static inline void mtip_deinit_port(struct mtip_port *port)
{
	/* Disable interrupts on this port */
	writel(0, port->mmio + PORT_IRQ_MASK);

	/* Disable the DMA engine */
	mtip_enable_engine(port, 0);

	/* Disable FIS reception */
	mtip_enable_fis(port, 0);
}
394
/*
 * Initialize a port.
 *
 * This function deinitializes the port by calling mtip_deinit_port() and
 * then initializes it by setting the command header and RX FIS addresses,
 * clearing the SError register and any pending port interrupts before
 * re-enabling the default set of port interrupts.
 *
 * @port Pointer to the port structure.
 *
 * return value
 *      None
 */
static void mtip_init_port(struct mtip_port *port)
{
	int i;
	mtip_deinit_port(port);

	/*
	 * Program the command list base and FIS base addresses; the high
	 * 32 bits are only programmed when the HBA supports 64-bit DMA.
	 */
	if (readl(port->dd->mmio + HOST_CAP) & HOST_CAP_64) {
		writel((port->command_list_dma >> 16) >> 16,
			 port->mmio + PORT_LST_ADDR_HI);
		writel((port->rxfis_dma >> 16) >> 16,
			 port->mmio + PORT_FIS_ADDR_HI);
	}

	writel(port->command_list_dma & 0xFFFFFFFF,
			port->mmio + PORT_LST_ADDR);
	writel(port->rxfis_dma & 0xFFFFFFFF, port->mmio + PORT_FIS_ADDR);

	/* Clear SError (write-1-to-clear). */
	writel(readl(port->mmio + PORT_SCR_ERR), port->mmio + PORT_SCR_ERR);

	/* reset the completed registers.*/
	for (i = 0; i < port->dd->slot_groups; i++)
		writel(0xFFFFFFFF, port->completed[i]);

	/* Clear any pending interrupts for this port */
	writel(readl(port->mmio + PORT_IRQ_STAT), port->mmio + PORT_IRQ_STAT);

	/* Clear any pending interrupts on the HBA. */
	writel(readl(port->dd->mmio + HOST_IRQ_STAT),
					port->dd->mmio + HOST_IRQ_STAT);

	/* Enable port interrupts */
	writel(DEF_PORT_IRQ, port->mmio + PORT_IRQ_MASK);
}
442
/*
 * Restart a port
 *
 * Stops the DMA engine, escalates to an HBA reset if the engine will
 * not stop (chip quirk), then performs a COM reset via PxSCTL.DET and
 * finally re-initializes and restarts the port. Bails out early at
 * each wait point if device removal is pending.
 *
 * @port Pointer to the port data structure.
 *
 * return value
 *      None
 */
static void mtip_restart_port(struct mtip_port *port)
{
	unsigned long timeout;

	/* Disable the DMA engine */
	mtip_enable_engine(port, 0);

	/* Chip quirk: wait up to 500ms for PxCMD.CR == 0 */
	timeout = jiffies + msecs_to_jiffies(500);
	while ((readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON)
		 && time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	/*
	 * Chip quirk: escalate to hba reset if
	 * PxCMD.CR not clear after 500 ms
	 */
	if (readl(port->mmio + PORT_CMD) & PORT_CMD_LIST_ON) {
		dev_warn(&port->dd->pdev->dev,
			"PxCMD.CR not clear, escalating reset\n");

		if (mtip_hba_reset(port->dd))
			dev_err(&port->dd->pdev->dev,
				"HBA reset escalation failed.\n");

		/* 30 ms delay before com reset to quiesce chip */
		mdelay(30);
	}

	dev_warn(&port->dd->pdev->dev, "Issuing COM reset\n");

	/* Set PxSCTL.DET (bit 0) to begin the COM reset. */
	writel(readl(port->mmio + PORT_SCR_CTL) |
			 1, port->mmio + PORT_SCR_CTL);
	readl(port->mmio + PORT_SCR_CTL);

	/* Wait 1 ms to quiesce chip function */
	timeout = jiffies + msecs_to_jiffies(1);
	while (time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	/* Clear PxSCTL.DET */
	writel(readl(port->mmio + PORT_SCR_CTL) & ~1,
			 port->mmio + PORT_SCR_CTL);
	readl(port->mmio + PORT_SCR_CTL);

	/* Wait 500 ms for bit 0 of PORT_SCR_STS to be set (device detected) */
	timeout = jiffies + msecs_to_jiffies(500);
	while (((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
			 && time_before(jiffies, timeout))
		;

	if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
		return;

	if ((readl(port->mmio + PORT_SCR_STAT) & 0x01) == 0)
		dev_warn(&port->dd->pdev->dev,
			"COM reset failed\n");

	/* Bring the port back up regardless; callers expect a restart. */
	mtip_init_port(port);
	mtip_start_port(port);

}
520
521 static int mtip_device_reset(struct driver_data *dd)
522 {
523         int rv = 0;
524
525         if (mtip_check_surprise_removal(dd->pdev))
526                 return 0;
527
528         if (mtip_hba_reset(dd) < 0)
529                 rv = -EFAULT;
530
531         mdelay(1);
532         mtip_init_port(dd->port);
533         mtip_start_port(dd->port);
534
535         /* Enable interrupts on the HBA. */
536         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
537                                         dd->mmio + HOST_CTL);
538         return rv;
539 }
540
541 /*
542  * Helper function for tag logging
543  */
544 static void print_tags(struct driver_data *dd,
545                         char *msg,
546                         unsigned long *tagbits,
547                         int cnt)
548 {
549         unsigned char tagmap[128];
550         int group, tagmap_len = 0;
551
552         memset(tagmap, 0, sizeof(tagmap));
553         for (group = SLOTBITS_IN_LONGS; group > 0; group--)
554                 tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ",
555                                                 tagbits[group-1]);
556         dev_warn(&dd->pdev->dev,
557                         "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap);
558 }
559
/*
 * Called periodically to see if any read/write commands are
 * taking too long to complete.
 *
 * Times out every active command whose comp_time has passed: completes
 * it with -EIO, unmaps its DMA, frees its slot, and — if anything timed
 * out — resets the device. Also expires the internal-command pause
 * timer. Re-arms itself at the end.
 *
 * @data Pointer to the PORT data structure (cast from timer data).
 *
 * return value
 *      None
 */
static void mtip_timeout_function(unsigned long int data)
{
	struct mtip_port *port = (struct mtip_port *) data;
	struct host_to_dev_fis *fis;
	struct mtip_cmd *command;
	int tag, cmdto_cnt = 0;
	unsigned int bit, group;
	unsigned int num_command_slots;
	unsigned long to, tagaccum[SLOTBITS_IN_LONGS];

	if (unlikely(!port))
		return;

	/* During resume, defer all timeout checks for 30 seconds. */
	if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) {
		mod_timer(&port->cmd_timer,
			jiffies + msecs_to_jiffies(30000));
		return;
	}
	/* clear the tag accumulator */
	memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
	num_command_slots = port->dd->slot_groups * 32;

	for (tag = 0; tag < num_command_slots; tag++) {
		/*
		 * Skip internal command slot as it has
		 * its own timeout mechanism
		 */
		if (tag == MTIP_TAG_INTERNAL)
			continue;

		if (atomic_read(&port->commands[tag].active) &&
		   (time_after(jiffies, port->commands[tag].comp_time))) {
			group = tag >> 5;
			bit = tag & 0x1F;

			command = &port->commands[tag];
			fis = (struct host_to_dev_fis *) command->command;

			set_bit(tag, tagaccum);
			cmdto_cnt++;
			/* First timeout found: enter error-handling mode. */
			if (cmdto_cnt == 1)
				set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);

			/*
			 * Clear the completed bit. This should prevent
			 *  any interrupt handlers from trying to retire
			 *  the command.
			 */
			writel(1 << bit, port->completed[group]);

			/* Call the async completion callback with -EIO. */
			if (likely(command->async_callback))
				command->async_callback(command->async_data,
							 -EIO);
			command->async_callback = NULL;
			command->comp_func = NULL;

			/* Unmap the DMA scatter list entries */
			dma_unmap_sg(&port->dd->pdev->dev,
					command->sg,
					command->scatter_ents,
					command->direction);

			/*
			 * Clear the allocated bit and active tag for the
			 * command.
			 */
			atomic_set(&port->commands[tag].active, 0);
			release_slot(port, tag);

			up(&port->cmd_slot);
		}
	}

	if (cmdto_cnt) {
		print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
		/* Reset unless an internal command is already handling EH. */
		if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
			mtip_device_reset(port->dd);
			wake_up_interruptible(&port->svc_wait);
		}
		clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
	}

	/* Expire the internal-command pause after one second. */
	if (port->ic_pause_timer) {
		to  = port->ic_pause_timer + msecs_to_jiffies(1000);
		if (time_after(jiffies, to)) {
			if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
				port->ic_pause_timer = 0;
				clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
				clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
				clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
				wake_up_interruptible(&port->svc_wait);
			}


		}
	}

	/* Restart the timer */
	mod_timer(&port->cmd_timer,
		jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
}
671
672 /*
673  * IO completion function.
674  *
675  * This completion function is called by the driver ISR when a
676  * command that was issued by the kernel completes. It first calls the
677  * asynchronous completion function which normally calls back into the block
678  * layer passing the asynchronous callback data, then unmaps the
679  * scatter list associated with the completed command, and finally
680  * clears the allocated bit associated with the completed command.
681  *
682  * @port   Pointer to the port data structure.
683  * @tag    Tag of the command.
684  * @data   Pointer to driver_data.
685  * @status Completion status.
686  *
687  * return value
688  *      None
689  */
690 static void mtip_async_complete(struct mtip_port *port,
691                                 int tag,
692                                 void *data,
693                                 int status)
694 {
695         struct mtip_cmd *command;
696         struct driver_data *dd = data;
697         int cb_status = status ? -EIO : 0;
698
699         if (unlikely(!dd) || unlikely(!port))
700                 return;
701
702         command = &port->commands[tag];
703
704         if (unlikely(status == PORT_IRQ_TF_ERR)) {
705                 dev_warn(&port->dd->pdev->dev,
706                         "Command tag %d failed due to TFE\n", tag);
707         }
708
709         /* Upper layer callback */
710         if (likely(command->async_callback))
711                 command->async_callback(command->async_data, cb_status);
712
713         command->async_callback = NULL;
714         command->comp_func = NULL;
715
716         /* Unmap the DMA scatter list entries */
717         dma_unmap_sg(&dd->pdev->dev,
718                 command->sg,
719                 command->scatter_ents,
720                 command->direction);
721
722         /* Clear the allocated and active bits for the command */
723         atomic_set(&port->commands[tag].active, 0);
724         release_slot(port, tag);
725
726         up(&port->cmd_slot);
727 }
728
729 /*
730  * Internal command completion callback function.
731  *
732  * This function is normally called by the driver ISR when an internal
733  * command completed. This function signals the command completion by
734  * calling complete().
735  *
736  * @port   Pointer to the port data structure.
737  * @tag    Tag of the command that has completed.
738  * @data   Pointer to a completion structure.
739  * @status Completion status.
740  *
741  * return value
742  *      None
743  */
744 static void mtip_completion(struct mtip_port *port,
745                             int tag,
746                             void *data,
747                             int status)
748 {
749         struct mtip_cmd *command = &port->commands[tag];
750         struct completion *waiting = data;
751         if (unlikely(status == PORT_IRQ_TF_ERR))
752                 dev_warn(&port->dd->pdev->dev,
753                         "Internal command %d completed with TFE\n", tag);
754
755         command->async_callback = NULL;
756         command->comp_func = NULL;
757
758         complete(waiting);
759 }
760
/*
 * No-op completion callback, used when a command's completion needs
 * no processing.
 */
static void mtip_null_completion(struct mtip_port *port,
			    int tag,
			    void *data,
			    int status)
{
}
768
769 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
770                                 dma_addr_t buffer_dma, unsigned int sectors);
771 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
772                                                 struct smart_attr *attrib);
773 /*
774  * Handle an error.
775  *
776  * @dd Pointer to the DRIVER_DATA structure.
777  *
778  * return value
779  *      None
780  */
781 static void mtip_handle_tfe(struct driver_data *dd)
782 {
783         int group, tag, bit, reissue, rv;
784         struct mtip_port *port;
785         struct mtip_cmd  *cmd;
786         u32 completed;
787         struct host_to_dev_fis *fis;
788         unsigned long tagaccum[SLOTBITS_IN_LONGS];
789         unsigned int cmd_cnt = 0;
790         unsigned char *buf;
791         char *fail_reason = NULL;
792         int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
793
794         dev_warn(&dd->pdev->dev, "Taskfile error\n");
795
796         port = dd->port;
797
798         /* Stop the timer to prevent command timeouts. */
799         del_timer(&port->cmd_timer);
800         set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
801
802         if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
803                         test_bit(MTIP_TAG_INTERNAL, port->allocated)) {
804                 cmd = &port->commands[MTIP_TAG_INTERNAL];
805                 dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n");
806
807                 atomic_inc(&cmd->active); /* active > 1 indicates error */
808                 if (cmd->comp_data && cmd->comp_func) {
809                         cmd->comp_func(port, MTIP_TAG_INTERNAL,
810                                         cmd->comp_data, PORT_IRQ_TF_ERR);
811                 }
812                 goto handle_tfe_exit;
813         }
814
815         /* clear the tag accumulator */
816         memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
817
818         /* Loop through all the groups */
819         for (group = 0; group < dd->slot_groups; group++) {
820                 completed = readl(port->completed[group]);
821
822                 /* clear completed status register in the hardware.*/
823                 writel(completed, port->completed[group]);
824
825                 /* Process successfully completed commands */
826                 for (bit = 0; bit < 32 && completed; bit++) {
827                         if (!(completed & (1<<bit)))
828                                 continue;
829                         tag = (group << 5) + bit;
830
831                         /* Skip the internal command slot */
832                         if (tag == MTIP_TAG_INTERNAL)
833                                 continue;
834
835                         cmd = &port->commands[tag];
836                         if (likely(cmd->comp_func)) {
837                                 set_bit(tag, tagaccum);
838                                 cmd_cnt++;
839                                 atomic_set(&cmd->active, 0);
840                                 cmd->comp_func(port,
841                                          tag,
842                                          cmd->comp_data,
843                                          0);
844                         } else {
845                                 dev_err(&port->dd->pdev->dev,
846                                         "Missing completion func for tag %d",
847                                         tag);
848                                 if (mtip_check_surprise_removal(dd->pdev)) {
849                                         mtip_command_cleanup(dd);
850                                         /* don't proceed further */
851                                         return;
852                                 }
853                         }
854                 }
855         }
856
857         print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
858
859         /* Restart the port */
860         mdelay(20);
861         mtip_restart_port(port);
862
863         /* Trying to determine the cause of the error */
864         rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
865                                 dd->port->log_buf,
866                                 dd->port->log_buf_dma, 1);
867         if (rv) {
868                 dev_warn(&dd->pdev->dev,
869                         "Error in READ LOG EXT (10h) command\n");
870                 /* non-critical error, don't fail the load */
871         } else {
872                 buf = (unsigned char *)dd->port->log_buf;
873                 if (buf[259] & 0x1) {
874                         dev_info(&dd->pdev->dev,
875                                 "Write protect bit is set.\n");
876                         set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
877                         fail_all_ncq_write = 1;
878                         fail_reason = "write protect";
879                 }
880                 if (buf[288] == 0xF7) {
881                         dev_info(&dd->pdev->dev,
882                                 "Exceeded Tmax, drive in thermal shutdown.\n");
883                         set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
884                         fail_all_ncq_cmds = 1;
885                         fail_reason = "thermal shutdown";
886                 }
887                 if (buf[288] == 0xBF) {
888                         dev_info(&dd->pdev->dev,
889                                 "Drive indicates rebuild has failed.\n");
890                         fail_all_ncq_cmds = 1;
891                         fail_reason = "rebuild failed";
892                 }
893         }
894
895         /* clear the tag accumulator */
896         memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
897
898         /* Loop through all the groups */
899         for (group = 0; group < dd->slot_groups; group++) {
900                 for (bit = 0; bit < 32; bit++) {
901                         reissue = 1;
902                         tag = (group << 5) + bit;
903                         cmd = &port->commands[tag];
904
905                         /* If the active bit is set re-issue the command */
906                         if (atomic_read(&cmd->active) == 0)
907                                 continue;
908
909                         fis = (struct host_to_dev_fis *)cmd->command;
910
911                         /* Should re-issue? */
912                         if (tag == MTIP_TAG_INTERNAL ||
913                             fis->command == ATA_CMD_SET_FEATURES)
914                                 reissue = 0;
915                         else {
916                                 if (fail_all_ncq_cmds ||
917                                         (fail_all_ncq_write &&
918                                         fis->command == ATA_CMD_FPDMA_WRITE)) {
919                                         dev_warn(&dd->pdev->dev,
920                                         "  Fail: %s w/tag %d [%s].\n",
921                                         fis->command == ATA_CMD_FPDMA_WRITE ?
922                                                 "write" : "read",
923                                         tag,
924                                         fail_reason != NULL ?
925                                                 fail_reason : "unknown");
926                                         atomic_set(&cmd->active, 0);
927                                         if (cmd->comp_func) {
928                                                 cmd->comp_func(port, tag,
929                                                         cmd->comp_data,
930                                                         -ENODATA);
931                                         }
932                                         continue;
933                                 }
934                         }
935
936                         /*
937                          * First check if this command has
938                          *  exceeded its retries.
939                          */
940                         if (reissue && (cmd->retries-- > 0)) {
941
942                                 set_bit(tag, tagaccum);
943
944                                 /* Re-issue the command. */
945                                 mtip_issue_ncq_command(port, tag);
946
947                                 continue;
948                         }
949
950                         /* Retire a command that will not be reissued */
951                         dev_warn(&port->dd->pdev->dev,
952                                 "retiring tag %d\n", tag);
953                         atomic_set(&cmd->active, 0);
954
955                         if (cmd->comp_func)
956                                 cmd->comp_func(
957                                         port,
958                                         tag,
959                                         cmd->comp_data,
960                                         PORT_IRQ_TF_ERR);
961                         else
962                                 dev_warn(&port->dd->pdev->dev,
963                                         "Bad completion for tag %d\n",
964                                         tag);
965                 }
966         }
967         print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
968
969 handle_tfe_exit:
970         /* clear eh_active */
971         clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
972         wake_up_interruptible(&port->svc_wait);
973
974         mod_timer(&port->cmd_timer,
975                  jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
976 }
977
978 /*
979  * Handle a set device bits interrupt
980  */
981 static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
982                                                         u32 completed)
983 {
984         struct driver_data *dd = port->dd;
985         int tag, bit;
986         struct mtip_cmd *command;
987
988         if (!completed) {
989                 WARN_ON_ONCE(!completed);
990                 return;
991         }
992         /* clear completed status register in the hardware.*/
993         writel(completed, port->completed[group]);
994
995         /* Process completed commands. */
996         for (bit = 0; (bit < 32) && completed; bit++) {
997                 if (completed & 0x01) {
998                         tag = (group << 5) | bit;
999
1000                         /* skip internal command slot. */
1001                         if (unlikely(tag == MTIP_TAG_INTERNAL))
1002                                 continue;
1003
1004                         command = &port->commands[tag];
1005                         /* make internal callback */
1006                         if (likely(command->comp_func)) {
1007                                 command->comp_func(
1008                                         port,
1009                                         tag,
1010                                         command->comp_data,
1011                                         0);
1012                         } else {
1013                                 dev_warn(&dd->pdev->dev,
1014                                         "Null completion "
1015                                         "for tag %d",
1016                                         tag);
1017
1018                                 if (mtip_check_surprise_removal(
1019                                         dd->pdev)) {
1020                                         mtip_command_cleanup(dd);
1021                                         return;
1022                                 }
1023                         }
1024                 }
1025                 completed >>= 1;
1026         }
1027
1028         /* If last, re-enable interrupts */
1029         if (atomic_dec_return(&dd->irq_workers_active) == 0)
1030                 writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
1031 }
1032
1033 /*
1034  * Process legacy pio and d2h interrupts
1035  */
1036 static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
1037 {
1038         struct mtip_port *port = dd->port;
1039         struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL];
1040
1041         if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) &&
1042             (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1043                 & (1 << MTIP_TAG_INTERNAL))) {
1044                 if (cmd->comp_func) {
1045                         cmd->comp_func(port,
1046                                 MTIP_TAG_INTERNAL,
1047                                 cmd->comp_data,
1048                                 0);
1049                         return;
1050                 }
1051         }
1052
1053         return;
1054 }
1055
1056 /*
1057  * Demux and handle errors
1058  */
1059 static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat)
1060 {
1061         if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR)))
1062                 mtip_handle_tfe(dd);
1063
1064         if (unlikely(port_stat & PORT_IRQ_CONNECT)) {
1065                 dev_warn(&dd->pdev->dev,
1066                         "Clearing PxSERR.DIAG.x\n");
1067                 writel((1 << 26), dd->port->mmio + PORT_SCR_ERR);
1068         }
1069
1070         if (unlikely(port_stat & PORT_IRQ_PHYRDY)) {
1071                 dev_warn(&dd->pdev->dev,
1072                         "Clearing PxSERR.DIAG.n\n");
1073                 writel((1 << 16), dd->port->mmio + PORT_SCR_ERR);
1074         }
1075
1076         if (unlikely(port_stat & ~PORT_IRQ_HANDLED)) {
1077                 dev_warn(&dd->pdev->dev,
1078                         "Port stat errors %x unhandled\n",
1079                         (port_stat & ~PORT_IRQ_HANDLED));
1080         }
1081 }
1082
/*
 * Core HBA interrupt processing.
 *
 * Acknowledges the pending port status, dispatches completed-command
 * processing (SDB FIS) to per-group workers, and demuxes error and
 * legacy (PIO/D2H) status to their handlers.
 *
 * @data Pointer to the driver data structure.
 *
 * return value
 *	IRQ_HANDLED	HBA status was pending and was processed.
 *	IRQ_NONE	No pending status; interrupt was not for this HBA.
 */
static inline irqreturn_t mtip_handle_irq(struct driver_data *data)
{
	struct driver_data *dd = (struct driver_data *) data;
	struct mtip_port *port = dd->port;
	u32 hba_stat, port_stat;
	int rv = IRQ_NONE;
	int do_irq_enable = 1, i, workers;
	struct mtip_work *twork;

	hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
	if (hba_stat) {
		rv = IRQ_HANDLED;

		/* Acknowledge the interrupt status on the port.*/
		port_stat = readl(port->mmio + PORT_IRQ_STAT);
		writel(port_stat, port->mmio + PORT_IRQ_STAT);

		/* Demux port status */
		if (likely(port_stat & PORT_IRQ_SDB_FIS)) {
			/*
			 * Defer re-enabling HBA interrupts: the last
			 * completing worker does it (see mtip_workq_sdbfx).
			 */
			do_irq_enable = 0;
			WARN_ON_ONCE(atomic_read(&dd->irq_workers_active) != 0);

			/*
			 * Snapshot each slot group's completion bits and
			 * count groups that have work; group 0 is processed
			 * locally below, groups 1+ are queued to their
			 * bound CPUs.
			 */
			for (i = 0, workers = 0; i < MTIP_MAX_SLOT_GROUPS;
									i++) {
				twork = &dd->work[i];
				twork->completed = readl(port->completed[i]);
				if (twork->completed)
					workers++;
			}

			atomic_set(&dd->irq_workers_active, workers);
			if (workers) {
				for (i = 1; i < MTIP_MAX_SLOT_GROUPS; i++) {
					twork = &dd->work[i];
					if (twork->completed)
						queue_work_on(
							twork->cpu_binding,
							dd->isr_workq,
							&twork->work);
				}

				/* Group 0 is handled in this context. */
				if (likely(dd->work[0].completed))
					mtip_workq_sdbfx(port, 0,
							dd->work[0].completed);

			} else {
				/*
				 * Chip quirk: SDB interrupt but nothing
				 * to complete
				 */
				do_irq_enable = 1;
			}
		}

		if (unlikely(port_stat & PORT_IRQ_ERR)) {
			if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
				mtip_command_cleanup(dd);
				/* don't proceed further */
				return IRQ_HANDLED;
			}
			if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
							&dd->dd_flag))
				return rv;

			mtip_process_errors(dd, port_stat & PORT_IRQ_ERR);
		}

		if (unlikely(port_stat & PORT_IRQ_LEGACY))
			mtip_process_legacy(dd, port_stat & PORT_IRQ_LEGACY);
	}

	/* acknowledge interrupt */
	if (unlikely(do_irq_enable))
		writel(hba_stat, dd->mmio + HOST_IRQ_STAT);

	return rv;
}
1161
1162 /*
1163  * HBA interrupt subroutine.
1164  *
1165  * @irq         IRQ number.
1166  * @instance    Pointer to the driver data structure.
1167  *
1168  * return value
1169  *      IRQ_HANDLED     A HBA interrupt was pending and handled.
1170  *      IRQ_NONE        This interrupt was not for the HBA.
1171  */
1172 static irqreturn_t mtip_irq_handler(int irq, void *instance)
1173 {
1174         struct driver_data *dd = instance;
1175
1176         return mtip_handle_irq(dd);
1177 }
1178
1179 static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag)
1180 {
1181         atomic_set(&port->commands[tag].active, 1);
1182         writel(1 << MTIP_TAG_BIT(tag),
1183                 port->cmd_issue[MTIP_TAG_INDEX(tag)]);
1184 }
1185
1186 static bool mtip_pause_ncq(struct mtip_port *port,
1187                                 struct host_to_dev_fis *fis)
1188 {
1189         struct host_to_dev_fis *reply;
1190         unsigned long task_file_data;
1191
1192         reply = port->rxfis + RX_FIS_D2H_REG;
1193         task_file_data = readl(port->mmio+PORT_TFDATA);
1194
1195         if (fis->command == ATA_CMD_SEC_ERASE_UNIT)
1196                 clear_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1197
1198         if ((task_file_data & 1))
1199                 return false;
1200
1201         if (fis->command == ATA_CMD_SEC_ERASE_PREP) {
1202                 set_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1203                 set_bit(MTIP_DDF_SEC_LOCK_BIT, &port->dd->dd_flag);
1204                 port->ic_pause_timer = jiffies;
1205                 return true;
1206         } else if ((fis->command == ATA_CMD_DOWNLOAD_MICRO) &&
1207                                         (fis->features == 0x03)) {
1208                 set_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1209                 port->ic_pause_timer = jiffies;
1210                 return true;
1211         } else if ((fis->command == ATA_CMD_SEC_ERASE_UNIT) ||
1212                 ((fis->command == 0xFC) &&
1213                         (fis->features == 0x27 || fis->features == 0x72 ||
1214                          fis->features == 0x62 || fis->features == 0x26))) {
1215                 /* Com reset after secure erase or lowlevel format */
1216                 mtip_restart_port(port);
1217                 return false;
1218         }
1219
1220         return false;
1221 }
1222
1223 /*
1224  * Wait for port to quiesce
1225  *
1226  * @port    Pointer to port data structure
1227  * @timeout Max duration to wait (ms)
1228  *
1229  * return value
1230  *      0       Success
1231  *      -EBUSY  Commands still active
1232  */
1233 static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout)
1234 {
1235         unsigned long to;
1236         unsigned int n;
1237         unsigned int active = 1;
1238
1239         to = jiffies + msecs_to_jiffies(timeout);
1240         do {
1241                 if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) &&
1242                         test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
1243                         msleep(20);
1244                         continue; /* svc thd is actively issuing commands */
1245                 }
1246                 if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1247                         return -EFAULT;
1248                 /*
1249                  * Ignore s_active bit 0 of array element 0.
1250                  * This bit will always be set
1251                  */
1252                 active = readl(port->s_active[0]) & 0xFFFFFFFE;
1253                 for (n = 1; n < port->dd->slot_groups; n++)
1254                         active |= readl(port->s_active[n]);
1255
1256                 if (!active)
1257                         break;
1258
1259                 msleep(20);
1260         } while (time_before(jiffies, to));
1261
1262         return active ? -EBUSY : 0;
1263 }
1264
1265 /*
1266  * Execute an internal command and wait for the completion.
1267  *
1268  * @port    Pointer to the port data structure.
1269  * @fis     Pointer to the FIS that describes the command.
1270  * @fis_len  Length in WORDS of the FIS.
1271  * @buffer  DMA accessible for command data.
1272  * @buf_len  Length, in bytes, of the data buffer.
1273  * @opts    Command header options, excluding the FIS length
1274  *             and the number of PRD entries.
1275  * @timeout Time in ms to wait for the command to complete.
1276  *
1277  * return value
1278  *      0        Command completed successfully.
1279  *      -EFAULT  The buffer address is not correctly aligned.
1280  *      -EBUSY   Internal command or other IO in progress.
1281  *      -EAGAIN  Time out waiting for command to complete.
1282  */
1283 static int mtip_exec_internal_command(struct mtip_port *port,
1284                                         struct host_to_dev_fis *fis,
1285                                         int fis_len,
1286                                         dma_addr_t buffer,
1287                                         int buf_len,
1288                                         u32 opts,
1289                                         gfp_t atomic,
1290                                         unsigned long timeout)
1291 {
1292         struct mtip_cmd_sg *command_sg;
1293         DECLARE_COMPLETION_ONSTACK(wait);
1294         int rv = 0, ready2go = 1;
1295         struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL];
1296         unsigned long to;
1297         struct driver_data *dd = port->dd;
1298
1299         /* Make sure the buffer is 8 byte aligned. This is asic specific. */
1300         if (buffer & 0x00000007) {
1301                 dev_err(&dd->pdev->dev, "SG buffer is not 8 byte aligned\n");
1302                 return -EFAULT;
1303         }
1304
1305         to = jiffies + msecs_to_jiffies(timeout);
1306         do {
1307                 ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL,
1308                                                 port->allocated);
1309                 if (ready2go)
1310                         break;
1311                 mdelay(100);
1312         } while (time_before(jiffies, to));
1313         if (!ready2go) {
1314                 dev_warn(&dd->pdev->dev,
1315                         "Internal cmd active. new cmd [%02X]\n", fis->command);
1316                 return -EBUSY;
1317         }
1318         set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1319         port->ic_pause_timer = 0;
1320
1321         clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags);
1322         clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags);
1323
1324         if (atomic == GFP_KERNEL) {
1325                 if (fis->command != ATA_CMD_STANDBYNOW1) {
1326                         /* wait for io to complete if non atomic */
1327                         if (mtip_quiesce_io(port, 5000) < 0) {
1328                                 dev_warn(&dd->pdev->dev,
1329                                         "Failed to quiesce IO\n");
1330                                 release_slot(port, MTIP_TAG_INTERNAL);
1331                                 clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1332                                 wake_up_interruptible(&port->svc_wait);
1333                                 return -EBUSY;
1334                         }
1335                 }
1336
1337                 /* Set the completion function and data for the command. */
1338                 int_cmd->comp_data = &wait;
1339                 int_cmd->comp_func = mtip_completion;
1340
1341         } else {
1342                 /* Clear completion - we're going to poll */
1343                 int_cmd->comp_data = NULL;
1344                 int_cmd->comp_func = mtip_null_completion;
1345         }
1346
1347         /* Copy the command to the command table */
1348         memcpy(int_cmd->command, fis, fis_len*4);
1349
1350         /* Populate the SG list */
1351         int_cmd->command_header->opts =
1352                  __force_bit2int cpu_to_le32(opts | fis_len);
1353         if (buf_len) {
1354                 command_sg = int_cmd->command + AHCI_CMD_TBL_HDR_SZ;
1355
1356                 command_sg->info =
1357                         __force_bit2int cpu_to_le32((buf_len-1) & 0x3FFFFF);
1358                 command_sg->dba =
1359                         __force_bit2int cpu_to_le32(buffer & 0xFFFFFFFF);
1360                 command_sg->dba_upper =
1361                         __force_bit2int cpu_to_le32((buffer >> 16) >> 16);
1362
1363                 int_cmd->command_header->opts |=
1364                         __force_bit2int cpu_to_le32((1 << 16));
1365         }
1366
1367         /* Populate the command header */
1368         int_cmd->command_header->byte_count = 0;
1369
1370         /* Issue the command to the hardware */
1371         mtip_issue_non_ncq_command(port, MTIP_TAG_INTERNAL);
1372
1373         if (atomic == GFP_KERNEL) {
1374                 /* Wait for the command to complete or timeout. */
1375                 if (wait_for_completion_interruptible_timeout(
1376                                 &wait,
1377                                 msecs_to_jiffies(timeout)) <= 0) {
1378                         if (rv == -ERESTARTSYS) { /* interrupted */
1379                                 dev_err(&dd->pdev->dev,
1380                                         "Internal command [%02X] was interrupted after %lu ms\n",
1381                                         fis->command, timeout);
1382                                 rv = -EINTR;
1383                                 goto exec_ic_exit;
1384                         } else if (rv == 0) /* timeout */
1385                                 dev_err(&dd->pdev->dev,
1386                                         "Internal command did not complete [%02X] within timeout of  %lu ms\n",
1387                                         fis->command, timeout);
1388                         else
1389                                 dev_err(&dd->pdev->dev,
1390                                         "Internal command [%02X] wait returned code [%d] after %lu ms - unhandled\n",
1391                                         fis->command, rv, timeout);
1392
1393                         if (mtip_check_surprise_removal(dd->pdev) ||
1394                                 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1395                                                 &dd->dd_flag)) {
1396                                 dev_err(&dd->pdev->dev,
1397                                         "Internal command [%02X] wait returned due to SR\n",
1398                                         fis->command);
1399                                 rv = -ENXIO;
1400                                 goto exec_ic_exit;
1401                         }
1402                         mtip_device_reset(dd); /* recover from timeout issue */
1403                         rv = -EAGAIN;
1404                         goto exec_ic_exit;
1405                 }
1406         } else {
1407                 u32 hba_stat, port_stat;
1408
1409                 /* Spin for <timeout> checking if command still outstanding */
1410                 timeout = jiffies + msecs_to_jiffies(timeout);
1411                 while ((readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1412                                 & (1 << MTIP_TAG_INTERNAL))
1413                                 && time_before(jiffies, timeout)) {
1414                         if (mtip_check_surprise_removal(dd->pdev)) {
1415                                 rv = -ENXIO;
1416                                 goto exec_ic_exit;
1417                         }
1418                         if ((fis->command != ATA_CMD_STANDBYNOW1) &&
1419                                 test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
1420                                                 &dd->dd_flag)) {
1421                                 rv = -ENXIO;
1422                                 goto exec_ic_exit;
1423                         }
1424                         port_stat = readl(port->mmio + PORT_IRQ_STAT);
1425                         if (!port_stat)
1426                                 continue;
1427
1428                         if (port_stat & PORT_IRQ_ERR) {
1429                                 dev_err(&dd->pdev->dev,
1430                                         "Internal command [%02X] failed\n",
1431                                         fis->command);
1432                                 mtip_device_reset(dd);
1433                                 rv = -EIO;
1434                                 goto exec_ic_exit;
1435                         } else {
1436                                 writel(port_stat, port->mmio + PORT_IRQ_STAT);
1437                                 hba_stat = readl(dd->mmio + HOST_IRQ_STAT);
1438                                 if (hba_stat)
1439                                         writel(hba_stat,
1440                                                 dd->mmio + HOST_IRQ_STAT);
1441                         }
1442                         break;
1443                 }
1444         }
1445
1446         if (readl(port->cmd_issue[MTIP_TAG_INTERNAL])
1447                         & (1 << MTIP_TAG_INTERNAL)) {
1448                 rv = -ENXIO;
1449                 if (!test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
1450                         mtip_device_reset(dd);
1451                         rv = -EAGAIN;
1452                 }
1453         }
1454 exec_ic_exit:
1455         /* Clear the allocated and active bits for the internal command. */
1456         atomic_set(&int_cmd->active, 0);
1457         release_slot(port, MTIP_TAG_INTERNAL);
1458         if (rv >= 0 && mtip_pause_ncq(port, fis)) {
1459                 /* NCQ paused */
1460                 return rv;
1461         }
1462         clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags);
1463         wake_up_interruptible(&port->svc_wait);
1464
1465         return rv;
1466 }
1467
1468 /*
1469  * Byte-swap ATA ID strings.
1470  *
1471  * ATA identify data contains strings in byte-swapped 16-bit words.
1472  * They must be swapped (on all architectures) to be usable as C strings.
1473  * This function swaps bytes in-place.
1474  *
1475  * @buf The buffer location of the string
1476  * @len The number of bytes to swap
1477  *
1478  * return value
1479  *      None
1480  */
1481 static inline void ata_swap_string(u16 *buf, unsigned int len)
1482 {
1483         int i;
1484         for (i = 0; i < (len/2); i++)
1485                 be16_to_cpus(&buf[i]);
1486 }
1487
1488 /*
1489  * Request the device identity information.
1490  *
1491  * If a user space buffer is not specified, i.e. is NULL, the
1492  * identify information is still read from the drive and placed
1493  * into the identify data buffer (@e port->identify) in the
1494  * port data structure.
1495  * When the identify buffer contains valid identify information @e
1496  * port->identify_valid is non-zero.
1497  *
1498  * @port         Pointer to the port structure.
1499  * @user_buffer  A user space buffer where the identify data should be
1500  *                    copied.
1501  *
1502  * return value
1503  *      0       Command completed successfully.
1504  *      -EFAULT An error occurred while coping data to the user buffer.
1505  *      -1      Command failed.
1506  */
1507 static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer)
1508 {
1509         int rv = 0;
1510         struct host_to_dev_fis fis;
1511
1512         if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag))
1513                 return -EFAULT;
1514
1515         /* Build the FIS. */
1516         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1517         fis.type        = 0x27;
1518         fis.opts        = 1 << 7;
1519         fis.command     = ATA_CMD_ID_ATA;
1520
1521         /* Set the identify information as invalid. */
1522         port->identify_valid = 0;
1523
1524         /* Clear the identify information. */
1525         memset(port->identify, 0, sizeof(u16) * ATA_ID_WORDS);
1526
1527         /* Execute the command. */
1528         if (mtip_exec_internal_command(port,
1529                                 &fis,
1530                                 5,
1531                                 port->identify_dma,
1532                                 sizeof(u16) * ATA_ID_WORDS,
1533                                 0,
1534                                 GFP_KERNEL,
1535                                 MTIP_INTERNAL_COMMAND_TIMEOUT_MS)
1536                                 < 0) {
1537                 rv = -1;
1538                 goto out;
1539         }
1540
1541         /*
1542          * Perform any necessary byte-swapping.  Yes, the kernel does in fact
1543          * perform field-sensitive swapping on the string fields.
1544          * See the kernel use of ata_id_string() for proof of this.
1545          */
1546 #ifdef __LITTLE_ENDIAN
1547         ata_swap_string(port->identify + 27, 40);  /* model string*/
1548         ata_swap_string(port->identify + 23, 8);   /* firmware string*/
1549         ata_swap_string(port->identify + 10, 20);  /* serial# string*/
1550 #else
1551         {
1552                 int i;
1553                 for (i = 0; i < ATA_ID_WORDS; i++)
1554                         port->identify[i] = le16_to_cpu(port->identify[i]);
1555         }
1556 #endif
1557
1558         /* Demux ID.DRAT & ID.RZAT to determine trim support */
1559         if (port->identify[69] & (1 << 14) && port->identify[69] & (1 << 5))
1560                 port->dd->trim_supp = true;
1561         else
1562                 port->dd->trim_supp = false;
1563
1564         /* Set the identify buffer as valid. */
1565         port->identify_valid = 1;
1566
1567         if (user_buffer) {
1568                 if (copy_to_user(
1569                         user_buffer,
1570                         port->identify,
1571                         ATA_ID_WORDS * sizeof(u16))) {
1572                         rv = -EFAULT;
1573                         goto out;
1574                 }
1575         }
1576
1577 out:
1578         return rv;
1579 }
1580
1581 /*
1582  * Issue a standby immediate command to the device.
1583  *
1584  * @port Pointer to the port structure.
1585  *
1586  * return value
1587  *      0       Command was executed successfully.
1588  *      -1      An error occurred while executing the command.
1589  */
1590 static int mtip_standby_immediate(struct mtip_port *port)
1591 {
1592         int rv;
1593         struct host_to_dev_fis  fis;
1594         unsigned long start;
1595
1596         /* Build the FIS. */
1597         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1598         fis.type        = 0x27;
1599         fis.opts        = 1 << 7;
1600         fis.command     = ATA_CMD_STANDBYNOW1;
1601
1602         start = jiffies;
1603         rv = mtip_exec_internal_command(port,
1604                                         &fis,
1605                                         5,
1606                                         0,
1607                                         0,
1608                                         0,
1609                                         GFP_ATOMIC,
1610                                         15000);
1611         dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n",
1612                         jiffies_to_msecs(jiffies - start));
1613         if (rv)
1614                 dev_warn(&port->dd->pdev->dev,
1615                         "STANDBY IMMEDIATE command failed.\n");
1616
1617         return rv;
1618 }
1619
1620 /*
1621  * Issue a READ LOG EXT command to the device.
1622  *
1623  * @port        pointer to the port structure.
1624  * @page        page number to fetch
1625  * @buffer      pointer to buffer
1626  * @buffer_dma  dma address corresponding to @buffer
1627  * @sectors     page length to fetch, in sectors
1628  *
1629  * return value
1630  *      @rv     return value from mtip_exec_internal_command()
1631  */
1632 static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer,
1633                                 dma_addr_t buffer_dma, unsigned int sectors)
1634 {
1635         struct host_to_dev_fis fis;
1636
1637         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1638         fis.type        = 0x27;
1639         fis.opts        = 1 << 7;
1640         fis.command     = ATA_CMD_READ_LOG_EXT;
1641         fis.sect_count  = sectors & 0xFF;
1642         fis.sect_cnt_ex = (sectors >> 8) & 0xFF;
1643         fis.lba_low     = page;
1644         fis.lba_mid     = 0;
1645         fis.device      = ATA_DEVICE_OBS;
1646
1647         memset(buffer, 0, sectors * ATA_SECT_SIZE);
1648
1649         return mtip_exec_internal_command(port,
1650                                         &fis,
1651                                         5,
1652                                         buffer_dma,
1653                                         sectors * ATA_SECT_SIZE,
1654                                         0,
1655                                         GFP_ATOMIC,
1656                                         MTIP_INTERNAL_COMMAND_TIMEOUT_MS);
1657 }
1658
1659 /*
1660  * Issue a SMART READ DATA command to the device.
1661  *
1662  * @port        pointer to the port structure.
1663  * @buffer      pointer to buffer
1664  * @buffer_dma  dma address corresponding to @buffer
1665  *
1666  * return value
1667  *      @rv     return value from mtip_exec_internal_command()
1668  */
1669 static int mtip_get_smart_data(struct mtip_port *port, u8 *buffer,
1670                                         dma_addr_t buffer_dma)
1671 {
1672         struct host_to_dev_fis fis;
1673
1674         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1675         fis.type        = 0x27;
1676         fis.opts        = 1 << 7;
1677         fis.command     = ATA_CMD_SMART;
1678         fis.features    = 0xD0;
1679         fis.sect_count  = 1;
1680         fis.lba_mid     = 0x4F;
1681         fis.lba_hi      = 0xC2;
1682         fis.device      = ATA_DEVICE_OBS;
1683
1684         return mtip_exec_internal_command(port,
1685                                         &fis,
1686                                         5,
1687                                         buffer_dma,
1688                                         ATA_SECT_SIZE,
1689                                         0,
1690                                         GFP_ATOMIC,
1691                                         15000);
1692 }
1693
1694 /*
1695  * Get the value of a smart attribute
1696  *
1697  * @port        pointer to the port structure
1698  * @id          attribute number
1699  * @attrib      pointer to return attrib information corresponding to @id
1700  *
1701  * return value
1702  *      -EINVAL NULL buffer passed or unsupported attribute @id.
1703  *      -EPERM  Identify data not valid, SMART not supported or not enabled
1704  */
1705 static int mtip_get_smart_attr(struct mtip_port *port, unsigned int id,
1706                                                 struct smart_attr *attrib)
1707 {
1708         int rv, i;
1709         struct smart_attr *pattr;
1710
1711         if (!attrib)
1712                 return -EINVAL;
1713
1714         if (!port->identify_valid) {
1715                 dev_warn(&port->dd->pdev->dev, "IDENTIFY DATA not valid\n");
1716                 return -EPERM;
1717         }
1718         if (!(port->identify[82] & 0x1)) {
1719                 dev_warn(&port->dd->pdev->dev, "SMART not supported\n");
1720                 return -EPERM;
1721         }
1722         if (!(port->identify[85] & 0x1)) {
1723                 dev_warn(&port->dd->pdev->dev, "SMART not enabled\n");
1724                 return -EPERM;
1725         }
1726
1727         memset(port->smart_buf, 0, ATA_SECT_SIZE);
1728         rv = mtip_get_smart_data(port, port->smart_buf, port->smart_buf_dma);
1729         if (rv) {
1730                 dev_warn(&port->dd->pdev->dev, "Failed to ge SMART data\n");
1731                 return rv;
1732         }
1733
1734         pattr = (struct smart_attr *)(port->smart_buf + 2);
1735         for (i = 0; i < 29; i++, pattr++)
1736                 if (pattr->attr_id == id) {
1737                         memcpy(attrib, pattr, sizeof(struct smart_attr));
1738                         break;
1739                 }
1740
1741         if (i == 29) {
1742                 dev_warn(&port->dd->pdev->dev,
1743                         "Query for invalid SMART attribute ID\n");
1744                 rv = -EINVAL;
1745         }
1746
1747         return rv;
1748 }
1749
1750 /*
1751  * Trim unused sectors
1752  *
1753  * @dd          pointer to driver_data structure
1754  * @lba         starting lba
1755  * @len         # of 512b sectors to trim
1756  *
1757  * return value
1758  *      -ENOMEM         Out of dma memory
1759  *      -EINVAL         Invalid parameters passed in, trim not supported
1760  *      -EIO            Error submitting trim request to hw
1761  */
1762 static int mtip_send_trim(struct driver_data *dd, unsigned int lba,
1763                                 unsigned int len)
1764 {
1765         int i, rv = 0;
1766         u64 tlba, tlen, sect_left;
1767         struct mtip_trim_entry *buf;
1768         dma_addr_t dma_addr;
1769         struct host_to_dev_fis fis;
1770
1771         if (!len || dd->trim_supp == false)
1772                 return -EINVAL;
1773
1774         /* Trim request too big */
1775         WARN_ON(len > (MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES));
1776
1777         /* Trim request not aligned on 4k boundary */
1778         WARN_ON(len % 8 != 0);
1779
1780         /* Warn if vu_trim structure is too big */
1781         WARN_ON(sizeof(struct mtip_trim) > ATA_SECT_SIZE);
1782
1783         /* Allocate a DMA buffer for the trim structure */
1784         buf = dmam_alloc_coherent(&dd->pdev->dev, ATA_SECT_SIZE, &dma_addr,
1785                                                                 GFP_KERNEL);
1786         if (!buf)
1787                 return -ENOMEM;
1788         memset(buf, 0, ATA_SECT_SIZE);
1789
1790         for (i = 0, sect_left = len, tlba = lba;
1791                         i < MTIP_MAX_TRIM_ENTRIES && sect_left;
1792                         i++) {
1793                 tlen = (sect_left >= MTIP_MAX_TRIM_ENTRY_LEN ?
1794                                         MTIP_MAX_TRIM_ENTRY_LEN :
1795                                         sect_left);
1796                 buf[i].lba = __force_bit2int cpu_to_le32(tlba);
1797                 buf[i].range = __force_bit2int cpu_to_le16(tlen);
1798                 tlba += tlen;
1799                 sect_left -= tlen;
1800         }
1801         WARN_ON(sect_left != 0);
1802
1803         /* Build the fis */
1804         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1805         fis.type       = 0x27;
1806         fis.opts       = 1 << 7;
1807         fis.command    = 0xfb;
1808         fis.features   = 0x60;
1809         fis.sect_count = 1;
1810         fis.device     = ATA_DEVICE_OBS;
1811
1812         if (mtip_exec_internal_command(dd->port,
1813                                         &fis,
1814                                         5,
1815                                         dma_addr,
1816                                         ATA_SECT_SIZE,
1817                                         0,
1818                                         GFP_KERNEL,
1819                                         MTIP_TRIM_TIMEOUT_MS) < 0)
1820                 rv = -EIO;
1821
1822         dmam_free_coherent(&dd->pdev->dev, ATA_SECT_SIZE, buf, dma_addr);
1823         return rv;
1824 }
1825
1826 /*
1827  * Get the drive capacity.
1828  *
1829  * @dd      Pointer to the device data structure.
1830  * @sectors Pointer to the variable that will receive the sector count.
1831  *
1832  * return value
1833  *      1 Capacity was returned successfully.
1834  *      0 The identify information is invalid.
1835  */
1836 static bool mtip_hw_get_capacity(struct driver_data *dd, sector_t *sectors)
1837 {
1838         struct mtip_port *port = dd->port;
1839         u64 total, raw0, raw1, raw2, raw3;
1840         raw0 = port->identify[100];
1841         raw1 = port->identify[101];
1842         raw2 = port->identify[102];
1843         raw3 = port->identify[103];
1844         total = raw0 | raw1<<16 | raw2<<32 | raw3<<48;
1845         *sectors = total;
1846         return (bool) !!port->identify_valid;
1847 }
1848
1849 /*
1850  * Display the identify command data.
1851  *
1852  * @port Pointer to the port data structure.
1853  *
1854  * return value
1855  *      None
1856  */
1857 static void mtip_dump_identify(struct mtip_port *port)
1858 {
1859         sector_t sectors;
1860         unsigned short revid;
1861         char cbuf[42];
1862
1863         if (!port->identify_valid)
1864                 return;
1865
1866         strlcpy(cbuf, (char *)(port->identify+10), 21);
1867         dev_info(&port->dd->pdev->dev,
1868                 "Serial No.: %s\n", cbuf);
1869
1870         strlcpy(cbuf, (char *)(port->identify+23), 9);
1871         dev_info(&port->dd->pdev->dev,
1872                 "Firmware Ver.: %s\n", cbuf);
1873
1874         strlcpy(cbuf, (char *)(port->identify+27), 41);
1875         dev_info(&port->dd->pdev->dev, "Model: %s\n", cbuf);
1876
1877         if (mtip_hw_get_capacity(port->dd, &sectors))
1878                 dev_info(&port->dd->pdev->dev,
1879                         "Capacity: %llu sectors (%llu MB)\n",
1880                          (u64)sectors,
1881                          ((u64)sectors) * ATA_SECT_SIZE >> 20);
1882
1883         pci_read_config_word(port->dd->pdev, PCI_REVISION_ID, &revid);
1884         switch (revid & 0xFF) {
1885         case 0x1:
1886                 strlcpy(cbuf, "A0", 3);
1887                 break;
1888         case 0x3:
1889                 strlcpy(cbuf, "A2", 3);
1890                 break;
1891         default:
1892                 strlcpy(cbuf, "?", 2);
1893                 break;
1894         }
1895         dev_info(&port->dd->pdev->dev,
1896                 "Card Type: %s\n", cbuf);
1897 }
1898
1899 /*
1900  * Map the commands scatter list into the command table.
1901  *
1902  * @command Pointer to the command.
1903  * @nents Number of scatter list entries.
1904  *
1905  * return value
1906  *      None
1907  */
1908 static inline void fill_command_sg(struct driver_data *dd,
1909                                 struct mtip_cmd *command,
1910                                 int nents)
1911 {
1912         int n;
1913         unsigned int dma_len;
1914         struct mtip_cmd_sg *command_sg;
1915         struct scatterlist *sg = command->sg;
1916
1917         command_sg = command->command + AHCI_CMD_TBL_HDR_SZ;
1918
1919         for (n = 0; n < nents; n++) {
1920                 dma_len = sg_dma_len(sg);
1921                 if (dma_len > 0x400000)
1922                         dev_err(&dd->pdev->dev,
1923                                 "DMA segment length truncated\n");
1924                 command_sg->info = __force_bit2int
1925                         cpu_to_le32((dma_len-1) & 0x3FFFFF);
1926                 command_sg->dba = __force_bit2int
1927                         cpu_to_le32(sg_dma_address(sg));
1928                 command_sg->dba_upper = __force_bit2int
1929                         cpu_to_le32((sg_dma_address(sg) >> 16) >> 16);
1930                 command_sg++;
1931                 sg++;
1932         }
1933 }
1934
1935 /*
1936  * @brief Execute a drive command.
1937  *
1938  * return value 0 The command completed successfully.
1939  * return value -1 An error occurred while executing the command.
1940  */
1941 static int exec_drive_task(struct mtip_port *port, u8 *command)
1942 {
1943         struct host_to_dev_fis  fis;
1944         struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG);
1945
1946         /* Build the FIS. */
1947         memset(&fis, 0, sizeof(struct host_to_dev_fis));
1948         fis.type        = 0x27;
1949         fis.opts        = 1 << 7;
1950         fis.command     = command[0];
1951         fis.features    = command[1];
1952         fis.sect_count  = command[2];
1953         fis.sector      = command[3];
1954         fis.cyl_low     = command[4];
1955         fis.cyl_hi      = command[5];
1956         fis.device      = command[6] & ~0x10; /* Clear the dev bit*/
1957
1958         dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n",
1959                 __func__,
1960                 command[0],
1961                 command[1],
1962                 command[2],
1963                 command[3],
1964                 command[4],
1965                 command[5],
1966                 command[6]);
1967
1968         /* Execute the command. */
1969         if (mtip_exec_internal_command(port,
1970                                  &fis,
1971                                  5,
1972                                  0,
1973                                  0,
1974                                  0,
1975                                  GFP_KERNEL,
1976                                  MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) {
1977                 return -1;
1978         }
1979
1980         command[0] = reply->command; /* Status*/
1981         command[1] = reply->features; /* Error*/
1982         command[4] = reply->cyl_low;
1983         command[5] = reply->cyl_hi;
1984
1985         dbg_printk(MTIP_DRV_NAME " %s: Completion Status: stat %x, err %x , cyl_lo %x cyl_hi %x\n",
1986                 __func__,
1987                 command[0],
1988                 command[1],
1989                 command[4],
1990                 command[5]);
1991
1992         return 0;
1993 }
1994
1995 /*
1996  * @brief Execute a drive command.
1997  *
1998  * @param port Pointer to the port data structure.
1999  * @param command Pointer to the user specified command parameters.
2000  * @param user_buffer Pointer to the user space buffer where read sector
2001  *                   data should be copied.
2002  *
2003  * return value 0 The command completed successfully.
2004  * return value -EFAULT An error occurred while copying the completion
2005  *                 data to the user space buffer.
2006  * return value -1 An error occurred while executing the command.
2007  */
2008 static int exec_drive_command(struct mtip_port *port, u8 *command,
2009                                 void __user *user_buffer)
2010 {
2011         struct host_to_dev_fis  fis;
2012         struct host_to_dev_fis *reply;
2013         u8 *buf = NULL;
2014         dma_addr_t dma_addr = 0;
2015         int rv = 0, xfer_sz = command[3];
2016
2017         if (xfer_sz) {
2018                 if (!user_buffer)
2019                         return -EFAULT;
2020
2021                 buf = dmam_alloc_coherent(&port->dd->pdev->dev,
2022                                 ATA_SECT_SIZE * xfer_sz,
2023                                 &dma_addr,
2024                                 GFP_KERNEL);
2025                 if (!buf) {
2026                         dev_err(&port->dd->pdev->dev,
2027                                 "Memory allocation failed (%d bytes)\n",
2028                                 ATA_SECT_SIZE * xfer_sz);
2029                         return -ENOMEM;
2030                 }
2031                 memset(buf, 0, ATA_SECT_SIZE * xfer_sz);
2032         }
2033
2034         /* Build the FIS. */
2035         memset(&fis, 0, sizeof(struct host_to_dev_fis));
2036         fis.type        = 0x27;
2037         fis.opts        = 1 << 7;
2038         fis.command     = command[0];
2039         fis.features    = command[2];
2040         fis.sect_count  = command[3];
2041         if (fis.command == ATA_CMD_SMART) {
2042                 fis.sector      = command[1];
2043                 fis.cyl_low     = 0x4F;
2044                 fis.cyl_hi      = 0xC2;
2045         }
2046
2047         if (xfer_sz)
2048                 reply = (port->rxfis + RX_FIS_PIO_SETUP);
2049         else
2050                 reply = (port->rxfis + RX_FIS_D2H_REG);
2051
2052         dbg_printk(MTIP_DRV_NAME
2053                 " %s: User Command: cmd %x, sect %x, "
2054                 "feat %x, sectcnt %x\n",
2055                 __func__,
2056                 command[0],
2057                 command[1],
2058                 command[2],
2059                 command[3]);
2060
2061         /* Execute the command. */
2062         if (mtip_exec_internal_command(port,
2063                                 &fis,
2064                                  5,
2065                                  (xfer_sz ? dma_addr : 0),
2066                                  (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0),
2067                                  0,
2068                                  GFP_KERNEL,
2069                                  MTIP_IOCTL_COMMAND_TIMEOUT_MS)
2070                                  < 0) {
2071                 rv = -EFAULT;
2072                 goto exit_drive_command;
2073         }
2074
2075         /* Collect the completion status. */
2076         command[0] = reply->command; /* Status*/
2077         command[1] = reply->features; /* Error*/
2078         command[2] = reply->sect_count;
2079
2080         dbg_printk(MTIP_DRV_NAME
2081                 " %s: Completion Status: stat %x, "
2082                 "err %x, nsect %x\n",
2083                 __func__,
2084                 command[0],
2085                 command[1],
2086                 command[2]);
2087
2088         if (xfer_sz) {
2089                 if (copy_to_user(user_buffer,
2090                                  buf,
2091                                  ATA_SECT_SIZE * command[3])) {
2092                         rv = -EFAULT;
2093                         goto exit_drive_command;
2094                 }
2095         }
2096 exit_drive_command:
2097         if (buf)
2098                 dmam_free_coherent(&port->dd->pdev->dev,
2099                                 ATA_SECT_SIZE * xfer_sz, buf, dma_addr);
2100         return rv;
2101 }
2102
2103 /*
2104  *  Indicates whether a command has a single sector payload.
2105  *
2106  *  @command passed to the device to perform the certain event.
2107  *  @features passed to the device to perform the certain event.
2108  *
2109  *  return value
2110  *      1       command is one that always has a single sector payload,
2111  *              regardless of the value in the Sector Count field.
2112  *      0       otherwise
2113  *
2114  */
2115 static unsigned int implicit_sector(unsigned char command,
2116                                     unsigned char features)
2117 {
2118         unsigned int rv = 0;
2119
2120         /* list of commands that have an implicit sector count of 1 */
2121         switch (command) {
2122         case ATA_CMD_SEC_SET_PASS:
2123         case ATA_CMD_SEC_UNLOCK:
2124         case ATA_CMD_SEC_ERASE_PREP:
2125         case ATA_CMD_SEC_ERASE_UNIT:
2126         case ATA_CMD_SEC_FREEZE_LOCK:
2127         case ATA_CMD_SEC_DISABLE_PASS:
2128         case ATA_CMD_PMP_READ:
2129         case ATA_CMD_PMP_WRITE:
2130                 rv = 1;
2131                 break;
2132         case ATA_CMD_SET_MAX:
2133                 if (features == ATA_SET_MAX_UNLOCK)
2134                         rv = 1;
2135                 break;
2136         case ATA_CMD_SMART:
2137                 if ((features == ATA_SMART_READ_VALUES) ||
2138                                 (features == ATA_SMART_READ_THRESHOLDS))
2139                         rv = 1;
2140                 break;
2141         case ATA_CMD_CONF_OVERLAY:
2142                 if ((features == ATA_DCO_IDENTIFY) ||
2143                                 (features == ATA_DCO_SET))
2144                         rv = 1;
2145                 break;
2146         }
2147         return rv;
2148 }
2149 static void mtip_set_timeout(struct driver_data *dd,
2150                                         struct host_to_dev_fis *fis,
2151                                         unsigned int *timeout, u8 erasemode)
2152 {
2153         switch (fis->command) {
2154         case ATA_CMD_DOWNLOAD_MICRO:
2155                 *timeout = 120000; /* 2 minutes */
2156                 break;
2157         case ATA_CMD_SEC_ERASE_UNIT:
2158         case 0xFC:
2159                 if (erasemode)
2160                         *timeout = ((*(dd->port->identify + 90) * 2) * 60000);
2161                 else
2162                         *timeout = ((*(dd->port->identify + 89) * 2) * 60000);
2163                 break;
2164         case ATA_CMD_STANDBYNOW1:
2165                 *timeout = 120000;  /* 2 minutes */
2166                 break;
2167         case 0xF7:
2168         case 0xFA:
2169                 *timeout = 60000;  /* 60 seconds */
2170                 break;
2171         case ATA_CMD_SMART:
2172                 *timeout = 15000;  /* 15 seconds */
2173                 break;
2174         default:
2175                 *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS;
2176                 break;
2177         }
2178 }
2179
2180 /*
2181  * Executes a taskfile
2182  * See ide_taskfile_ioctl() for derivation
2183  */
2184 static int exec_drive_taskfile(struct driver_data *dd,
2185                                void __user *buf,
2186                                ide_task_request_t *req_task,
2187                                int outtotal)
2188 {
2189         struct host_to_dev_fis  fis;
2190         struct host_to_dev_fis *reply;
2191         u8 *outbuf = NULL;
2192         u8 *inbuf = NULL;
2193         dma_addr_t outbuf_dma = 0;
2194         dma_addr_t inbuf_dma = 0;
2195         dma_addr_t dma_buffer = 0;
2196         int err = 0;
2197         unsigned int taskin = 0;
2198         unsigned int taskout = 0;
2199         u8 nsect = 0;
2200         unsigned int timeout;
2201         unsigned int force_single_sector;
2202         unsigned int transfer_size;
2203         unsigned long task_file_data;
2204         int intotal = outtotal + req_task->out_size;
2205         int erasemode = 0;
2206
2207         taskout = req_task->out_size;
2208         taskin = req_task->in_size;
2209         /* 130560 = 512 * 0xFF*/
2210         if (taskin > 130560 || taskout > 130560) {
2211                 err = -EINVAL;
2212                 goto abort;
2213         }
2214
2215         if (taskout) {
2216                 outbuf = kzalloc(taskout, GFP_KERNEL);
2217                 if (outbuf == NULL) {
2218                         err = -ENOMEM;
2219                         goto abort;
2220                 }
2221                 if (copy_from_user(outbuf, buf + outtotal, taskout)) {
2222                         err = -EFAULT;
2223                         goto abort;
2224                 }
2225                 outbuf_dma = pci_map_single(dd->pdev,
2226                                          outbuf,
2227                                          taskout,
2228                                          DMA_TO_DEVICE);
2229                 if (outbuf_dma == 0) {
2230                         err = -ENOMEM;
2231                         goto abort;
2232                 }
2233                 dma_buffer = outbuf_dma;
2234         }
2235
2236         if (taskin) {
2237                 inbuf = kzalloc(taskin, GFP_KERNEL);
2238                 if (inbuf == NULL) {
2239                         err = -ENOMEM;
2240                         goto abort;
2241                 }
2242
2243                 if (copy_from_user(inbuf, buf + intotal, taskin)) {
2244                         err = -EFAULT;
2245                         goto abort;
2246                 }
2247                 inbuf_dma = pci_map_single(dd->pdev,
2248                                          inbuf,
2249                                          taskin, DMA_FROM_DEVICE);
2250                 if (inbuf_dma == 0) {
2251                         err = -ENOMEM;
2252                         goto abort;
2253                 }
2254                 dma_buffer = inbuf_dma;
2255         }
2256
2257         /* only supports PIO and non-data commands from this ioctl. */
2258         switch (req_task->data_phase) {
2259         case TASKFILE_OUT:
2260                 nsect = taskout / ATA_SECT_SIZE;
2261                 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2262                 break;
2263         case TASKFILE_IN:
2264                 reply = (dd->port->rxfis + RX_FIS_PIO_SETUP);
2265                 break;
2266         case TASKFILE_NO_DATA:
2267                 reply = (dd->port->rxfis + RX_FIS_D2H_REG);
2268                 break;
2269         default:
2270                 err = -EINVAL;
2271                 goto abort;
2272         }
2273
2274         /* Build the FIS. */
2275         memset(&fis, 0, sizeof(struct host_to_dev_fis));
2276
2277         fis.type        = 0x27;
2278         fis.opts        = 1 << 7;
2279         fis.command     = req_task->io_ports[7];
2280         fis.features    = req_task->io_ports[1];
2281         fis.sect_count  = req_task->io_ports[2];
2282         fis.lba_low     = req_task->io_ports[3];
2283         fis.lba_mid     = req_task->io_ports[4];
2284         fis.lba_hi      = req_task->io_ports[5];
2285          /* Clear the dev bit*/
2286         fis.device      = req_task->io_ports[6] & ~0x10;
2287
2288         if ((req_task->in_flags.all == 0) && (req_task->out_flags.all & 1)) {
2289                 req_task->in_flags.all  =
2290                         IDE_TASKFILE_STD_IN_FLAGS |
2291                         (IDE_HOB_STD_IN_FLAGS << 8);
2292                 fis.lba_low_ex          = req_task->hob_ports[3];
2293                 fis.lba_mid_ex          = req_task->hob_ports[4];
2294                 fis.lba_hi_ex           = req_task->hob_ports[5];
2295                 fis.features_ex         = req_task->hob_ports[1];
2296                 fis.sect_cnt_ex         = req_task->hob_ports[2];
2297
2298         } else {
2299                 req_task->in_flags.all = IDE_TASKFILE_STD_IN_FLAGS;
2300         }
2301
2302         force_single_sector = implicit_sector(fis.command, fis.features);
2303
2304         if ((taskin || taskout) && (!fis.sect_count)) {
2305                 if (nsect)
2306                         fis.sect_count = nsect;
2307                 else {
2308                         if (!force_single_sector) {
2309                                 dev_warn(&dd->pdev->dev,
2310                                         "data movement but "
2311                                         "sect_count is 0\n");
2312                                         err = -EINVAL;
2313                                         goto abort;
2314                         }
2315                 }
2316         }
2317
2318         dbg_printk(MTIP_DRV_NAME
2319                 " %s: cmd %x, feat %x, nsect %x,"
2320                 " sect/lbal %x, lcyl/lbam %x, hcyl/lbah %x,"
2321                 " head/dev %x\n",
2322                 __func__,
2323                 fis.command,
2324                 fis.features,
2325                 fis.sect_count,
2326                 fis.lba_low,
2327                 fis.lba_mid,
2328                 fis.lba_hi,
2329                 fis.device);
2330
2331         /* check for erase mode support during secure erase.*/
2332         if ((fis.command == ATA_CMD_SEC_ERASE_UNIT) && outbuf &&
2333                                         (outbuf[0] & MTIP_SEC_ERASE_MODE)) {
2334                 erasemode = 1;
2335         }
2336
2337         mtip_set_timeout(dd, &fis, &timeout, erasemode);
2338
2339         /* Determine the correct transfer size.*/
2340         if (force_single_sector)
2341                 transfer_size = ATA_SECT_SIZE;
2342         else
2343                 transfer_size = ATA_SECT_SIZE * fis.sect_count;
2344
2345         /* Execute the command.*/
2346         if (mtip_exec_internal_command(dd->port,
2347                                  &fis,
2348                                  5,
2349                                  dma_buffer,
2350                                  transfer_size,
2351                                  0,
2352                                  GFP_KERNEL,
2353                                  timeout) < 0) {
2354                 err = -EIO;
2355                 goto abort;
2356         }
2357
2358         task_file_data = readl(dd->port->mmio+PORT_TFDATA);
2359
2360         if ((req_task->data_phase == TASKFILE_IN) && !(task_file_data & 1)) {
2361                 reply = dd->port->rxfis + RX_FIS_PIO_SETUP;
2362                 req_task->io_ports[7] = reply->control;
2363         } else {
2364                 reply = dd->port->rxfis + RX_FIS_D2H_REG;
2365                 req_task->io_ports[7] = reply->command;
2366         }
2367
2368         /* reclaim the DMA buffers.*/
2369         if (inbuf_dma)
2370                 pci_unmap_single(dd->pdev, inbuf_dma,
2371                         taskin, DMA_FROM_DEVICE);
2372         if (outbuf_dma)
2373                 pci_unmap_single(dd->pdev, outbuf_dma,
2374                         taskout, DMA_TO_DEVICE);
2375         inbuf_dma  = 0;
2376         outbuf_dma = 0;
2377
2378         /* return the ATA registers to the caller.*/
2379         req_task->io_ports[1] = reply->features;
2380         req_task->io_ports[2] = reply->sect_count;
2381         req_task->io_ports[3] = reply->lba_low;
2382         req_task->io_ports[4] = reply->lba_mid;
2383         req_task->io_ports[5] = reply->lba_hi;
2384         req_task->io_ports[6] = reply->device;
2385
2386         if (req_task->out_flags.all & 1)  {
2387
2388                 req_task->hob_ports[3] = reply->lba_low_ex;
2389                 req_task->hob_ports[4] = reply->lba_mid_ex;
2390                 req_task->hob_ports[5] = reply->lba_hi_ex;
2391                 req_task->hob_ports[1] = reply->features_ex;
2392                 req_task->hob_ports[2] = reply->sect_cnt_ex;
2393         }
2394         dbg_printk(MTIP_DRV_NAME
2395                 " %s: Completion: stat %x,"
2396                 "err %x, sect_cnt %x, lbalo %x,"
2397                 "lbamid %x, lbahi %x, dev %x\n",
2398                 __func__,
2399                 req_task->io_ports[7],
2400                 req_task->io_ports[1],
2401                 req_task->io_ports[2],
2402                 req_task->io_ports[3],
2403                 req_task->io_ports[4],
2404                 req_task->io_ports[5],
2405                 req_task->io_ports[6]);
2406
2407         if (taskout) {
2408                 if (copy_to_user(buf + outtotal, outbuf, taskout)) {
2409                         err = -EFAULT;
2410                         goto abort;
2411                 }
2412         }
2413         if (taskin) {
2414                 if (copy_to_user(buf + intotal, inbuf, taskin)) {
2415                         err = -EFAULT;
2416                         goto abort;
2417                 }
2418         }
2419 abort:
2420         if (inbuf_dma)
2421                 pci_unmap_single(dd->pdev, inbuf_dma,
2422                                         taskin, DMA_FROM_DEVICE);
2423         if (outbuf_dma)
2424                 pci_unmap_single(dd->pdev, outbuf_dma,
2425                                         taskout, DMA_TO_DEVICE);
2426         kfree(outbuf);
2427         kfree(inbuf);
2428
2429         return err;
2430 }
2431
2432 /*
2433  * Handle IOCTL calls from the Block Layer.
2434  *
2435  * This function is called by the Block Layer when it receives an IOCTL
2436  * command that it does not understand. If the IOCTL command is not supported
2437  * this function returns -ENOTTY.
2438  *
2439  * @dd  Pointer to the driver data structure.
2440  * @cmd IOCTL command passed from the Block Layer.
2441  * @arg IOCTL argument passed from the Block Layer.
2442  *
2443  * return value
2444  *      0       The IOCTL completed successfully.
2445  *      -ENOTTY The specified command is not supported.
2446  *      -EFAULT An error occurred copying data to a user space buffer.
2447  *      -EIO    An error occurred while executing the command.
2448  */
2449 static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
2450                          unsigned long arg)
2451 {
2452         switch (cmd) {
2453         case HDIO_GET_IDENTITY:
2454         {
2455                 if (copy_to_user((void __user *)arg, dd->port->identify,
2456                                                 sizeof(u16) * ATA_ID_WORDS))
2457                         return -EFAULT;
2458                 break;
2459         }
2460         case HDIO_DRIVE_CMD:
2461         {
2462                 u8 drive_command[4];
2463
2464                 /* Copy the user command info to our buffer. */
2465                 if (copy_from_user(drive_command,
2466                                          (void __user *) arg,
2467                                          sizeof(drive_command)))
2468                         return -EFAULT;
2469
2470                 /* Execute the drive command. */
2471                 if (exec_drive_command(dd->port,
2472                                          drive_command,
2473                                          (void __user *) (arg+4)))
2474                         return -EIO;
2475
2476                 /* Copy the status back to the users buffer. */
2477                 if (copy_to_user((void __user *) arg,
2478                                          drive_command,
2479                                          sizeof(drive_command)))
2480                         return -EFAULT;
2481
2482                 break;
2483         }
2484         case HDIO_DRIVE_TASK:
2485         {
2486                 u8 drive_command[7];
2487
2488                 /* Copy the user command info to our buffer. */
2489                 if (copy_from_user(drive_command,
2490                                          (void __user *) arg,
2491                                          sizeof(drive_command)))
2492                         return -EFAULT;
2493
2494                 /* Execute the drive command. */
2495                 if (exec_drive_task(dd->port, drive_command))
2496                         return -EIO;
2497
2498                 /* Copy the status back to the users buffer. */
2499                 if (copy_to_user((void __user *) arg,
2500                                          drive_command,
2501                                          sizeof(drive_command)))
2502                         return -EFAULT;
2503
2504                 break;
2505         }
2506         case HDIO_DRIVE_TASKFILE: {
2507                 ide_task_request_t req_task;
2508                 int ret, outtotal;
2509
2510                 if (copy_from_user(&req_task, (void __user *) arg,
2511                                         sizeof(req_task)))
2512                         return -EFAULT;
2513
2514                 outtotal = sizeof(req_task);
2515
2516                 ret = exec_drive_taskfile(dd, (void __user *) arg,
2517                                                 &req_task, outtotal);
2518
2519                 if (copy_to_user((void __user *) arg, &req_task,
2520                                                         sizeof(req_task)))
2521                         return -EFAULT;
2522
2523                 return ret;
2524         }
2525
2526         default:
2527                 return -EINVAL;
2528         }
2529         return 0;
2530 }
2531
2532 /*
2533  * Submit an IO to the hw
2534  *
2535  * This function is called by the block layer to issue an io
2536  * to the device. Upon completion, the callback function will
2537  * be called with the data parameter passed as the callback data.
2538  *
2539  * @dd       Pointer to the driver data structure.
2540  * @start    First sector to read.
2541  * @nsect    Number of sectors to read.
2542  * @nents    Number of entries in scatter list for the read command.
2543  * @tag      The tag of this read command.
2544  * @callback Pointer to the function that should be called
2545  *           when the read completes.
2546  * @data     Callback data passed to the callback function
2547  *           when the read completes.
2548  * @dir      Direction (read or write)
2549  *
2550  * return value
2551  *      None
2552  */
2553 static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
2554                               int nsect, int nents, int tag, void *callback,
2555                               void *data, int dir)
2556 {
2557         struct host_to_dev_fis  *fis;
2558         struct mtip_port *port = dd->port;
2559         struct mtip_cmd *command = &port->commands[tag];
2560         int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
2561         u64 start = sector;
2562
2563         /* Map the scatter list for DMA access */
2564         nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir);
2565
2566         command->scatter_ents = nents;
2567
2568         /*
2569          * The number of retries for this command before it is
2570          * reported as a failure to the upper layers.
2571          */
2572         command->retries = MTIP_MAX_RETRIES;
2573
2574         /* Fill out fis */
2575         fis = command->command;
2576         fis->type        = 0x27;
2577         fis->opts        = 1 << 7;
2578         fis->command     =
2579                 (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE);
2580         fis->lba_low     = start & 0xFF;
2581         fis->lba_mid     = (start >> 8) & 0xFF;
2582         fis->lba_hi      = (start >> 16) & 0xFF;
2583         fis->lba_low_ex  = (start >> 24) & 0xFF;
2584         fis->lba_mid_ex  = (start >> 32) & 0xFF;
2585         fis->lba_hi_ex   = (start >> 40) & 0xFF;
2586         fis->device      = 1 << 6;
2587         fis->features    = nsect & 0xFF;
2588         fis->features_ex = (nsect >> 8) & 0xFF;
2589         fis->sect_count  = ((tag << 3) | (tag >> 5));
2590         fis->sect_cnt_ex = 0;
2591         fis->control     = 0;
2592         fis->res2        = 0;
2593         fis->res3        = 0;
2594         fill_command_sg(dd, command, nents);
2595
2596         /* Populate the command header */
2597         command->command_header->opts =
2598                         __force_bit2int cpu_to_le32(
2599                                 (nents << 16) | 5 | AHCI_CMD_PREFETCH);
2600         command->command_header->byte_count = 0;
2601
2602         /*
2603          * Set the completion function and data for the command
2604          * within this layer.
2605          */
2606         command->comp_data = dd;
2607         command->comp_func = mtip_async_complete;
2608         command->direction = dma_dir;
2609
2610         /*
2611          * Set the completion function and data for the command passed
2612          * from the upper layer.
2613          */
2614         command->async_data = data;
2615         command->async_callback = callback;
2616
2617         /*
2618          * To prevent this command from being issued
2619          * if an internal command is in progress or error handling is active.
2620          */
2621         if (port->flags & MTIP_PF_PAUSE_IO) {
2622                 set_bit(tag, port->cmds_to_issue);
2623                 set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
2624                 return;
2625         }
2626
2627         /* Issue the command to the hardware */
2628         mtip_issue_ncq_command(port, tag);
2629
2630         return;
2631 }
2632
2633 /*
2634  * Release a command slot.
2635  *
2636  * @dd  Pointer to the driver data structure.
2637  * @tag Slot tag
2638  *
2639  * return value
2640  *      None
2641  */
2642 static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag)
2643 {
2644         release_slot(dd->port, tag);
2645 }
2646
2647 /*
2648  * Obtain a command slot and return its associated scatter list.
2649  *
2650  * @dd  Pointer to the driver data structure.
2651  * @tag Pointer to an int that will receive the allocated command
2652  *            slot tag.
2653  *
2654  * return value
2655  *      Pointer to the scatter list for the allocated command slot
2656  *      or NULL if no command slots are available.
2657  */
2658 static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd,
2659                                                    int *tag)
2660 {
2661         /*
2662          * It is possible that, even with this semaphore, a thread
2663          * may think that no command slots are available. Therefore, we
2664          * need to make an attempt to get_slot().
2665          */
2666         down(&dd->port->cmd_slot);
2667         *tag = get_slot(dd->port);
2668
2669         if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
2670                 up(&dd->port->cmd_slot);
2671                 return NULL;
2672         }
2673         if (unlikely(*tag < 0)) {
2674                 up(&dd->port->cmd_slot);
2675                 return NULL;
2676         }
2677
2678         return dd->port->commands[*tag].sg;
2679 }
2680
2681 /*
2682  * Sysfs status dump.
2683  *
2684  * @dev  Pointer to the device structure, passed by the kernrel.
2685  * @attr Pointer to the device_attribute structure passed by the kernel.
2686  * @buf  Pointer to the char buffer that will receive the stats info.
2687  *
2688  * return value
2689  *      The size, in bytes, of the data copied into buf.
2690  */
2691 static ssize_t mtip_hw_show_status(struct device *dev,
2692                                 struct device_attribute *attr,
2693                                 char *buf)
2694 {
2695         struct driver_data *dd = dev_to_disk(dev)->private_data;
2696         int size = 0;
2697
2698         if (test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))
2699                 size += sprintf(buf, "%s", "thermal_shutdown\n");
2700         else if (test_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag))
2701                 size += sprintf(buf, "%s", "write_protect\n");
2702         else
2703                 size += sprintf(buf, "%s", "online\n");
2704
2705         return size;
2706 }
2707
2708 static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
2709
/*
 * Debugfs read handler: dump the hardware and software command-state
 * registers as formatted text.
 *
 * One-shot semantics: data is produced only when *offset is zero; any
 * subsequent read (nonzero offset) returns 0 (EOF). Output is staged in
 * an on-stack buffer, assumed large enough (MTIP_DFS_MAX_BUF_SIZE) for
 * the worst-case slot-group count — TODO confirm against
 * MTIP_MAX_SLOT_GROUPS.
 */
static ssize_t mtip_hw_read_registers(struct file *f, char __user *ubuf,
				  size_t len, loff_t *offset)
{
	struct driver_data *dd =  (struct driver_data *)f->private_data;
	char buf[MTIP_DFS_MAX_BUF_SIZE];
	u32 group_allocated;
	int size = *offset;
	int n;

	/* Only the first read (offset 0) produces output. */
	if (!len || size)
		return 0;

	size += sprintf(&buf[size], "H/ S ACTive      : [ 0x");

	/* Dump slot groups highest-first so the hex string reads MSB..LSB. */
	for (n = dd->slot_groups-1; n >= 0; n--)
		size += sprintf(&buf[size], "%08X ",
					 readl(dd->port->s_active[n]));

	size += sprintf(&buf[size], "]\n");
	size += sprintf(&buf[size], "H/ Command Issue : [ 0x");

	for (n = dd->slot_groups-1; n >= 0; n--)
		size += sprintf(&buf[size], "%08X ",
					readl(dd->port->cmd_issue[n]));

	size += sprintf(&buf[size], "]\n");
	size += sprintf(&buf[size], "H/ Completed     : [ 0x");

	for (n = dd->slot_groups-1; n >= 0; n--)
		size += sprintf(&buf[size], "%08X ",
				readl(dd->port->completed[n]));

	size += sprintf(&buf[size], "]\n");
	size += sprintf(&buf[size], "H/ PORT IRQ STAT : [ 0x%08X ]\n",
				readl(dd->port->mmio + PORT_IRQ_STAT));
	size += sprintf(&buf[size], "H/ HOST IRQ STAT : [ 0x%08X ]\n",
				readl(dd->mmio + HOST_IRQ_STAT));
	size += sprintf(&buf[size], "\n");

	size += sprintf(&buf[size], "L/ Allocated     : [ 0x");

	for (n = dd->slot_groups-1; n >= 0; n--) {
		/*
		 * On 64-bit longs each bitmap word holds two 32-bit slot
		 * groups; select the right half with a shift.
		 */
		if (sizeof(long) > sizeof(u32))
			group_allocated =
				dd->port->allocated[n/2] >> (32*(n&1));
		else
			group_allocated = dd->port->allocated[n];
		size += sprintf(&buf[size], "%08X ", group_allocated);
	}
	size += sprintf(&buf[size], "]\n");

	size += sprintf(&buf[size], "L/ Commands in Q : [ 0x");

	for (n = dd->slot_groups-1; n >= 0; n--) {
		if (sizeof(long) > sizeof(u32))
			group_allocated =
				dd->port->cmds_to_issue[n/2] >> (32*(n&1));
		else
			group_allocated = dd->port->cmds_to_issue[n];
		size += sprintf(&buf[size], "%08X ", group_allocated);
	}
	size += sprintf(&buf[size], "]\n");

	/* Truncate to the user's buffer; copy_to_user returns bytes NOT copied. */
	*offset = size <= len ? size : len;
	size = copy_to_user(ubuf, buf, *offset);
	if (size)
		return -EFAULT;

	return *offset;
}
2780
2781 static ssize_t mtip_hw_read_flags(struct file *f, char __user *ubuf,
2782                                   size_t len, loff_t *offset)
2783 {
2784         struct driver_data *dd =  (struct driver_data *)f->private_data;
2785         char buf[MTIP_DFS_MAX_BUF_SIZE];
2786         int size = *offset;
2787
2788         if (!len || size)
2789                 return 0;
2790
2791         size += sprintf(&buf[size], "Flag-port : [ %08lX ]\n",
2792                                                         dd->port->flags);
2793         size += sprintf(&buf[size], "Flag-dd   : [ %08lX ]\n",
2794                                                         dd->dd_flag);
2795
2796         *offset = size <= len ? size : len;
2797         size = copy_to_user(ubuf, buf, *offset);
2798         if (size)
2799                 return -EFAULT;
2800
2801         return *offset;
2802 }
2803
/* Debugfs "registers" file: read-only hardware/software state dump. */
static const struct file_operations mtip_regs_fops = {
	.owner  = THIS_MODULE,
	.open   = simple_open,
	.read   = mtip_hw_read_registers,
	.llseek = no_llseek,
};

/* Debugfs "flags" file: read-only port/driver flag dump. */
static const struct file_operations mtip_flags_fops = {
	.owner  = THIS_MODULE,
	.open   = simple_open,
	.read   = mtip_hw_read_flags,
	.llseek = no_llseek,
};
2817
2818 /*
2819  * Create the sysfs related attributes.
2820  *
2821  * @dd   Pointer to the driver data structure.
2822  * @kobj Pointer to the kobj for the block device.
2823  *
2824  * return value
2825  *      0       Operation completed successfully.
2826  *      -EINVAL Invalid parameter.
2827  */
2828 static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
2829 {
2830         if (!kobj || !dd)
2831                 return -EINVAL;
2832
2833         if (sysfs_create_file(kobj, &dev_attr_status.attr))
2834                 dev_warn(&dd->pdev->dev,
2835                         "Error creating 'status' sysfs entry\n");
2836         return 0;
2837 }
2838
2839 /*
2840  * Remove the sysfs related attributes.
2841  *
2842  * @dd   Pointer to the driver data structure.
2843  * @kobj Pointer to the kobj for the block device.
2844  *
2845  * return value
2846  *      0       Operation completed successfully.
2847  *      -EINVAL Invalid parameter.
2848  */
2849 static int mtip_hw_sysfs_exit(struct driver_data *dd, struct kobject *kobj)
2850 {
2851         if (!kobj || !dd)
2852                 return -EINVAL;
2853
2854         sysfs_remove_file(kobj, &dev_attr_status.attr);
2855
2856         return 0;
2857 }
2858
2859 static int mtip_hw_debugfs_init(struct driver_data *dd)
2860 {
2861         if (!dfs_parent)
2862                 return -1;
2863
2864         dd->dfs_node = debugfs_create_dir(dd->disk->disk_name, dfs_parent);
2865         if (IS_ERR_OR_NULL(dd->dfs_node)) {
2866                 dev_warn(&dd->pdev->dev,
2867                         "Error creating node %s under debugfs\n",
2868                                                 dd->disk->disk_name);
2869                 dd->dfs_node = NULL;
2870                 return -1;
2871         }
2872
2873         debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
2874                                                         &mtip_flags_fops);
2875         debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
2876                                                         &mtip_regs_fops);
2877
2878         return 0;
2879 }
2880
/* Remove the per-device debugfs directory and everything under it. */
static void mtip_hw_debugfs_exit(struct driver_data *dd)
{
	debugfs_remove_recursive(dd->dfs_node);
}
2885
2886
2887 /*
2888  * Perform any init/resume time hardware setup
2889  *
2890  * @dd Pointer to the driver data structure.
2891  *
2892  * return value
2893  *      None
2894  */
2895 static inline void hba_setup(struct driver_data *dd)
2896 {
2897         u32 hwdata;
2898         hwdata = readl(dd->mmio + HOST_HSORG);
2899
2900         /* interrupt bug workaround: use only 1 IS bit.*/
2901         writel(hwdata |
2902                 HSORG_DISABLE_SLOTGRP_INTR |
2903                 HSORG_DISABLE_SLOTGRP_PXIS,
2904                 dd->mmio + HOST_HSORG);
2905 }
2906
2907 /*
2908  * Detect the details of the product, and store anything needed
2909  * into the driver data structure.  This includes product type and
2910  * version and number of slot groups.
2911  *
2912  * @dd Pointer to the driver data structure.
2913  *
2914  * return value
2915  *      None
2916  */
2917 static void mtip_detect_product(struct driver_data *dd)
2918 {
2919         u32 hwdata;
2920         unsigned int rev, slotgroups;
2921
2922         /*
2923          * HBA base + 0xFC [15:0] - vendor-specific hardware interface
2924          * info register:
2925          * [15:8] hardware/software interface rev#
2926          * [   3] asic-style interface
2927          * [ 2:0] number of slot groups, minus 1 (only valid for asic-style).
2928          */
2929         hwdata = readl(dd->mmio + HOST_HSORG);
2930
2931         dd->product_type = MTIP_PRODUCT_UNKNOWN;
2932         dd->slot_groups = 1;
2933
2934         if (hwdata & 0x8) {
2935                 dd->product_type = MTIP_PRODUCT_ASICFPGA;
2936                 rev = (hwdata & HSORG_HWREV) >> 8;
2937                 slotgroups = (hwdata & HSORG_SLOTGROUPS) + 1;
2938                 dev_info(&dd->pdev->dev,
2939                         "ASIC-FPGA design, HS rev 0x%x, "
2940                         "%i slot groups [%i slots]\n",
2941                          rev,
2942                          slotgroups,
2943                          slotgroups * 32);
2944
2945                 if (slotgroups > MTIP_MAX_SLOT_GROUPS) {
2946                         dev_warn(&dd->pdev->dev,
2947                                 "Warning: driver only supports "
2948                                 "%i slot groups.\n", MTIP_MAX_SLOT_GROUPS);
2949                         slotgroups = MTIP_MAX_SLOT_GROUPS;
2950                 }
2951                 dd->slot_groups = slotgroups;
2952                 return;
2953         }
2954
2955         dev_warn(&dd->pdev->dev, "Unrecognized product id\n");
2956 }
2957
2958 /*
2959  * Blocking wait for FTL rebuild to complete
2960  *
2961  * @dd Pointer to the DRIVER_DATA structure.
2962  *
2963  * return value
2964  *      0       FTL rebuild completed successfully
2965  *      -EFAULT FTL rebuild error/timeout/interruption
2966  */
2967 static int mtip_ftl_rebuild_poll(struct driver_data *dd)
2968 {
2969         unsigned long timeout, cnt = 0, start;
2970
2971         dev_warn(&dd->pdev->dev,
2972                 "FTL rebuild in progress. Polling for completion.\n");
2973
2974         start = jiffies;
2975         timeout = jiffies + msecs_to_jiffies(MTIP_FTL_REBUILD_TIMEOUT_MS);
2976
2977         do {
2978                 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
2979                                 &dd->dd_flag)))
2980                         return -EFAULT;
2981                 if (mtip_check_surprise_removal(dd->pdev))
2982                         return -EFAULT;
2983
2984                 if (mtip_get_identify(dd->port, NULL) < 0)
2985                         return -EFAULT;
2986
2987                 if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
2988                         MTIP_FTL_REBUILD_MAGIC) {
2989                         ssleep(1);
2990                         /* Print message every 3 minutes */
2991                         if (cnt++ >= 180) {
2992                                 dev_warn(&dd->pdev->dev,
2993                                 "FTL rebuild in progress (%d secs).\n",
2994                                 jiffies_to_msecs(jiffies - start) / 1000);
2995                                 cnt = 0;
2996                         }
2997                 } else {
2998                         dev_warn(&dd->pdev->dev,
2999                                 "FTL rebuild complete (%d secs).\n",
3000                         jiffies_to_msecs(jiffies - start) / 1000);
3001                         mtip_block_initialize(dd);
3002                         return 0;
3003                 }
3004                 ssleep(10);
3005         } while (time_before(jiffies, timeout));
3006
3007         /* Check for timeout */
3008         dev_err(&dd->pdev->dev,
3009                 "Timed out waiting for FTL rebuild to complete (%d secs).\n",
3010                 jiffies_to_msecs(jiffies - start) / 1000);
3011         return -EFAULT;
3012 }
3013
3014 /*
3015  * service thread to issue queued commands
3016  *
3017  * @data Pointer to the driver data structure.
3018  *
3019  * return value
3020  *      0
3021  */
3022
3023 static int mtip_service_thread(void *data)
3024 {
3025         struct driver_data *dd = (struct driver_data *)data;
3026         unsigned long slot, slot_start, slot_wrap;
3027         unsigned int num_cmd_slots = dd->slot_groups * 32;
3028         struct mtip_port *port = dd->port;
3029
3030         while (1) {
3031                 /*
3032                  * the condition is to check neither an internal command is
3033                  * is in progress nor error handling is active
3034                  */
3035                 wait_event_interruptible(port->svc_wait, (port->flags) &&
3036                         !(port->flags & MTIP_PF_PAUSE_IO));
3037
3038                 if (kthread_should_stop())
3039                         break;
3040
3041                 if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
3042                                 &dd->dd_flag)))
3043                         break;
3044
3045                 set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3046                 if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) {
3047                         slot = 1;
3048                         /* used to restrict the loop to one iteration */
3049                         slot_start = num_cmd_slots;
3050                         slot_wrap = 0;
3051                         while (1) {
3052                                 slot = find_next_bit(port->cmds_to_issue,
3053                                                 num_cmd_slots, slot);
3054                                 if (slot_wrap == 1) {
3055                                         if ((slot_start >= slot) ||
3056                                                 (slot >= num_cmd_slots))
3057                                                 break;
3058                                 }
3059                                 if (unlikely(slot_start == num_cmd_slots))
3060                                         slot_start = slot;
3061
3062                                 if (unlikely(slot == num_cmd_slots)) {
3063                                         slot = 1;
3064                                         slot_wrap = 1;
3065                                         continue;
3066                                 }
3067
3068                                 /* Issue the command to the hardware */
3069                                 mtip_issue_ncq_command(port, slot);
3070
3071                                 clear_bit(slot, port->cmds_to_issue);
3072                         }
3073
3074                         clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags);
3075                 } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) {
3076                         if (!mtip_ftl_rebuild_poll(dd))
3077                                 set_bit(MTIP_DDF_REBUILD_FAILED_BIT,
3078                                                         &dd->dd_flag);
3079                         clear_bit(MTIP_PF_REBUILD_BIT, &port->flags);
3080                 }
3081                 clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
3082
3083                 if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags))
3084                         break;
3085         }
3086         return 0;
3087 }
3088
3089 /*
3090  * Called once for each card.
3091  *
3092  * @dd Pointer to the driver data structure.
3093  *
3094  * return value
3095  *      0 on success, else an error code.
3096  */
3097 static int mtip_hw_init(struct driver_data *dd)
3098 {
3099         int i;
3100         int rv;
3101         unsigned int num_command_slots;
3102         unsigned long timeout, timetaken;
3103         unsigned char *buf;
3104         struct smart_attr attr242;
3105
3106         dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR];
3107
3108         mtip_detect_product(dd);
3109         if (dd->product_type == MTIP_PRODUCT_UNKNOWN) {
3110                 rv = -EIO;
3111                 goto out1;
3112         }
3113         num_command_slots = dd->slot_groups * 32;
3114
3115         hba_setup(dd);
3116
3117         dd->port = kzalloc_node(sizeof(struct mtip_port), GFP_KERNEL,
3118                                 dd->numa_node);
3119         if (!dd->port) {
3120                 dev_err(&dd->pdev->dev,
3121                         "Memory allocation: port structure\n");
3122                 return -ENOMEM;
3123         }
3124
3125         /* Continue workqueue setup */
3126         for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3127                 dd->work[i].port = dd->port;
3128
3129         /* Counting semaphore to track command slot usage */
3130         sema_init(&dd->port->cmd_slot, num_command_slots - 1);
3131
3132         /* Spinlock to prevent concurrent issue */
3133         for (i = 0; i < MTIP_MAX_SLOT_GROUPS; i++)
3134                 spin_lock_init(&dd->port->cmd_issue_lock[i]);
3135
3136         /* Set the port mmio base address. */
3137         dd->port->mmio  = dd->mmio + PORT_OFFSET;
3138         dd->port->dd    = dd;
3139
3140         /* Allocate memory for the command list. */
3141         dd->port->command_list =
3142                 dmam_alloc_coherent(&dd->pdev->dev,
3143                         HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3144                         &dd->port->command_list_dma,
3145                         GFP_KERNEL);
3146         if (!dd->port->command_list) {
3147                 dev_err(&dd->pdev->dev,
3148                         "Memory allocation: command list\n");
3149                 rv = -ENOMEM;
3150                 goto out1;
3151         }
3152
3153         /* Clear the memory we have allocated. */
3154         memset(dd->port->command_list,
3155                 0,
3156                 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4));
3157
3158         /* Setup the addresse of the RX FIS. */
3159         dd->port->rxfis     = dd->port->command_list + HW_CMD_SLOT_SZ;
3160         dd->port->rxfis_dma = dd->port->command_list_dma + HW_CMD_SLOT_SZ;
3161
3162         /* Setup the address of the command tables. */
3163         dd->port->command_table   = dd->port->rxfis + AHCI_RX_FIS_SZ;
3164         dd->port->command_tbl_dma = dd->port->rxfis_dma + AHCI_RX_FIS_SZ;
3165
3166         /* Setup the address of the identify data. */
3167         dd->port->identify     = dd->port->command_table +
3168                                         HW_CMD_TBL_AR_SZ;
3169         dd->port->identify_dma = dd->port->command_tbl_dma +
3170                                         HW_CMD_TBL_AR_SZ;
3171
3172         /* Setup the address of the sector buffer - for some non-ncq cmds */
3173         dd->port->sector_buffer = (void *) dd->port->identify + ATA_SECT_SIZE;
3174         dd->port->sector_buffer_dma = dd->port->identify_dma + ATA_SECT_SIZE;
3175
3176         /* Setup the address of the log buf - for read log command */
3177         dd->port->log_buf = (void *)dd->port->sector_buffer  + ATA_SECT_SIZE;
3178         dd->port->log_buf_dma = dd->port->sector_buffer_dma + ATA_SECT_SIZE;
3179
3180         /* Setup the address of the smart buf - for smart read data command */
3181         dd->port->smart_buf = (void *)dd->port->log_buf  + ATA_SECT_SIZE;
3182         dd->port->smart_buf_dma = dd->port->log_buf_dma + ATA_SECT_SIZE;
3183
3184
3185         /* Point the command headers at the command tables. */
3186         for (i = 0; i < num_command_slots; i++) {
3187                 dd->port->commands[i].command_header =
3188                                         dd->port->command_list +
3189                                         (sizeof(struct mtip_cmd_hdr) * i);
3190                 dd->port->commands[i].command_header_dma =
3191                                         dd->port->command_list_dma +
3192                                         (sizeof(struct mtip_cmd_hdr) * i);
3193
3194                 dd->port->commands[i].command =
3195                         dd->port->command_table + (HW_CMD_TBL_SZ * i);
3196                 dd->port->commands[i].command_dma =
3197                         dd->port->command_tbl_dma + (HW_CMD_TBL_SZ * i);
3198
3199                 if (readl(dd->mmio + HOST_CAP) & HOST_CAP_64)
3200                         dd->port->commands[i].command_header->ctbau =
3201                         __force_bit2int cpu_to_le32(
3202                         (dd->port->commands[i].command_dma >> 16) >> 16);
3203                 dd->port->commands[i].command_header->ctba =
3204                         __force_bit2int cpu_to_le32(
3205                         dd->port->commands[i].command_dma & 0xFFFFFFFF);
3206
3207                 /*
3208                  * If this is not done, a bug is reported by the stock
3209                  * FC11 i386. Due to the fact that it has lots of kernel
3210                  * debugging enabled.
3211                  */
3212                 sg_init_table(dd->port->commands[i].sg, MTIP_MAX_SG);
3213
3214                 /* Mark all commands as currently inactive.*/
3215                 atomic_set(&dd->port->commands[i].active, 0);
3216         }
3217
3218         /* Setup the pointers to the extended s_active and CI registers. */
3219         for (i = 0; i < dd->slot_groups; i++) {
3220                 dd->port->s_active[i] =
3221                         dd->port->mmio + i*0x80 + PORT_SCR_ACT;
3222                 dd->port->cmd_issue[i] =
3223                         dd->port->mmio + i*0x80 + PORT_COMMAND_ISSUE;
3224                 dd->port->completed[i] =
3225                         dd->port->mmio + i*0x80 + PORT_SDBV;
3226         }
3227
3228         timetaken = jiffies;
3229         timeout = jiffies + msecs_to_jiffies(30000);
3230         while (((readl(dd->port->mmio + PORT_SCR_STAT) & 0x0F) != 0x03) &&
3231                  time_before(jiffies, timeout)) {
3232                 mdelay(100);
3233         }
3234         if (unlikely(mtip_check_surprise_removal(dd->pdev))) {
3235                 timetaken = jiffies - timetaken;
3236                 dev_warn(&dd->pdev->dev,
3237                         "Surprise removal detected at %u ms\n",
3238                         jiffies_to_msecs(timetaken));
3239                 rv = -ENODEV;
3240                 goto out2 ;
3241         }
3242         if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) {
3243                 timetaken = jiffies - timetaken;
3244                 dev_warn(&dd->pdev->dev,
3245                         "Removal detected at %u ms\n",
3246                         jiffies_to_msecs(timetaken));
3247                 rv = -EFAULT;
3248                 goto out2;
3249         }
3250
3251         /* Conditionally reset the HBA. */
3252         if (!(readl(dd->mmio + HOST_CAP) & HOST_CAP_NZDMA)) {
3253                 if (mtip_hba_reset(dd) < 0) {
3254                         dev_err(&dd->pdev->dev,
3255                                 "Card did not reset within timeout\n");
3256                         rv = -EIO;
3257                         goto out2;
3258                 }
3259         } else {
3260                 /* Clear any pending interrupts on the HBA */
3261                 writel(readl(dd->mmio + HOST_IRQ_STAT),
3262                         dd->mmio + HOST_IRQ_STAT);
3263         }
3264
3265         mtip_init_port(dd->port);
3266         mtip_start_port(dd->port);
3267
3268         /* Setup the ISR and enable interrupts. */
3269         rv = devm_request_irq(&dd->pdev->dev,
3270                                 dd->pdev->irq,
3271                                 mtip_irq_handler,
3272                                 IRQF_SHARED,
3273                                 dev_driver_string(&dd->pdev->dev),
3274                                 dd);
3275
3276         if (rv) {
3277                 dev_err(&dd->pdev->dev,
3278                         "Unable to allocate IRQ %d\n", dd->pdev->irq);
3279                 goto out2;
3280         }
3281         irq_set_affinity_hint(dd->pdev->irq, get_cpu_mask(dd->isr_binding));
3282
3283         /* Enable interrupts on the HBA. */
3284         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3285                                         dd->mmio + HOST_CTL);
3286
3287         init_timer(&dd->port->cmd_timer);
3288         init_waitqueue_head(&dd->port->svc_wait);
3289
3290         dd->port->cmd_timer.data = (unsigned long int) dd->port;
3291         dd->port->cmd_timer.function = mtip_timeout_function;
3292         mod_timer(&dd->port->cmd_timer,
3293                 jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD));
3294
3295
3296         if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) {
3297                 rv = -EFAULT;
3298                 goto out3;
3299         }
3300
3301         if (mtip_get_identify(dd->port, NULL) < 0) {
3302                 rv = -EFAULT;
3303                 goto out3;
3304         }
3305
3306         if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) ==
3307                 MTIP_FTL_REBUILD_MAGIC) {
3308                 set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags);
3309                 return MTIP_FTL_REBUILD_MAGIC;
3310         }
3311         mtip_dump_identify(dd->port);
3312
3313         /* check write protect, over temp and rebuild statuses */
3314         rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ,
3315                                 dd->port->log_buf,
3316                                 dd->port->log_buf_dma, 1);
3317         if (rv) {
3318                 dev_warn(&dd->pdev->dev,
3319                         "Error in READ LOG EXT (10h) command\n");
3320                 /* non-critical error, don't fail the load */
3321         } else {
3322                 buf = (unsigned char *)dd->port->log_buf;
3323                 if (buf[259] & 0x1) {
3324                         dev_info(&dd->pdev->dev,
3325                                 "Write protect bit is set.\n");
3326                         set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag);
3327                 }
3328                 if (buf[288] == 0xF7) {
3329                         dev_info(&dd->pdev->dev,
3330                                 "Exceeded Tmax, drive in thermal shutdown.\n");
3331                         set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag);
3332                 }
3333                 if (buf[288] == 0xBF) {
3334                         dev_info(&dd->pdev->dev,
3335                                 "Drive indicates rebuild has failed.\n");
3336                         /* TODO */
3337                 }
3338         }
3339
3340         /* get write protect progess */
3341         memset(&attr242, 0, sizeof(struct smart_attr));
3342         if (mtip_get_smart_attr(dd->port, 242, &attr242))
3343                 dev_warn(&dd->pdev->dev,
3344                                 "Unable to check write protect progress\n");
3345         else
3346                 dev_info(&dd->pdev->dev,
3347                                 "Write protect progress: %u%% (%u blocks)\n",
3348                                 attr242.cur, le32_to_cpu(attr242.data));
3349         return rv;
3350
3351 out3:
3352         del_timer_sync(&dd->port->cmd_timer);
3353
3354         /* Disable interrupts on the HBA. */
3355         writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3356                         dd->mmio + HOST_CTL);
3357
3358         /* Release the IRQ. */
3359         irq_set_affinity_hint(dd->pdev->irq, NULL);
3360         devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);
3361
3362 out2:
3363         mtip_deinit_port(dd->port);
3364
3365         /* Free the command/command header memory. */
3366         dmam_free_coherent(&dd->pdev->dev,
3367                                 HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
3368                                 dd->port->command_list,
3369                                 dd->port->command_list_dma);
3370 out1:
3371         /* Free the memory allocated for the for structure. */
3372         kfree(dd->port);
3373
3374         return rv;
3375 }
3376
3377 /*
3378  * Called to deinitialize an interface.
3379  *
3380  * @dd Pointer to the driver data structure.
3381  *
3382  * return value
3383  *      0
3384  */
static int mtip_hw_exit(struct driver_data *dd)
{
	/*
	 * Send standby immediate (E0h) to the drive so that it
	 * saves its state.  Skipped entirely when the cleanup flag is
	 * set (surprise removal) since the hardware is no longer there
	 * to talk to.
	 */
	if (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {

		/* Don't issue the command while an FTL rebuild is running. */
		if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags))
			if (mtip_standby_immediate(dd->port))
				dev_warn(&dd->pdev->dev,
					"STANDBY IMMEDIATE failed\n");

		/* de-initialize the port. */
		mtip_deinit_port(dd->port);

		/* Disable interrupts on the HBA. */
		writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
				dd->mmio + HOST_CTL);
	}

	/* Stop the timeout timer before freeing the state it references. */
	del_timer_sync(&dd->port->cmd_timer);

	/* Release the IRQ. */
	irq_set_affinity_hint(dd->pdev->irq, NULL);
	devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd);

	/* Free the command/command header memory. */
	dmam_free_coherent(&dd->pdev->dev,
			HW_PORT_PRIV_DMA_SZ + (ATA_SECT_SIZE * 4),
			dd->port->command_list,
			dd->port->command_list_dma);
	/* Free the memory allocated for the port structure. */
	kfree(dd->port);

	return 0;
}
3422
3423 /*
3424  * Issue a Standby Immediate command to the device.
3425  *
3426  * This function is called by the Block Layer just before the
3427  * system powers off during a shutdown.
3428  *
3429  * @dd Pointer to the driver data structure.
3430  *
3431  * return value
3432  *      0
3433  */
static int mtip_hw_shutdown(struct driver_data *dd)
{
	/*
	 * Send standby immediate (E0h) to the drive so that it
	 * saves its state.  Best-effort: the return value is ignored
	 * because the system is powering off regardless.
	 */
	mtip_standby_immediate(dd->port);

	return 0;
}
3444
3445 /*
3446  * Suspend function
3447  *
3448  * This function is called by the Block Layer just before the
3449  * system hibernates.
3450  *
3451  * @dd Pointer to the driver data structure.
3452  *
3453  * return value
3454  *      0       Suspend was successful
3455  *      -EFAULT Suspend was not successful
3456  */
3457 static int mtip_hw_suspend(struct driver_data *dd)
3458 {
3459         /*
3460          * Send standby immediate (E0h) to the drive
3461          * so that it saves its state.
3462          */
3463         if (mtip_standby_immediate(dd->port) != 0) {
3464                 dev_err(&dd->pdev->dev,
3465                         "Failed standby-immediate command\n");
3466                 return -EFAULT;
3467         }
3468
3469         /* Disable interrupts on the HBA.*/
3470         writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN,
3471                         dd->mmio + HOST_CTL);
3472         mtip_deinit_port(dd->port);
3473
3474         return 0;
3475 }
3476
3477 /*
3478  * Resume function
3479  *
3480  * This function is called by the Block Layer as the
3481  * system resumes.
3482  *
3483  * @dd Pointer to the driver data structure.
3484  *
3485  * return value
3486  *      0       Resume was successful
3487  *      -EFAULT Resume was not successful
3488  */
3489 static int mtip_hw_resume(struct driver_data *dd)
3490 {
3491         /* Perform any needed hardware setup steps */
3492         hba_setup(dd);
3493
3494         /* Reset the HBA */
3495         if (mtip_hba_reset(dd) != 0) {
3496                 dev_err(&dd->pdev->dev,
3497                         "Unable to reset the HBA\n");
3498                 return -EFAULT;
3499         }
3500
3501         /*
3502          * Enable the port, DMA engine, and FIS reception specific
3503          * h/w in controller.
3504          */
3505         mtip_init_port(dd->port);
3506         mtip_start_port(dd->port);
3507
3508         /* Enable interrupts on the HBA.*/
3509         writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN,
3510                         dd->mmio + HOST_CTL);
3511
3512         return 0;
3513 }
3514
3515 /*
3516  * Helper function for reusing disk name
3517  * upon hot insertion.
3518  */
3519 static int rssd_disk_name_format(char *prefix,
3520                                  int index,
3521                                  char *buf,
3522                                  int buflen)
3523 {
3524         const int base = 'z' - 'a' + 1;
3525         char *begin = buf + strlen(prefix);
3526         char *end = buf + buflen;
3527         char *p;
3528         int unit;
3529
3530         p = end - 1;
3531         *p = '\0';
3532         unit = base;
3533         do {
3534                 if (p == begin)
3535                         return -EINVAL;
3536                 *--p = 'a' + (index % unit);
3537                 index = (index / unit) - 1;
3538         } while (index >= 0);
3539
3540         memmove(begin, p, end - p);
3541         memcpy(buf, prefix, strlen(prefix));
3542
3543         return 0;
3544 }
3545
3546 /*
3547  * Block layer IOCTL handler.
3548  *
3549  * @dev Pointer to the block_device structure.
3550  * @mode ignored
3551  * @cmd IOCTL command passed from the user application.
3552  * @arg Argument passed from the user application.
3553  *
3554  * return value
3555  *      0        IOCTL completed successfully.
3556  *      -ENOTTY  IOCTL not supported or invalid driver data
3557  *                 structure pointer.
3558  */
3559 static int mtip_block_ioctl(struct block_device *dev,
3560                             fmode_t mode,
3561                             unsigned cmd,
3562                             unsigned long arg)
3563 {
3564         struct driver_data *dd = dev->bd_disk->private_data;
3565
3566         if (!capable(CAP_SYS_ADMIN))
3567                 return -EACCES;
3568
3569         if (!dd)
3570                 return -ENOTTY;
3571
3572         if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
3573                 return -ENOTTY;
3574
3575         switch (cmd) {
3576         case BLKFLSBUF:
3577                 return -ENOTTY;
3578         default:
3579                 return mtip_hw_ioctl(dd, cmd, arg);
3580         }
3581 }
3582
3583 #ifdef CONFIG_COMPAT
3584 /*
3585  * Block layer compat IOCTL handler.
3586  *
3587  * @dev Pointer to the block_device structure.
3588  * @mode ignored
3589  * @cmd IOCTL command passed from the user application.
3590  * @arg Argument passed from the user application.
3591  *
3592  * return value
3593  *      0        IOCTL completed successfully.
3594  *      -ENOTTY  IOCTL not supported or invalid driver data
3595  *                 structure pointer.
3596  */
static int mtip_block_compat_ioctl(struct block_device *dev,
			    fmode_t mode,
			    unsigned cmd,
			    unsigned long arg)
{
	struct driver_data *dd = dev->bd_disk->private_data;

	/* All ioctls on this device are privileged operations. */
	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!dd)
		return -ENOTTY;

	/* Refuse new requests once removal is underway. */
	if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)))
		return -ENOTTY;

	switch (cmd) {
	case BLKFLSBUF:
		return -ENOTTY;
	case HDIO_DRIVE_TASKFILE: {
		struct mtip_compat_ide_task_request_s __user *compat_req_task;
		ide_task_request_t req_task;
		int compat_tasksize, outtotal, ret;

		compat_tasksize =
			sizeof(struct mtip_compat_ide_task_request_s);

		compat_req_task =
			(struct mtip_compat_ide_task_request_s __user *) arg;

		/*
		 * Copy everything except the two trailing size fields,
		 * which are compat_long_t in the 32-bit layout and must
		 * be widened individually via get_user() below.
		 */
		if (copy_from_user(&req_task, (void __user *) arg,
			compat_tasksize - (2 * sizeof(compat_long_t))))
			return -EFAULT;

		if (get_user(req_task.out_size, &compat_req_task->out_size))
			return -EFAULT;

		if (get_user(req_task.in_size, &compat_req_task->in_size))
			return -EFAULT;

		outtotal = sizeof(struct mtip_compat_ide_task_request_s);

		ret = exec_drive_taskfile(dd, (void __user *) arg,
						&req_task, outtotal);

		/*
		 * Copy the results back, again handling the two
		 * compat_long_t fields separately.
		 */
		if (copy_to_user((void __user *) arg, &req_task,
				compat_tasksize -
				(2 * sizeof(compat_long_t))))
			return -EFAULT;

		if (put_user(req_task.out_size, &compat_req_task->out_size))
			return -EFAULT;

		if (put_user(req_task.in_size, &compat_req_task->in_size))
			return -EFAULT;

		return ret;
	}
	default:
		return mtip_hw_ioctl(dd, cmd, arg);
	}
}
3659 #endif
3660
3661 /*
3662  * Obtain the geometry of the device.
3663  *
3664  * You may think that this function is obsolete, but some applications,
3665  * fdisk for example still used CHS values. This function describes the
3666  * device as having 224 heads and 56 sectors per cylinder. These values are
3667  * chosen so that each cylinder is aligned on a 4KB boundary. Since a
3668  * partition is described in terms of a start and end cylinder this means
3669  * that each partition is also 4KB aligned. Non-aligned partitions adversely
3670  * affects performance.
3671  *
3672  * @dev Pointer to the block_device strucutre.
3673  * @geo Pointer to a hd_geometry structure.
3674  *
3675  * return value
3676  *      0       Operation completed successfully.
3677  *      -ENOTTY An error occurred while reading the drive capacity.
3678  */
3679 static int mtip_block_getgeo(struct block_device *dev,
3680                                 struct hd_geometry *geo)
3681 {
3682         struct driver_data *dd = dev->bd_disk->private_data;
3683         sector_t capacity;
3684
3685         if (!dd)
3686                 return -ENOTTY;
3687
3688         if (!(mtip_hw_get_capacity(dd, &capacity))) {
3689                 dev_warn(&dd->pdev->dev,
3690                         "Could not get drive capacity.\n");
3691                 return -ENOTTY;
3692         }
3693
3694         geo->heads = 224;
3695         geo->sectors = 56;
3696         sector_div(capacity, (geo->heads * geo->sectors));
3697         geo->cylinders = capacity;
3698         return 0;
3699 }
3700
3701 /*
3702  * Block device operation function.
3703  *
3704  * This structure contains pointers to the functions required by the block
3705  * layer.
3706  */
static const struct block_device_operations mtip_block_ops = {
	.ioctl		= mtip_block_ioctl,	/* privileged pass-through ioctls */
#ifdef CONFIG_COMPAT
	.compat_ioctl	= mtip_block_compat_ioctl, /* 32-bit ABI translation */
#endif
	.getgeo		= mtip_block_getgeo,	/* synthetic 4KB-aligned CHS */
	.owner		= THIS_MODULE
};
3715
3716 /*
3717  * Block layer make request function.
3718  *
3719  * This function is called by the kernel to process a BIO for
3720  * the P320 device.
3721  *
3722  * @queue Pointer to the request queue. Unused other than to obtain
3723  *              the driver data structure.
3724  * @bio   Pointer to the BIO.
3725  *
3726  */
static void mtip_make_request(struct request_queue *queue, struct bio *bio)
{
	struct driver_data *dd = queue->queuedata;
	struct scatterlist *sg;
	struct bio_vec *bvec;
	int nents = 0;
	int tag = 0;

	/* When I/O is stopped, fail the bio with a code matching the cause. */
	if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) {
		if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT,
							&dd->dd_flag))) {
			/* Device is being removed. */
			bio_endio(bio, -ENXIO);
			return;
		}
		if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) {
			/* Drive is in thermal shutdown. */
			bio_endio(bio, -ENODATA);
			return;
		}
		/* Writes are refused while the drive is write-protected. */
		if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT,
							&dd->dd_flag) &&
				bio_data_dir(bio))) {
			bio_endio(bio, -ENODATA);
			return;
		}
		if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) {
			bio_endio(bio, -ENODATA);
			return;
		}
	}

	/* DISCARD requests are translated into TRIM commands. */
	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		bio_endio(bio, mtip_send_trim(dd, bio->bi_sector,
						bio_sectors(bio)));
		return;
	}

	/* Data-less (flush-style) bios complete immediately. */
	if (unlikely(!bio_has_data(bio))) {
		blk_queue_flush(queue, 0);
		bio_endio(bio, 0);
		return;
	}

	/* Grab a free command slot and its scatterlist. */
	sg = mtip_hw_get_scatterlist(dd, &tag);
	if (likely(sg != NULL)) {
		blk_queue_bounce(queue, &bio);

		if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) {
			dev_warn(&dd->pdev->dev,
				"Maximum number of SGL entries exceeded\n");
			bio_io_error(bio);
			/* Return the slot we could not use. */
			mtip_hw_release_scatterlist(dd, tag);
			return;
		}

		/* Create the scatter list for this bio. */
		bio_for_each_segment(bvec, bio, nents) {
			sg_set_page(&sg[nents],
					bvec->bv_page,
					bvec->bv_len,
					bvec->bv_offset);
		}

		/* Issue the read/write. */
		mtip_hw_submit_io(dd,
				bio->bi_sector,
				bio_sectors(bio),
				nents,
				tag,
				bio_endio,
				bio,
				bio_data_dir(bio));
	} else
		bio_io_error(bio);
}
3801
3802 /*
3803  * Block layer initialization function.
3804  *
3805  * This function is called once by the PCI layer for each P320
3806  * device that is connected to the system.
3807  *
3808  * @dd Pointer to the driver data structure.
3809  *
3810  * return value
3811  *      0 on success else an error code.
3812  */
3813 static int mtip_block_initialize(struct driver_data *dd)
3814 {
3815         int rv = 0, wait_for_rebuild = 0;
3816         sector_t capacity;
3817         unsigned int index = 0;
3818         struct kobject *kobj;
3819         unsigned char thd_name[16];
3820
3821         if (dd->disk)
3822                 goto skip_create_disk; /* hw init done, before rebuild */
3823
3824         /* Initialize the protocol layer. */
3825         wait_for_rebuild = mtip_hw_init(dd);
3826         if (wait_for_rebuild < 0) {
3827                 dev_err(&dd->pdev->dev,
3828                         "Protocol layer initialization failed\n");
3829                 rv = -EINVAL;
3830                 goto protocol_init_error;
3831         }
3832
3833         dd->disk = alloc_disk_node(MTIP_MAX_MINORS, dd->numa_node);
3834         if (dd->disk  == NULL) {
3835                 dev_err(&dd->pdev->dev,
3836                         "Unable to allocate gendisk structure\n");
3837                 rv = -EINVAL;
3838                 goto alloc_disk_error;
3839         }
3840
3841         /* Generate the disk name, implemented same as in sd.c */
3842         do {
3843                 if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL))
3844                         goto ida_get_error;
3845
3846                 spin_lock(&rssd_index_lock);
3847                 rv = ida_get_new(&rssd_index_ida, &index);
3848                 spin_unlock(&rssd_index_lock);
3849         } while (rv == -EAGAIN);
3850
3851         if (rv)
3852                 goto ida_get_error;
3853
3854         rv = rssd_disk_name_format("rssd",
3855                                 index,
3856                                 dd->disk->disk_name,
3857                                 DISK_NAME_LEN);
3858         if (rv)
3859                 goto disk_index_error;
3860
3861         dd->disk->driverfs_dev  = &dd->pdev->dev;
3862         dd->disk->major         = dd->major;
3863         dd->disk->first_minor   = dd->instance * MTIP_MAX_MINORS;
3864         dd->disk->fops          = &mtip_block_ops;
3865         dd->disk->private_data  = dd;
3866         dd->index               = index;
3867
3868         /*
3869          * if rebuild pending, start the service thread, and delay the block
3870          * queue creation and add_disk()
3871          */
3872         if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
3873                 goto start_service_thread;
3874
3875 skip_create_disk:
3876         /* Allocate the request queue. */
3877         dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node);
3878         if (dd->queue == NULL) {
3879                 dev_err(&dd->pdev->dev,
3880                         "Unable to allocate request queue\n");
3881                 rv = -ENOMEM;
3882                 goto block_queue_alloc_init_error;
3883         }
3884
3885         /* Attach our request function to the request queue. */
3886         blk_queue_make_request(dd->queue, mtip_make_request);
3887
3888         dd->disk->queue         = dd->queue;
3889         dd->queue->queuedata    = dd;
3890
3891         /* Set device limits. */
3892         set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags);
3893         blk_queue_max_segments(dd->queue, MTIP_MAX_SG);
3894         blk_queue_physical_block_size(dd->queue, 4096);
3895         blk_queue_max_hw_sectors(dd->queue, 0xffff);
3896         blk_queue_max_segment_size(dd->queue, 0x400000);
3897         blk_queue_io_min(dd->queue, 4096);
3898
3899         /*
3900          * write back cache is not supported in the device. FUA depends on
3901          * write back cache support, hence setting flush support to zero.
3902          */
3903         blk_queue_flush(dd->queue, 0);
3904
3905         /* Signal trim support */
3906         if (dd->trim_supp == true) {
3907                 set_bit(QUEUE_FLAG_DISCARD, &dd->queue->queue_flags);
3908                 dd->queue->limits.discard_granularity = 4096;
3909                 blk_queue_max_discard_sectors(dd->queue,
3910                         MTIP_MAX_TRIM_ENTRY_LEN * MTIP_MAX_TRIM_ENTRIES);
3911                 dd->queue->limits.discard_zeroes_data = 0;
3912         }
3913
3914         /* Set the capacity of the device in 512 byte sectors. */
3915         if (!(mtip_hw_get_capacity(dd, &capacity))) {
3916                 dev_warn(&dd->pdev->dev,
3917                         "Could not read drive capacity\n");
3918                 rv = -EIO;
3919                 goto read_capacity_error;
3920         }
3921         set_capacity(dd->disk, capacity);
3922
3923         /* Enable the block device and add it to /dev */
3924         add_disk(dd->disk);
3925
3926         /*
3927          * Now that the disk is active, initialize any sysfs attributes
3928          * managed by the protocol layer.
3929          */
3930         kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
3931         if (kobj) {
3932                 mtip_hw_sysfs_init(dd, kobj);
3933                 kobject_put(kobj);
3934         }
3935         mtip_hw_debugfs_init(dd);
3936
3937         if (dd->mtip_svc_handler) {
3938                 set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
3939                 return rv; /* service thread created for handling rebuild */
3940         }
3941
3942 start_service_thread:
3943         sprintf(thd_name, "mtip_svc_thd_%02d", index);
3944         dd->mtip_svc_handler = kthread_create_on_node(mtip_service_thread,
3945                                                 dd, dd->numa_node, thd_name);
3946
3947         if (IS_ERR(dd->mtip_svc_handler)) {
3948                 dev_err(&dd->pdev->dev, "service thread failed to start\n");
3949                 dd->mtip_svc_handler = NULL;
3950                 rv = -EFAULT;
3951                 goto kthread_run_error;
3952         }
3953         wake_up_process(dd->mtip_svc_handler);
3954         if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC)
3955                 rv = wait_for_rebuild;
3956
3957         return rv;
3958
3959 kthread_run_error:
3960         mtip_hw_debugfs_exit(dd);
3961
3962         /* Delete our gendisk. This also removes the device from /dev */
3963         del_gendisk(dd->disk);
3964
3965 read_capacity_error:
3966         blk_cleanup_queue(dd->queue);
3967
3968 block_queue_alloc_init_error:
3969 disk_index_error:
3970         spin_lock(&rssd_index_lock);
3971         ida_remove(&rssd_index_ida, index);
3972         spin_unlock(&rssd_index_lock);
3973
3974 ida_get_error:
3975         put_disk(dd->disk);
3976
3977 alloc_disk_error:
3978         mtip_hw_exit(dd); /* De-initialize the protocol layer. */
3979
3980 protocol_init_error:
3981         return rv;
3982 }
3983
3984 /*
3985  * Block layer deinitialization function.
3986  *
3987  * Called by the PCI layer as each P320 device is removed.
3988  *
3989  * @dd Pointer to the driver data structure.
3990  *
3991  * return value
3992  *      0
3993  */
static int mtip_block_remove(struct driver_data *dd)
{
	struct kobject *kobj;

	/*
	 * Stop the service thread first so it cannot touch the port
	 * while the rest of the teardown runs.
	 */
	if (dd->mtip_svc_handler) {
		set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags);
		wake_up_interruptible(&dd->port->svc_wait);
		kthread_stop(dd->mtip_svc_handler);
	}

	/* Clean up the sysfs attributes, if created */
	if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) {
		kobj = kobject_get(&disk_to_dev(dd->disk)->kobj);
		if (kobj) {
			mtip_hw_sysfs_exit(dd, kobj);
			kobject_put(kobj);
		}
	}
	mtip_hw_debugfs_exit(dd);

	/*
	 * Delete our gendisk structure. This also removes the device
	 * from /dev.  A disk without a queue was never add_disk()ed,
	 * so it only needs its reference dropped.
	 */
	if (dd->disk) {
		if (dd->disk->queue)
			del_gendisk(dd->disk);
		else
			put_disk(dd->disk);
	}

	/* Return the disk index so it can be reused on hot insertion. */
	spin_lock(&rssd_index_lock);
	ida_remove(&rssd_index_ida, dd->index);
	spin_unlock(&rssd_index_lock);

	blk_cleanup_queue(dd->queue);
	dd->disk  = NULL;
	dd->queue = NULL;

	/* De-initialize the protocol layer. */
	mtip_hw_exit(dd);

	return 0;
}
4038
4039 /*
4040  * Function called by the PCI layer when just before the
4041  * machine shuts down.
4042  *
4043  * If a protocol layer shutdown function is present it will be called
4044  * by this function.
4045  *
4046  * @dd Pointer to the driver data structure.
4047  *
4048  * return value
4049  *      0
4050  */
4051 static int mtip_block_shutdown(struct driver_data *dd)
4052 {
4053         dev_info(&dd->pdev->dev,
4054                 "Shutting down %s ...\n", dd->disk->disk_name);
4055
4056         /* Delete our gendisk structure, and cleanup the blk queue. */
4057         if (dd->disk) {
4058                 if (dd->disk->queue)
4059                         del_gendisk(dd->disk);
4060                 else
4061                         put_disk(dd->disk);
4062         }
4063
4064
4065         spin_lock(&rssd_index_lock);
4066         ida_remove(&rssd_index_ida, dd->index);
4067         spin_unlock(&rssd_index_lock);
4068
4069         blk_cleanup_queue(dd->queue);
4070         dd->disk  = NULL;
4071         dd->queue = NULL;
4072
4073         mtip_hw_shutdown(dd);
4074         return 0;
4075 }
4076
/* Block-layer suspend hook: log, then delegate to the protocol layer. */
static int mtip_block_suspend(struct driver_data *dd)
{
	dev_info(&dd->pdev->dev,
		"Suspending %s ...\n", dd->disk->disk_name);
	/* NOTE(review): mtip_hw_suspend()'s result is discarded — confirm. */
	mtip_hw_suspend(dd);
	return 0;
}
4084
/* Block-layer resume hook: log, then delegate to the protocol layer. */
static int mtip_block_resume(struct driver_data *dd)
{
	dev_info(&dd->pdev->dev, "Resuming %s ...\n",
		dd->disk->disk_name);
	/* NOTE(review): mtip_hw_resume()'s result is discarded — confirm. */
	mtip_hw_resume(dd);
	return 0;
}
4092
/*
 * Release one reference on a CPU claimed by get_least_used_cpu_on_node().
 * NOTE(review): cpu_use[] updates are not locked here — presumably
 * serialized by the probe/remove path; confirm before reuse elsewhere.
 */
static void drop_cpu(int cpu)
{
	cpu_use[cpu]--;
}
4097
4098 static int get_least_used_cpu_on_node(int node)
4099 {
4100         int cpu, least_used_cpu, least_cnt;
4101         const struct cpumask *node_mask;
4102
4103         node_mask = cpumask_of_node(node);
4104         least_used_cpu = cpumask_first(node_mask);
4105         least_cnt = cpu_use[least_used_cpu];
4106         cpu = least_used_cpu;
4107
4108         for_each_cpu(cpu, node_mask) {
4109                 if (cpu_use[cpu] < least_cnt) {
4110                         least_used_cpu = cpu;
4111                         least_cnt = cpu_use[cpu];
4112                 }
4113         }
4114         cpu_use[least_used_cpu]++;
4115         return least_used_cpu;
4116 }
4117
4118 /* Helper for selecting a node in round robin mode */
4119 static inline int mtip_get_next_rr_node(void)
4120 {
4121         static int next_node = -1;
4122
4123         if (next_node == -1) {
4124                 next_node = first_online_node;
4125                 return next_node;
4126         }
4127
4128         next_node = next_online_node(next_node);
4129         if (next_node == MAX_NUMNODES)
4130                 next_node = first_online_node;
4131         return next_node;
4132 }
4133
/*
 * Instantiate one slot-group bottom-half worker per slot group; the
 * expanded functions (mtip_workq_sdbf0 .. mtip_workq_sdbf7) are wired
 * up with INIT_WORK() in mtip_pci_probe() below.  DEFINE_HANDLER is
 * presumably declared in mtip32xx.h -- TODO confirm.
 */
static DEFINE_HANDLER(0);
static DEFINE_HANDLER(1);
static DEFINE_HANDLER(2);
static DEFINE_HANDLER(3);
static DEFINE_HANDLER(4);
static DEFINE_HANDLER(5);
static DEFINE_HANDLER(6);
static DEFINE_HANDLER(7);
4142
/*
 * Called for each supported PCI device detected.
 *
 * This function allocates the private data structure, enables the
 * PCI device and then calls the block layer initialization function.
 *
 * return value
 *	0 on success else an error code.
 */
static int mtip_pci_probe(struct pci_dev *pdev,
			const struct pci_device_id *ent)
{
	int rv = 0;
	struct driver_data *dd = NULL;
	char cpu_list[256];
	const struct cpumask *node_mask;
	int cpu, i = 0, j = 0;
	int my_node = NUMA_NO_NODE;

	/*
	 * Pick the NUMA node closest to the device; if the platform does
	 * not report proximity, or the reported node is offline, fall back
	 * to a round-robin choice across online nodes.
	 */
	my_node = pcibus_to_node(pdev->bus);
	if (my_node != NUMA_NO_NODE) {
		if (!node_online(my_node))
			my_node = mtip_get_next_rr_node();
	} else {
		dev_info(&pdev->dev, "Kernel not reporting proximity, choosing a node\n");
		my_node = mtip_get_next_rr_node();
	}
	dev_info(&pdev->dev, "NUMA node %d (closest: %d,%d, probe on %d:%d)\n",
		my_node, pcibus_to_node(pdev->bus), dev_to_node(&pdev->dev),
		cpu_to_node(smp_processor_id()), smp_processor_id());

	/* Allocate this device's private data on the chosen node. */
	dd = kzalloc_node(sizeof(struct driver_data), GFP_KERNEL, my_node);
	if (dd == NULL) {
		dev_err(&pdev->dev,
			"Unable to allocate memory for driver data\n");
		return -ENOMEM;
	}

	/* Attach the private data to this PCI device.  */
	pci_set_drvdata(pdev, dd);

	/* Managed enable: pcim_* resources are released on driver detach. */
	rv = pcim_enable_device(pdev);
	if (rv < 0) {
		dev_err(&pdev->dev, "Unable to enable device\n");
		goto iomap_err;
	}

	/* Map BAR5 to memory. */
	rv = pcim_iomap_regions(pdev, 1 << MTIP_ABAR, MTIP_DRV_NAME);
	if (rv < 0) {
		dev_err(&pdev->dev, "Unable to map regions\n");
		goto iomap_err;
	}

	/*
	 * Prefer full 64-bit DMA; if the 64-bit coherent mask cannot be
	 * set, retry with a 32-bit coherent mask before giving up.
	 */
	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
		rv = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));

		if (rv) {
			rv = pci_set_consistent_dma_mask(pdev,
						DMA_BIT_MASK(32));
			if (rv) {
				dev_warn(&pdev->dev,
					"64-bit DMA enable failed\n");
				goto setmask_err;
			}
		}
	}

	/* Copy the info we may need later into the private data structure. */
	dd->major	= mtip_major;
	dd->instance	= instance;
	dd->pdev	= pdev;
	dd->numa_node	= my_node;

	memset(dd->workq_name, 0, 32);
	snprintf(dd->workq_name, 31, "mtipq%d", dd->instance);

	/* Per-device workqueue that runs the ISR bottom-half workers. */
	dd->isr_workq = create_workqueue(dd->workq_name);
	if (!dd->isr_workq) {
		dev_warn(&pdev->dev, "Can't create wq %d\n", dd->instance);
		rv = -ENOMEM;
		goto block_initialize_err;
	}

	memset(cpu_list, 0, sizeof(cpu_list));

	/* Log the CPUs that belong to our NUMA node. */
	node_mask = cpumask_of_node(dd->numa_node);
	if (!cpumask_empty(node_mask)) {
		for_each_cpu(cpu, node_mask)
		{
			snprintf(&cpu_list[j], 256 - j, "%d ", cpu);
			j = strlen(cpu_list);
		}

		dev_info(&pdev->dev, "Node %d on package %d has %d cpu(s): %s\n",
			dd->numa_node,
			topology_physical_package_id(cpumask_first(node_mask)),
			nr_cpus_node(dd->numa_node),
			cpu_list);
	} else
		dev_dbg(&pdev->dev, "mtip32xx: node_mask empty\n");

	dd->isr_binding = get_least_used_cpu_on_node(dd->numa_node);
	dev_info(&pdev->dev, "Initial IRQ binding node:cpu %d:%d\n",
		cpu_to_node(dd->isr_binding), dd->isr_binding);

	/* first worker context always runs in ISR */
	dd->work[0].cpu_binding = dd->isr_binding;
	dd->work[1].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
	dd->work[2].cpu_binding = get_least_used_cpu_on_node(dd->numa_node);
	/* Remaining slot groups share the three CPUs chosen above. */
	dd->work[3].cpu_binding = dd->work[0].cpu_binding;
	dd->work[4].cpu_binding = dd->work[1].cpu_binding;
	dd->work[5].cpu_binding = dd->work[2].cpu_binding;
	dd->work[6].cpu_binding = dd->work[2].cpu_binding;
	dd->work[7].cpu_binding = dd->work[1].cpu_binding;

	/* Log the bindings */
	for_each_present_cpu(cpu) {
		memset(cpu_list, 0, sizeof(cpu_list));
		for (i = 0, j = 0; i < MTIP_MAX_SLOT_GROUPS; i++) {
			if (dd->work[i].cpu_binding == cpu) {
				snprintf(&cpu_list[j], 256 - j, "%d ", i);
				j = strlen(cpu_list);
			}
		}
		if (j)
			dev_info(&pdev->dev, "CPU %d: WQs %s\n", cpu, cpu_list);
	}

	/* One work item per slot group, see DEFINE_HANDLER() above. */
	INIT_WORK(&dd->work[0].work, mtip_workq_sdbf0);
	INIT_WORK(&dd->work[1].work, mtip_workq_sdbf1);
	INIT_WORK(&dd->work[2].work, mtip_workq_sdbf2);
	INIT_WORK(&dd->work[3].work, mtip_workq_sdbf3);
	INIT_WORK(&dd->work[4].work, mtip_workq_sdbf4);
	INIT_WORK(&dd->work[5].work, mtip_workq_sdbf5);
	INIT_WORK(&dd->work[6].work, mtip_workq_sdbf6);
	INIT_WORK(&dd->work[7].work, mtip_workq_sdbf7);

	pci_set_master(pdev);
	rv = pci_enable_msi(pdev);
	if (rv) {
		dev_warn(&pdev->dev,
			"Unable to enable MSI interrupt.\n");
		goto block_initialize_err;
	}

	/* Initialize the block layer. */
	rv = mtip_block_initialize(dd);
	if (rv < 0) {
		dev_err(&pdev->dev,
			"Unable to initialize block layer\n");
		goto block_initialize_err;
	}

	/*
	 * Increment the instance count so that each device has a unique
	 * instance number.
	 */
	instance++;
	/*
	 * NOTE(review): mtip_block_initialize() may return the positive
	 * MTIP_FTL_REBUILD_MAGIC value; init-done is deferred in that case
	 * -- confirm against the rebuild handling elsewhere in this file.
	 */
	if (rv != MTIP_FTL_REBUILD_MAGIC)
		set_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag);
	goto done;

	/* Unwind in reverse order of acquisition. */
block_initialize_err:
	pci_disable_msi(pdev);
	if (dd->isr_workq) {
		flush_workqueue(dd->isr_workq);
		destroy_workqueue(dd->isr_workq);
		drop_cpu(dd->work[0].cpu_binding);
		drop_cpu(dd->work[1].cpu_binding);
		drop_cpu(dd->work[2].cpu_binding);
	}
setmask_err:
	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);

iomap_err:
	kfree(dd);
	pci_set_drvdata(pdev, NULL);
	return rv;
done:
	return rv;
}
4326
/*
 * Called for each probed device when the device is removed or the
 * driver is unloaded.
 *
 * return value
 *	None
 */
static void mtip_pci_remove(struct pci_dev *pdev)
{
	struct driver_data *dd = pci_get_drvdata(pdev);
	int counter = 0;

	/* Tell all in-flight paths that removal is in progress. */
	set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag);

	if (mtip_check_surprise_removal(pdev)) {
		/*
		 * Surprise removal: poll up to 10 * 20ms for the cleanup
		 * path to set MTIP_DDF_CLEANUP_BIT before forcing command
		 * cleanup ourselves.
		 */
		while (!test_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag)) {
			counter++;
			msleep(20);
			if (counter == 10) {
				/* Cleanup the outstanding commands */
				mtip_command_cleanup(dd);
				break;
			}
		}
	}

	/* Clean up the block layer. */
	mtip_block_remove(dd);

	/* Stop the ISR bottom-half workqueue and release its CPU slots. */
	if (dd->isr_workq) {
		flush_workqueue(dd->isr_workq);
		destroy_workqueue(dd->isr_workq);
		drop_cpu(dd->work[0].cpu_binding);
		drop_cpu(dd->work[1].cpu_binding);
		drop_cpu(dd->work[2].cpu_binding);
	}

	pci_disable_msi(pdev);

	kfree(dd);
	pcim_iounmap_regions(pdev, 1 << MTIP_ABAR);
}
4369
/*
 * Called for each probed device when the device is suspended.
 *
 * return value
 *	0  Success
 *	<0 Error
 */
static int mtip_pci_suspend(struct pci_dev *pdev, pm_message_t mesg)
{
	int rv = 0;
	struct driver_data *dd = pci_get_drvdata(pdev);

	if (!dd) {
		dev_err(&pdev->dev,
			"Driver private datastructure is NULL\n");
		return -EFAULT;
	}

	/*
	 * NOTE(review): the RESUME bit is set on suspend entry and cleared
	 * at the end of mtip_pci_resume() -- it appears to mark "power
	 * transition in progress"; confirm against the flag's other users.
	 */
	set_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);

	/* Disable ports & interrupts then send standby immediate */
	rv = mtip_block_suspend(dd);
	if (rv < 0) {
		dev_err(&pdev->dev,
			"Failed to suspend controller\n");
		return rv;
	}

	/*
	 * Save the pci config space to pdev structure &
	 * disable the device
	 */
	pci_save_state(pdev);
	pci_disable_device(pdev);

	/* Move to Low power state*/
	pci_set_power_state(pdev, PCI_D3hot);

	return rv;
}
4410
/*
 * Called for each probed device when the device is resumed.
 *
 * return value
 *	0  Success
 *	<0 Error
 */
static int mtip_pci_resume(struct pci_dev *pdev)
{
	int rv = 0;
	struct driver_data *dd;

	dd = pci_get_drvdata(pdev);
	if (!dd) {
		dev_err(&pdev->dev,
			"Driver private datastructure is NULL\n");
		return -EFAULT;
	}

	/* Move the device to active State */
	pci_set_power_state(pdev, PCI_D0);

	/* Restore PCI configuration space */
	pci_restore_state(pdev);

	/* Enable the PCI device*/
	rv = pcim_enable_device(pdev);
	if (rv < 0) {
		dev_err(&pdev->dev,
			"Failed to enable card during resume\n");
		goto err;
	}
	pci_set_master(pdev);

	/*
	 * Calls hbaReset, initPort, & startPort function
	 * then enables interrupts
	 */
	rv = mtip_block_resume(dd);
	if (rv < 0)
		dev_err(&pdev->dev, "Unable to resume\n");

err:
	/* End of the power transition begun in mtip_pci_suspend(). */
	clear_bit(MTIP_DDF_RESUME_BIT, &dd->dd_flag);

	return rv;
}
4458
/*
 * PCI shutdown routine: forward to the block-layer shutdown path if
 * this device was successfully probed.
 *
 * return value
 *	None
 */
static void mtip_pci_shutdown(struct pci_dev *pdev)
{
	struct driver_data *dd = pci_get_drvdata(pdev);

	if (!dd)
		return;

	mtip_block_shutdown(dd);
}
4471
/* Table of device ids supported by this driver. */
static DEFINE_PCI_DEVICE_TABLE(mtip_pci_tbl) = {
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320H_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P320S_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P325M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420H_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P420M_DEVICE_ID) },
	{ PCI_DEVICE(PCI_VENDOR_ID_MICRON, P425M_DEVICE_ID) },
	{ 0 }	/* terminating entry */
};
4483
/* Structure that describes the PCI driver functions. */
static struct pci_driver mtip_pci_driver = {
	.name			= MTIP_DRV_NAME,
	.id_table		= mtip_pci_tbl,
	.probe			= mtip_pci_probe,
	.remove			= mtip_pci_remove,
	.suspend		= mtip_pci_suspend,
	.resume			= mtip_pci_resume,
	.shutdown		= mtip_pci_shutdown,
};
4494
4495 MODULE_DEVICE_TABLE(pci, mtip_pci_tbl);
4496
4497 /*
4498  * Module initialization function.
4499  *
4500  * Called once when the module is loaded. This function allocates a major
4501  * block device number to the Cyclone devices and registers the PCI layer
4502  * of the driver.
4503  *
4504  * Return value
4505  *      0 on success else error code.
4506  */
4507 static int __init mtip_init(void)
4508 {
4509         int error;
4510
4511         pr_info(MTIP_DRV_NAME " Version " MTIP_DRV_VERSION "\n");
4512
4513         /* Allocate a major block device number to use with this driver. */
4514         error = register_blkdev(0, MTIP_DRV_NAME);
4515         if (error <= 0) {
4516                 pr_err("Unable to register block device (%d)\n",
4517                 error);
4518                 return -EBUSY;
4519         }
4520         mtip_major = error;
4521
4522         if (!dfs_parent) {
4523                 dfs_parent = debugfs_create_dir("rssd", NULL);
4524                 if (IS_ERR_OR_NULL(dfs_parent)) {
4525                         pr_warn("Error creating debugfs parent\n");
4526                         dfs_parent = NULL;
4527                 }
4528         }
4529
4530         /* Register our PCI operations. */
4531         error = pci_register_driver(&mtip_pci_driver);
4532         if (error) {
4533                 debugfs_remove(dfs_parent);
4534                 unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4535         }
4536
4537         return error;
4538 }
4539
4540 /*
4541  * Module de-initialization function.
4542  *
4543  * Called once when the module is unloaded. This function deallocates
4544  * the major block device number allocated by mtip_init() and
4545  * unregisters the PCI layer of the driver.
4546  *
4547  * Return value
4548  *      none
4549  */
4550 static void __exit mtip_exit(void)
4551 {
4552         debugfs_remove_recursive(dfs_parent);
4553
4554         /* Release the allocated major block device number. */
4555         unregister_blkdev(mtip_major, MTIP_DRV_NAME);
4556
4557         /* Unregister the PCI driver. */
4558         pci_unregister_driver(&mtip_pci_driver);
4559 }
4560
4561 MODULE_AUTHOR("Micron Technology, Inc");
4562 MODULE_DESCRIPTION("Micron RealSSD PCIe Block Driver");
4563 MODULE_LICENSE("GPL");
4564 MODULE_VERSION(MTIP_DRV_VERSION);
4565
4566 module_init(mtip_init);
4567 module_exit(mtip_exit);