drm/amdgpu: Use the default reset when loading or reloading the driver
[platform/kernel/linux-starfive.git] / drivers / gpu / drm / amd / amdgpu / amdgpu_device.c
1 /*
2  * Copyright 2008 Advanced Micro Devices, Inc.
3  * Copyright 2008 Red Hat Inc.
4  * Copyright 2009 Jerome Glisse.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22  * OTHER DEALINGS IN THE SOFTWARE.
23  *
24  * Authors: Dave Airlie
25  *          Alex Deucher
26  *          Jerome Glisse
27  */
28 #include <linux/power_supply.h>
29 #include <linux/kthread.h>
30 #include <linux/module.h>
31 #include <linux/console.h>
32 #include <linux/slab.h>
33 #include <linux/iommu.h>
34 #include <linux/pci.h>
35 #include <linux/devcoredump.h>
36 #include <generated/utsrelease.h>
37 #include <linux/pci-p2pdma.h>
38 #include <linux/apple-gmux.h>
39
40 #include <drm/drm_aperture.h>
41 #include <drm/drm_atomic_helper.h>
42 #include <drm/drm_crtc_helper.h>
43 #include <drm/drm_fb_helper.h>
44 #include <drm/drm_probe_helper.h>
45 #include <drm/amdgpu_drm.h>
46 #include <linux/vgaarb.h>
47 #include <linux/vga_switcheroo.h>
48 #include <linux/efi.h>
49 #include "amdgpu.h"
50 #include "amdgpu_trace.h"
51 #include "amdgpu_i2c.h"
52 #include "atom.h"
53 #include "amdgpu_atombios.h"
54 #include "amdgpu_atomfirmware.h"
55 #include "amd_pcie.h"
56 #ifdef CONFIG_DRM_AMDGPU_SI
57 #include "si.h"
58 #endif
59 #ifdef CONFIG_DRM_AMDGPU_CIK
60 #include "cik.h"
61 #endif
62 #include "vi.h"
63 #include "soc15.h"
64 #include "nv.h"
65 #include "bif/bif_4_1_d.h"
66 #include <linux/firmware.h>
67 #include "amdgpu_vf_error.h"
68
69 #include "amdgpu_amdkfd.h"
70 #include "amdgpu_pm.h"
71
72 #include "amdgpu_xgmi.h"
73 #include "amdgpu_ras.h"
74 #include "amdgpu_pmu.h"
75 #include "amdgpu_fru_eeprom.h"
76 #include "amdgpu_reset.h"
77
78 #include <linux/suspend.h>
79 #include <drm/task_barrier.h>
80 #include <linux/pm_runtime.h>
81
82 #include <drm/drm_drv.h>
83
84 #if IS_ENABLED(CONFIG_X86)
85 #include <asm/intel-family.h>
86 #endif
87
88 MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
89 MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
90 MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
91 MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
92 MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
93 MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
94 MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
95
#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
/*
 * Evaluates to true when @r is one of -EBUSY, -ETIMEDOUT or -EINVAL.
 *
 * @r is copied into a local first so that it is evaluated exactly once;
 * an argument with side effects (e.g. a function call) is therefore safe.
 */
#define AMDGPU_RETRY_SRIOV_RESET(r)					\
	({								\
		__typeof__(r) _retry_rc = (r);				\
		_retry_rc == -EBUSY || _retry_rc == -ETIMEDOUT ||	\
		_retry_rc == -EINVAL;					\
	})
99
100 static const struct drm_driver amdgpu_kms_driver;
101
/*
 * Table of printable ASIC names; the last entry is the "LAST" sentinel.
 * NOTE(review): presumably indexed by the ASIC type enum, so the order
 * must stay in sync with it -- confirm against the enum definition.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI", "PITCAIRN", "VERDE", "OLAND", "HAINAN",
	"BONAIRE", "KAVERI", "KABINI", "HAWAII", "MULLINS",
	"TOPAZ", "TONGA", "FIJI", "CARRIZO", "STONEY",
	"POLARIS10", "POLARIS11", "POLARIS12", "VEGAM",
	"VEGA10", "VEGA12", "VEGA20", "RAVEN", "ARCTURUS",
	"RENOIR", "ALDEBARAN",
	"NAVI10", "CYAN_SKILLFISH", "NAVI14", "NAVI12",
	"SIENNA_CICHLID", "NAVY_FLOUNDER", "VANGOGH",
	"DIMGREY_CAVEFISH", "BEIGE_GOBY", "YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
142
143 /**
144  * DOC: pcie_replay_count
145  *
146  * The amdgpu driver provides a sysfs API for reporting the total number
147  * of PCIe replays (NAKs)
148  * The file pcie_replay_count is used for this and returns the total
149  * number of replays as a sum of the NAKs generated and NAKs received
150  */
151
152 static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
153                 struct device_attribute *attr, char *buf)
154 {
155         struct drm_device *ddev = dev_get_drvdata(dev);
156         struct amdgpu_device *adev = drm_to_adev(ddev);
157         uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
158
159         return sysfs_emit(buf, "%llu\n", cnt);
160 }
161
162 static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
163                 amdgpu_device_get_pcie_replay_count, NULL);
164
165 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
166
167 /**
168  * DOC: product_name
169  *
170  * The amdgpu driver provides a sysfs API for reporting the product name
171  * for the device
172  * The file product_name is used for this and returns the product name
173  * as returned from the FRU.
174  * NOTE: This is only available for certain server cards
175  */
176
177 static ssize_t amdgpu_device_get_product_name(struct device *dev,
178                 struct device_attribute *attr, char *buf)
179 {
180         struct drm_device *ddev = dev_get_drvdata(dev);
181         struct amdgpu_device *adev = drm_to_adev(ddev);
182
183         return sysfs_emit(buf, "%s\n", adev->product_name);
184 }
185
186 static DEVICE_ATTR(product_name, S_IRUGO,
187                 amdgpu_device_get_product_name, NULL);
188
189 /**
190  * DOC: product_number
191  *
192  * The amdgpu driver provides a sysfs API for reporting the part number
193  * for the device
194  * The file product_number is used for this and returns the part number
195  * as returned from the FRU.
196  * NOTE: This is only available for certain server cards
197  */
198
199 static ssize_t amdgpu_device_get_product_number(struct device *dev,
200                 struct device_attribute *attr, char *buf)
201 {
202         struct drm_device *ddev = dev_get_drvdata(dev);
203         struct amdgpu_device *adev = drm_to_adev(ddev);
204
205         return sysfs_emit(buf, "%s\n", adev->product_number);
206 }
207
208 static DEVICE_ATTR(product_number, S_IRUGO,
209                 amdgpu_device_get_product_number, NULL);
210
211 /**
212  * DOC: serial_number
213  *
214  * The amdgpu driver provides a sysfs API for reporting the serial number
215  * for the device
216  * The file serial_number is used for this and returns the serial number
217  * as returned from the FRU.
218  * NOTE: This is only available for certain server cards
219  */
220
221 static ssize_t amdgpu_device_get_serial_number(struct device *dev,
222                 struct device_attribute *attr, char *buf)
223 {
224         struct drm_device *ddev = dev_get_drvdata(dev);
225         struct amdgpu_device *adev = drm_to_adev(ddev);
226
227         return sysfs_emit(buf, "%s\n", adev->serial);
228 }
229
230 static DEVICE_ATTR(serial_number, S_IRUGO,
231                 amdgpu_device_get_serial_number, NULL);
232
233 /**
234  * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
235  *
236  * @dev: drm_device pointer
237  *
238  * Returns true if the device is a dGPU with ATPX power control,
239  * otherwise return false.
240  */
241 bool amdgpu_device_supports_px(struct drm_device *dev)
242 {
243         struct amdgpu_device *adev = drm_to_adev(dev);
244
245         if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
246                 return true;
247         return false;
248 }
249
250 /**
251  * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
252  *
253  * @dev: drm_device pointer
254  *
255  * Returns true if the device is a dGPU with ACPI power control,
256  * otherwise return false.
257  */
258 bool amdgpu_device_supports_boco(struct drm_device *dev)
259 {
260         struct amdgpu_device *adev = drm_to_adev(dev);
261
262         if (adev->has_pr3 ||
263             ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
264                 return true;
265         return false;
266 }
267
268 /**
269  * amdgpu_device_supports_baco - Does the device support BACO
270  *
271  * @dev: drm_device pointer
272  *
273  * Returns true if the device supporte BACO,
274  * otherwise return false.
275  */
276 bool amdgpu_device_supports_baco(struct drm_device *dev)
277 {
278         struct amdgpu_device *adev = drm_to_adev(dev);
279
280         return amdgpu_asic_supports_baco(adev);
281 }
282
283 /**
284  * amdgpu_device_supports_smart_shift - Is the device dGPU with
285  * smart shift support
286  *
287  * @dev: drm_device pointer
288  *
289  * Returns true if the device is a dGPU with Smart Shift support,
290  * otherwise returns false.
291  */
292 bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
293 {
294         return (amdgpu_device_supports_boco(dev) &&
295                 amdgpu_acpi_is_power_shift_control_supported());
296 }
297
298 /*
299  * VRAM access helper functions
300  */
301
302 /**
303  * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
304  *
305  * @adev: amdgpu_device pointer
306  * @pos: offset of the buffer in vram
307  * @buf: virtual address of the buffer in system memory
308  * @size: read/write size, sizeof(@buf) must > @size
309  * @write: true - write to vram, otherwise - read from vram
310  */
311 void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
312                              void *buf, size_t size, bool write)
313 {
314         unsigned long flags;
315         uint32_t hi = ~0, tmp = 0;
316         uint32_t *data = buf;
317         uint64_t last;
318         int idx;
319
320         if (!drm_dev_enter(adev_to_drm(adev), &idx))
321                 return;
322
323         BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));
324
325         spin_lock_irqsave(&adev->mmio_idx_lock, flags);
326         for (last = pos + size; pos < last; pos += 4) {
327                 tmp = pos >> 31;
328
329                 WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
330                 if (tmp != hi) {
331                         WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
332                         hi = tmp;
333                 }
334                 if (write)
335                         WREG32_NO_KIQ(mmMM_DATA, *data++);
336                 else
337                         *data++ = RREG32_NO_KIQ(mmMM_DATA);
338         }
339
340         spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
341         drm_dev_exit(idx);
342 }
343
344 /**
345  * amdgpu_device_aper_access - access vram by vram aperature
346  *
347  * @adev: amdgpu_device pointer
348  * @pos: offset of the buffer in vram
349  * @buf: virtual address of the buffer in system memory
350  * @size: read/write size, sizeof(@buf) must > @size
351  * @write: true - write to vram, otherwise - read from vram
352  *
353  * The return value means how many bytes have been transferred.
354  */
355 size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
356                                  void *buf, size_t size, bool write)
357 {
358 #ifdef CONFIG_64BIT
359         void __iomem *addr;
360         size_t count = 0;
361         uint64_t last;
362
363         if (!adev->mman.aper_base_kaddr)
364                 return 0;
365
366         last = min(pos + size, adev->gmc.visible_vram_size);
367         if (last > pos) {
368                 addr = adev->mman.aper_base_kaddr + pos;
369                 count = last - pos;
370
371                 if (write) {
372                         memcpy_toio(addr, buf, count);
373                         mb();
374                         amdgpu_device_flush_hdp(adev, NULL);
375                 } else {
376                         amdgpu_device_invalidate_hdp(adev, NULL);
377                         mb();
378                         memcpy_fromio(buf, addr, count);
379                 }
380
381         }
382
383         return count;
384 #else
385         return 0;
386 #endif
387 }
388
389 /**
390  * amdgpu_device_vram_access - read/write a buffer in vram
391  *
392  * @adev: amdgpu_device pointer
393  * @pos: offset of the buffer in vram
394  * @buf: virtual address of the buffer in system memory
395  * @size: read/write size, sizeof(@buf) must > @size
396  * @write: true - write to vram, otherwise - read from vram
397  */
398 void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
399                                void *buf, size_t size, bool write)
400 {
401         size_t count;
402
403         /* try to using vram apreature to access vram first */
404         count = amdgpu_device_aper_access(adev, pos, buf, size, write);
405         size -= count;
406         if (size) {
407                 /* using MM to access rest vram */
408                 pos += count;
409                 buf += count;
410                 amdgpu_device_mm_access(adev, pos, buf, size, write);
411         }
412 }
413
414 /*
415  * register access helper functions.
416  */
417
418 /* Check if hw access should be skipped because of hotplug or device error */
419 bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
420 {
421         if (adev->no_hw_access)
422                 return true;
423
424 #ifdef CONFIG_LOCKDEP
425         /*
426          * This is a bit complicated to understand, so worth a comment. What we assert
427          * here is that the GPU reset is not running on another thread in parallel.
428          *
429          * For this we trylock the read side of the reset semaphore, if that succeeds
430          * we know that the reset is not running in paralell.
431          *
432          * If the trylock fails we assert that we are either already holding the read
433          * side of the lock or are the reset thread itself and hold the write side of
434          * the lock.
435          */
436         if (in_task()) {
437                 if (down_read_trylock(&adev->reset_domain->sem))
438                         up_read(&adev->reset_domain->sem);
439                 else
440                         lockdep_assert_held(&adev->reset_domain->sem);
441         }
442 #endif
443         return false;
444 }
445
446 /**
447  * amdgpu_device_rreg - read a memory mapped IO or indirect register
448  *
449  * @adev: amdgpu_device pointer
450  * @reg: dword aligned register offset
451  * @acc_flags: access flags which require special behavior
452  *
453  * Returns the 32 bit value from the offset specified.
454  */
455 uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
456                             uint32_t reg, uint32_t acc_flags)
457 {
458         uint32_t ret;
459
460         if (amdgpu_device_skip_hw_access(adev))
461                 return 0;
462
463         if ((reg * 4) < adev->rmmio_size) {
464                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
465                     amdgpu_sriov_runtime(adev) &&
466                     down_read_trylock(&adev->reset_domain->sem)) {
467                         ret = amdgpu_kiq_rreg(adev, reg);
468                         up_read(&adev->reset_domain->sem);
469                 } else {
470                         ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
471                 }
472         } else {
473                 ret = adev->pcie_rreg(adev, reg * 4);
474         }
475
476         trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);
477
478         return ret;
479 }
480
481 /*
482  * MMIO register read with bytes helper functions
483  * @offset:bytes offset from MMIO start
484  *
485 */
486
487 /**
488  * amdgpu_mm_rreg8 - read a memory mapped IO register
489  *
490  * @adev: amdgpu_device pointer
491  * @offset: byte aligned register offset
492  *
493  * Returns the 8 bit value from the offset specified.
494  */
495 uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
496 {
497         if (amdgpu_device_skip_hw_access(adev))
498                 return 0;
499
500         if (offset < adev->rmmio_size)
501                 return (readb(adev->rmmio + offset));
502         BUG();
503 }
504
505 /*
506  * MMIO register write with bytes helper functions
507  * @offset:bytes offset from MMIO start
508  * @value: the value want to be written to the register
509  *
510 */
511 /**
512  * amdgpu_mm_wreg8 - read a memory mapped IO register
513  *
514  * @adev: amdgpu_device pointer
515  * @offset: byte aligned register offset
516  * @value: 8 bit value to write
517  *
518  * Writes the value specified to the offset specified.
519  */
520 void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
521 {
522         if (amdgpu_device_skip_hw_access(adev))
523                 return;
524
525         if (offset < adev->rmmio_size)
526                 writeb(value, adev->rmmio + offset);
527         else
528                 BUG();
529 }
530
531 /**
532  * amdgpu_device_wreg - write to a memory mapped IO or indirect register
533  *
534  * @adev: amdgpu_device pointer
535  * @reg: dword aligned register offset
536  * @v: 32 bit value to write to the register
537  * @acc_flags: access flags which require special behavior
538  *
539  * Writes the value specified to the offset specified.
540  */
541 void amdgpu_device_wreg(struct amdgpu_device *adev,
542                         uint32_t reg, uint32_t v,
543                         uint32_t acc_flags)
544 {
545         if (amdgpu_device_skip_hw_access(adev))
546                 return;
547
548         if ((reg * 4) < adev->rmmio_size) {
549                 if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
550                     amdgpu_sriov_runtime(adev) &&
551                     down_read_trylock(&adev->reset_domain->sem)) {
552                         amdgpu_kiq_wreg(adev, reg, v);
553                         up_read(&adev->reset_domain->sem);
554                 } else {
555                         writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
556                 }
557         } else {
558                 adev->pcie_wreg(adev, reg * 4, v);
559         }
560
561         trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
562 }
563
564 /**
565  * amdgpu_mm_wreg_mmio_rlc -  write register either with direct/indirect mmio or with RLC path if in range
566  *
567  * @adev: amdgpu_device pointer
568  * @reg: mmio/rlc register
569  * @v: value to write
570  *
571  * this function is invoked only for the debugfs register access
572  */
573 void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
574                              uint32_t reg, uint32_t v)
575 {
576         if (amdgpu_device_skip_hw_access(adev))
577                 return;
578
579         if (amdgpu_sriov_fullaccess(adev) &&
580             adev->gfx.rlc.funcs &&
581             adev->gfx.rlc.funcs->is_rlcg_access_range) {
582                 if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
583                         return amdgpu_sriov_wreg(adev, reg, v, 0, 0);
584         } else if ((reg * 4) >= adev->rmmio_size) {
585                 adev->pcie_wreg(adev, reg * 4, v);
586         } else {
587                 writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
588         }
589 }
590
591 /**
592  * amdgpu_mm_rdoorbell - read a doorbell dword
593  *
594  * @adev: amdgpu_device pointer
595  * @index: doorbell index
596  *
597  * Returns the value in the doorbell aperture at the
598  * requested doorbell index (CIK).
599  */
600 u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index)
601 {
602         if (amdgpu_device_skip_hw_access(adev))
603                 return 0;
604
605         if (index < adev->doorbell.num_kernel_doorbells) {
606                 return readl(adev->doorbell.ptr + index);
607         } else {
608                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
609                 return 0;
610         }
611 }
612
613 /**
614  * amdgpu_mm_wdoorbell - write a doorbell dword
615  *
616  * @adev: amdgpu_device pointer
617  * @index: doorbell index
618  * @v: value to write
619  *
620  * Writes @v to the doorbell aperture at the
621  * requested doorbell index (CIK).
622  */
623 void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v)
624 {
625         if (amdgpu_device_skip_hw_access(adev))
626                 return;
627
628         if (index < adev->doorbell.num_kernel_doorbells) {
629                 writel(v, adev->doorbell.ptr + index);
630         } else {
631                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
632         }
633 }
634
635 /**
636  * amdgpu_mm_rdoorbell64 - read a doorbell Qword
637  *
638  * @adev: amdgpu_device pointer
639  * @index: doorbell index
640  *
641  * Returns the value in the doorbell aperture at the
642  * requested doorbell index (VEGA10+).
643  */
644 u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index)
645 {
646         if (amdgpu_device_skip_hw_access(adev))
647                 return 0;
648
649         if (index < adev->doorbell.num_kernel_doorbells) {
650                 return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index));
651         } else {
652                 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
653                 return 0;
654         }
655 }
656
657 /**
658  * amdgpu_mm_wdoorbell64 - write a doorbell Qword
659  *
660  * @adev: amdgpu_device pointer
661  * @index: doorbell index
662  * @v: value to write
663  *
664  * Writes @v to the doorbell aperture at the
665  * requested doorbell index (VEGA10+).
666  */
667 void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v)
668 {
669         if (amdgpu_device_skip_hw_access(adev))
670                 return;
671
672         if (index < adev->doorbell.num_kernel_doorbells) {
673                 atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v);
674         } else {
675                 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
676         }
677 }
678
679 /**
680  * amdgpu_device_indirect_rreg - read an indirect register
681  *
682  * @adev: amdgpu_device pointer
683  * @reg_addr: indirect register address to read from
684  *
685  * Returns the value of indirect register @reg_addr
686  */
687 u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
688                                 u32 reg_addr)
689 {
690         unsigned long flags, pcie_index, pcie_data;
691         void __iomem *pcie_index_offset;
692         void __iomem *pcie_data_offset;
693         u32 r;
694
695         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
696         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
697
698         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
699         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
700         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
701
702         writel(reg_addr, pcie_index_offset);
703         readl(pcie_index_offset);
704         r = readl(pcie_data_offset);
705         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
706
707         return r;
708 }
709
710 /**
711  * amdgpu_device_indirect_rreg64 - read a 64bits indirect register
712  *
713  * @adev: amdgpu_device pointer
714  * @reg_addr: indirect register address to read from
715  *
716  * Returns the value of indirect register @reg_addr
717  */
718 u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
719                                   u32 reg_addr)
720 {
721         unsigned long flags, pcie_index, pcie_data;
722         void __iomem *pcie_index_offset;
723         void __iomem *pcie_data_offset;
724         u64 r;
725
726         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
727         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
728
729         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
730         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
731         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
732
733         /* read low 32 bits */
734         writel(reg_addr, pcie_index_offset);
735         readl(pcie_index_offset);
736         r = readl(pcie_data_offset);
737         /* read high 32 bits */
738         writel(reg_addr + 4, pcie_index_offset);
739         readl(pcie_index_offset);
740         r |= ((u64)readl(pcie_data_offset) << 32);
741         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
742
743         return r;
744 }
745
746 /**
747  * amdgpu_device_indirect_wreg - write an indirect register address
748  *
749  * @adev: amdgpu_device pointer
750  * @pcie_index: mmio register offset
751  * @pcie_data: mmio register offset
752  * @reg_addr: indirect register offset
753  * @reg_data: indirect register data
754  *
755  */
756 void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
757                                  u32 reg_addr, u32 reg_data)
758 {
759         unsigned long flags, pcie_index, pcie_data;
760         void __iomem *pcie_index_offset;
761         void __iomem *pcie_data_offset;
762
763         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
764         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
765
766         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
767         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
768         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
769
770         writel(reg_addr, pcie_index_offset);
771         readl(pcie_index_offset);
772         writel(reg_data, pcie_data_offset);
773         readl(pcie_data_offset);
774         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
775 }
776
777 /**
778  * amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
779  *
780  * @adev: amdgpu_device pointer
781  * @pcie_index: mmio register offset
782  * @pcie_data: mmio register offset
783  * @reg_addr: indirect register offset
784  * @reg_data: indirect register data
785  *
786  */
787 void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
788                                    u32 reg_addr, u64 reg_data)
789 {
790         unsigned long flags, pcie_index, pcie_data;
791         void __iomem *pcie_index_offset;
792         void __iomem *pcie_data_offset;
793
794         pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
795         pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
796
797         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
798         pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
799         pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
800
801         /* write low 32 bits */
802         writel(reg_addr, pcie_index_offset);
803         readl(pcie_index_offset);
804         writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
805         readl(pcie_data_offset);
806         /* write high 32 bits */
807         writel(reg_addr + 4, pcie_index_offset);
808         readl(pcie_index_offset);
809         writel((u32)(reg_data >> 32), pcie_data_offset);
810         readl(pcie_data_offset);
811         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
812 }
813
814 /**
815  * amdgpu_device_get_rev_id - query device rev_id
816  *
817  * @adev: amdgpu_device pointer
818  *
819  * Return device rev_id
820  */
821 u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
822 {
823         return adev->nbio.funcs->get_rev_id(adev);
824 }
825
826 /**
827  * amdgpu_invalid_rreg - dummy reg read function
828  *
829  * @adev: amdgpu_device pointer
830  * @reg: offset of register
831  *
832  * Dummy register read function.  Used for register blocks
833  * that certain asics don't have (all asics).
834  * Returns the value in the register.
835  */
836 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
837 {
838         DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
839         BUG();
840         return 0;
841 }
842
843 /**
844  * amdgpu_invalid_wreg - dummy reg write function
845  *
846  * @adev: amdgpu_device pointer
847  * @reg: offset of register
848  * @v: value to write to the register
849  *
850  * Dummy register read function.  Used for register blocks
851  * that certain asics don't have (all asics).
852  */
853 static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
854 {
855         DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
856                   reg, v);
857         BUG();
858 }
859
860 /**
861  * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
862  *
863  * @adev: amdgpu_device pointer
864  * @reg: offset of register
865  *
866  * Dummy register read function.  Used for register blocks
867  * that certain asics don't have (all asics).
868  * Returns the value in the register.
869  */
870 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
871 {
872         DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
873         BUG();
874         return 0;
875 }
876
877 /**
878  * amdgpu_invalid_wreg64 - dummy reg write function
879  *
880  * @adev: amdgpu_device pointer
881  * @reg: offset of register
882  * @v: value to write to the register
883  *
884  * Dummy register read function.  Used for register blocks
885  * that certain asics don't have (all asics).
886  */
887 static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
888 {
889         DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
890                   reg, v);
891         BUG();
892 }
893
894 /**
895  * amdgpu_block_invalid_rreg - dummy reg read function
896  *
897  * @adev: amdgpu_device pointer
898  * @block: offset of instance
899  * @reg: offset of register
900  *
901  * Dummy register read function.  Used for register blocks
902  * that certain asics don't have (all asics).
903  * Returns the value in the register.
904  */
905 static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
906                                           uint32_t block, uint32_t reg)
907 {
908         DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
909                   reg, block);
910         BUG();
911         return 0;
912 }
913
914 /**
915  * amdgpu_block_invalid_wreg - dummy reg write function
916  *
917  * @adev: amdgpu_device pointer
918  * @block: offset of instance
919  * @reg: offset of register
920  * @v: value to write to the register
921  *
922  * Dummy register read function.  Used for register blocks
923  * that certain asics don't have (all asics).
924  */
925 static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
926                                       uint32_t block,
927                                       uint32_t reg, uint32_t v)
928 {
929         DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
930                   reg, block, v);
931         BUG();
932 }
933
934 /**
935  * amdgpu_device_asic_init - Wrapper for atom asic_init
936  *
937  * @adev: amdgpu_device pointer
938  *
939  * Does any asic specific work and then calls atom asic init.
940  */
941 static int amdgpu_device_asic_init(struct amdgpu_device *adev)
942 {
943         amdgpu_asic_pre_asic_init(adev);
944
945         if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
946                 return amdgpu_atomfirmware_asic_init(adev, true);
947         else
948                 return amdgpu_atom_asic_init(adev->mode_info.atom_context);
949 }
950
951 /**
952  * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
953  *
954  * @adev: amdgpu_device pointer
955  *
956  * Allocates a scratch page of VRAM for use by various things in the
957  * driver.
958  */
959 static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
960 {
961         return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
962                                        AMDGPU_GEM_DOMAIN_VRAM |
963                                        AMDGPU_GEM_DOMAIN_GTT,
964                                        &adev->mem_scratch.robj,
965                                        &adev->mem_scratch.gpu_addr,
966                                        (void **)&adev->mem_scratch.ptr);
967 }
968
969 /**
970  * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
971  *
972  * @adev: amdgpu_device pointer
973  *
974  * Frees the VRAM scratch page.
975  */
976 static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
977 {
978         amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
979 }
980
981 /**
982  * amdgpu_device_program_register_sequence - program an array of registers.
983  *
984  * @adev: amdgpu_device pointer
985  * @registers: pointer to the register array
986  * @array_size: size of the register array
987  *
988  * Programs an array or registers with and and or masks.
989  * This is a helper for setting golden registers.
990  */
991 void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
992                                              const u32 *registers,
993                                              const u32 array_size)
994 {
995         u32 tmp, reg, and_mask, or_mask;
996         int i;
997
998         if (array_size % 3)
999                 return;
1000
1001         for (i = 0; i < array_size; i += 3) {
1002                 reg = registers[i + 0];
1003                 and_mask = registers[i + 1];
1004                 or_mask = registers[i + 2];
1005
1006                 if (and_mask == 0xffffffff) {
1007                         tmp = or_mask;
1008                 } else {
1009                         tmp = RREG32(reg);
1010                         tmp &= ~and_mask;
1011                         if (adev->family >= AMDGPU_FAMILY_AI)
1012                                 tmp |= (or_mask & and_mask);
1013                         else
1014                                 tmp |= or_mask;
1015                 }
1016                 WREG32(reg, tmp);
1017         }
1018 }
1019
1020 /**
1021  * amdgpu_device_pci_config_reset - reset the GPU
1022  *
1023  * @adev: amdgpu_device pointer
1024  *
1025  * Resets the GPU using the pci config reset sequence.
1026  * Only applicable to asics prior to vega10.
1027  */
1028 void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1029 {
1030         pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
1031 }
1032
1033 /**
1034  * amdgpu_device_pci_reset - reset the GPU using generic PCI means
1035  *
1036  * @adev: amdgpu_device pointer
1037  *
1038  * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1039  */
1040 int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1041 {
1042         return pci_reset_function(adev->pdev);
1043 }
1044
/*
 * GPU doorbell aperture helper functions.
 */
1048 /**
1049  * amdgpu_device_doorbell_init - Init doorbell driver information.
1050  *
1051  * @adev: amdgpu_device pointer
1052  *
1053  * Init doorbell driver information (CIK)
1054  * Returns 0 on success, error on failure.
1055  */
1056 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
1057 {
1058
1059         /* No doorbell on SI hardware generation */
1060         if (adev->asic_type < CHIP_BONAIRE) {
1061                 adev->doorbell.base = 0;
1062                 adev->doorbell.size = 0;
1063                 adev->doorbell.num_kernel_doorbells = 0;
1064                 adev->doorbell.ptr = NULL;
1065                 return 0;
1066         }
1067
1068         if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
1069                 return -EINVAL;
1070
1071         amdgpu_asic_init_doorbell_index(adev);
1072
1073         /* doorbell bar mapping */
1074         adev->doorbell.base = pci_resource_start(adev->pdev, 2);
1075         adev->doorbell.size = pci_resource_len(adev->pdev, 2);
1076
1077         if (adev->enable_mes) {
1078                 adev->doorbell.num_kernel_doorbells =
1079                         adev->doorbell.size / sizeof(u32);
1080         } else {
1081                 adev->doorbell.num_kernel_doorbells =
1082                         min_t(u32, adev->doorbell.size / sizeof(u32),
1083                               adev->doorbell_index.max_assignment+1);
1084                 if (adev->doorbell.num_kernel_doorbells == 0)
1085                         return -EINVAL;
1086
1087                 /* For Vega, reserve and map two pages on doorbell BAR since SDMA
1088                  * paging queue doorbell use the second page. The
1089                  * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the
1090                  * doorbells are in the first page. So with paging queue enabled,
1091                  * the max num_kernel_doorbells should + 1 page (0x400 in dword)
1092                  */
1093                 if (adev->asic_type >= CHIP_VEGA10)
1094                         adev->doorbell.num_kernel_doorbells += 0x400;
1095         }
1096
1097         adev->doorbell.ptr = ioremap(adev->doorbell.base,
1098                                      adev->doorbell.num_kernel_doorbells *
1099                                      sizeof(u32));
1100         if (adev->doorbell.ptr == NULL)
1101                 return -ENOMEM;
1102
1103         return 0;
1104 }
1105
1106 /**
1107  * amdgpu_device_doorbell_fini - Tear down doorbell driver information.
1108  *
1109  * @adev: amdgpu_device pointer
1110  *
1111  * Tear down doorbell driver information (CIK)
1112  */
1113 static void amdgpu_device_doorbell_fini(struct amdgpu_device *adev)
1114 {
1115         iounmap(adev->doorbell.ptr);
1116         adev->doorbell.ptr = NULL;
1117 }
1118
1119
1120
/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
1126
1127 /**
1128  * amdgpu_device_wb_fini - Disable Writeback and free memory
1129  *
1130  * @adev: amdgpu_device pointer
1131  *
1132  * Disables Writeback and frees the Writeback memory (all asics).
1133  * Used at driver shutdown.
1134  */
1135 static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1136 {
1137         if (adev->wb.wb_obj) {
1138                 amdgpu_bo_free_kernel(&adev->wb.wb_obj,
1139                                       &adev->wb.gpu_addr,
1140                                       (void **)&adev->wb.wb);
1141                 adev->wb.wb_obj = NULL;
1142         }
1143 }
1144
1145 /**
1146  * amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1147  *
1148  * @adev: amdgpu_device pointer
1149  *
1150  * Initializes writeback and allocates writeback memory (all asics).
1151  * Used at driver startup.
1152  * Returns 0 on success or an -error on failure.
1153  */
1154 static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1155 {
1156         int r;
1157
1158         if (adev->wb.wb_obj == NULL) {
1159                 /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */
1160                 r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8,
1161                                             PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1162                                             &adev->wb.wb_obj, &adev->wb.gpu_addr,
1163                                             (void **)&adev->wb.wb);
1164                 if (r) {
1165                         dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1166                         return r;
1167                 }
1168
1169                 adev->wb.num_wb = AMDGPU_MAX_WB;
1170                 memset(&adev->wb.used, 0, sizeof(adev->wb.used));
1171
1172                 /* clear wb memory */
1173                 memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8);
1174         }
1175
1176         return 0;
1177 }
1178
1179 /**
1180  * amdgpu_device_wb_get - Allocate a wb entry
1181  *
1182  * @adev: amdgpu_device pointer
1183  * @wb: wb index
1184  *
1185  * Allocate a wb slot for use by the driver (all asics).
1186  * Returns 0 on success or -EINVAL on failure.
1187  */
1188 int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb)
1189 {
1190         unsigned long offset = find_first_zero_bit(adev->wb.used, adev->wb.num_wb);
1191
1192         if (offset < adev->wb.num_wb) {
1193                 __set_bit(offset, adev->wb.used);
1194                 *wb = offset << 3; /* convert to dw offset */
1195                 return 0;
1196         } else {
1197                 return -EINVAL;
1198         }
1199 }
1200
1201 /**
1202  * amdgpu_device_wb_free - Free a wb entry
1203  *
1204  * @adev: amdgpu_device pointer
1205  * @wb: wb index
1206  *
1207  * Free a wb slot allocated for use by the driver (all asics)
1208  */
1209 void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1210 {
1211         wb >>= 3;
1212         if (wb < adev->wb.num_wb)
1213                 __clear_bit(wb, adev->wb.used);
1214 }
1215
1216 /**
1217  * amdgpu_device_resize_fb_bar - try to resize FB BAR
1218  *
1219  * @adev: amdgpu_device pointer
1220  *
1221  * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1222  * to fail, but if any of the BARs is not accessible after the size we abort
1223  * driver loading by returning -ENODEV.
1224  */
1225 int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1226 {
1227         int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1228         struct pci_bus *root;
1229         struct resource *res;
1230         unsigned i;
1231         u16 cmd;
1232         int r;
1233
1234         /* Bypass for VF */
1235         if (amdgpu_sriov_vf(adev))
1236                 return 0;
1237
1238         /* skip if the bios has already enabled large BAR */
1239         if (adev->gmc.real_vram_size &&
1240             (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size))
1241                 return 0;
1242
1243         /* Check if the root BUS has 64bit memory resources */
1244         root = adev->pdev->bus;
1245         while (root->parent)
1246                 root = root->parent;
1247
1248         pci_bus_for_each_resource(root, res, i) {
1249                 if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) &&
1250                     res->start > 0x100000000ull)
1251                         break;
1252         }
1253
1254         /* Trying to resize is pointless without a root hub window above 4GB */
1255         if (!res)
1256                 return 0;
1257
1258         /* Limit the BAR size to what is available */
1259         rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1,
1260                         rbar_size);
1261
1262         /* Disable memory decoding while we change the BAR addresses and size */
1263         pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
1264         pci_write_config_word(adev->pdev, PCI_COMMAND,
1265                               cmd & ~PCI_COMMAND_MEMORY);
1266
1267         /* Free the VRAM and doorbell BAR, we most likely need to move both. */
1268         amdgpu_device_doorbell_fini(adev);
1269         if (adev->asic_type >= CHIP_BONAIRE)
1270                 pci_release_resource(adev->pdev, 2);
1271
1272         pci_release_resource(adev->pdev, 0);
1273
1274         r = pci_resize_resource(adev->pdev, 0, rbar_size);
1275         if (r == -ENOSPC)
1276                 DRM_INFO("Not enough PCI address space for a large BAR.");
1277         else if (r && r != -ENOTSUPP)
1278                 DRM_ERROR("Problem resizing BAR0 (%d).", r);
1279
1280         pci_assign_unassigned_bus_resources(adev->pdev->bus);
1281
1282         /* When the doorbell or fb BAR isn't available we have no chance of
1283          * using the device.
1284          */
1285         r = amdgpu_device_doorbell_init(adev);
1286         if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET))
1287                 return -ENODEV;
1288
1289         pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1290
1291         return 0;
1292 }
1293
/*
 * GPU helper functions.
 */
1297 /**
1298  * amdgpu_device_need_post - check if the hw need post or not
1299  *
1300  * @adev: amdgpu_device pointer
1301  *
1302  * Check if the asic has been initialized (all asics) at driver startup
1303  * or post is needed if  hw reset is performed.
1304  * Returns true if need or false if not.
1305  */
1306 bool amdgpu_device_need_post(struct amdgpu_device *adev)
1307 {
1308         uint32_t reg;
1309
1310         if (amdgpu_sriov_vf(adev))
1311                 return false;
1312
1313         if (amdgpu_passthrough(adev)) {
1314                 /* for FIJI: In whole GPU pass-through virtualization case, after VM reboot
1315                  * some old smc fw still need driver do vPost otherwise gpu hang, while
1316                  * those smc fw version above 22.15 doesn't have this flaw, so we force
1317                  * vpost executed for smc version below 22.15
1318                  */
1319                 if (adev->asic_type == CHIP_FIJI) {
1320                         int err;
1321                         uint32_t fw_ver;
1322                         err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1323                         /* force vPost if error occured */
1324                         if (err)
1325                                 return true;
1326
1327                         fw_ver = *((uint32_t *)adev->pm.fw->data + 69);
1328                         if (fw_ver < 0x00160e00)
1329                                 return true;
1330                 }
1331         }
1332
1333         /* Don't post if we need to reset whole hive on init */
1334         if (adev->gmc.xgmi.pending_reset)
1335                 return false;
1336
1337         if (adev->has_hw_reset) {
1338                 adev->has_hw_reset = false;
1339                 return true;
1340         }
1341
1342         /* bios scratch used on CIK+ */
1343         if (adev->asic_type >= CHIP_BONAIRE)
1344                 return amdgpu_atombios_scratch_need_asic_init(adev);
1345
1346         /* check MEM_SIZE for older asics */
1347         reg = amdgpu_asic_get_config_memsize(adev);
1348
1349         if ((reg != 0) && (reg != 0xffffffff))
1350                 return false;
1351
1352         return true;
1353 }
1354
1355 /**
1356  * amdgpu_device_should_use_aspm - check if the device should program ASPM
1357  *
1358  * @adev: amdgpu_device pointer
1359  *
1360  * Confirm whether the module parameter and pcie bridge agree that ASPM should
1361  * be set for this device.
1362  *
1363  * Returns true if it should be used or false if not.
1364  */
1365 bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1366 {
1367         switch (amdgpu_aspm) {
1368         case -1:
1369                 break;
1370         case 0:
1371                 return false;
1372         case 1:
1373                 return true;
1374         default:
1375                 return false;
1376         }
1377         return pcie_aspm_enabled(adev->pdev);
1378 }
1379
1380 bool amdgpu_device_aspm_support_quirk(void)
1381 {
1382 #if IS_ENABLED(CONFIG_X86)
1383         struct cpuinfo_x86 *c = &cpu_data(0);
1384
1385         return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
1386 #else
1387         return true;
1388 #endif
1389 }
1390
1391 /* if we get transitioned to only one device, take VGA back */
1392 /**
1393  * amdgpu_device_vga_set_decode - enable/disable vga decode
1394  *
1395  * @pdev: PCI device pointer
1396  * @state: enable/disable vga decode
1397  *
1398  * Enable/disable vga decode (all asics).
1399  * Returns VGA resource flags.
1400  */
1401 static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1402                 bool state)
1403 {
1404         struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1405         amdgpu_asic_set_vga_state(adev, state);
1406         if (state)
1407                 return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM |
1408                        VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1409         else
1410                 return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
1411 }
1412
1413 /**
1414  * amdgpu_device_check_block_size - validate the vm block size
1415  *
1416  * @adev: amdgpu_device pointer
1417  *
1418  * Validates the vm block size specified via module parameter.
1419  * The vm block size defines number of bits in page table versus page directory,
1420  * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1421  * page table and the remaining bits are in the page directory.
1422  */
1423 static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1424 {
1425         /* defines number of bits in page table versus page directory,
1426          * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1427          * page table and the remaining bits are in the page directory */
1428         if (amdgpu_vm_block_size == -1)
1429                 return;
1430
1431         if (amdgpu_vm_block_size < 9) {
1432                 dev_warn(adev->dev, "VM page table size (%d) too small\n",
1433                          amdgpu_vm_block_size);
1434                 amdgpu_vm_block_size = -1;
1435         }
1436 }
1437
1438 /**
1439  * amdgpu_device_check_vm_size - validate the vm size
1440  *
1441  * @adev: amdgpu_device pointer
1442  *
1443  * Validates the vm size in GB specified via module parameter.
1444  * The VM size is the size of the GPU virtual memory space in GB.
1445  */
1446 static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1447 {
1448         /* no need to check the default value */
1449         if (amdgpu_vm_size == -1)
1450                 return;
1451
1452         if (amdgpu_vm_size < 1) {
1453                 dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
1454                          amdgpu_vm_size);
1455                 amdgpu_vm_size = -1;
1456         }
1457 }
1458
1459 static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
1460 {
1461         struct sysinfo si;
1462         bool is_os_64 = (sizeof(void *) == 8);
1463         uint64_t total_memory;
1464         uint64_t dram_size_seven_GB = 0x1B8000000;
1465         uint64_t dram_size_three_GB = 0xB8000000;
1466
1467         if (amdgpu_smu_memory_pool_size == 0)
1468                 return;
1469
1470         if (!is_os_64) {
1471                 DRM_WARN("Not 64-bit OS, feature not supported\n");
1472                 goto def_value;
1473         }
1474         si_meminfo(&si);
1475         total_memory = (uint64_t)si.totalram * si.mem_unit;
1476
1477         if ((amdgpu_smu_memory_pool_size == 1) ||
1478                 (amdgpu_smu_memory_pool_size == 2)) {
1479                 if (total_memory < dram_size_three_GB)
1480                         goto def_value1;
1481         } else if ((amdgpu_smu_memory_pool_size == 4) ||
1482                 (amdgpu_smu_memory_pool_size == 8)) {
1483                 if (total_memory < dram_size_seven_GB)
1484                         goto def_value1;
1485         } else {
1486                 DRM_WARN("Smu memory pool size not supported\n");
1487                 goto def_value;
1488         }
1489         adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28;
1490
1491         return;
1492
1493 def_value1:
1494         DRM_WARN("No enough system memory\n");
1495 def_value:
1496         adev->pm.smu_prv_buffer_size = 0;
1497 }
1498
1499 static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
1500 {
1501         if (!(adev->flags & AMD_IS_APU) ||
1502             adev->asic_type < CHIP_RAVEN)
1503                 return 0;
1504
1505         switch (adev->asic_type) {
1506         case CHIP_RAVEN:
1507                 if (adev->pdev->device == 0x15dd)
1508                         adev->apu_flags |= AMD_APU_IS_RAVEN;
1509                 if (adev->pdev->device == 0x15d8)
1510                         adev->apu_flags |= AMD_APU_IS_PICASSO;
1511                 break;
1512         case CHIP_RENOIR:
1513                 if ((adev->pdev->device == 0x1636) ||
1514                     (adev->pdev->device == 0x164c))
1515                         adev->apu_flags |= AMD_APU_IS_RENOIR;
1516                 else
1517                         adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE;
1518                 break;
1519         case CHIP_VANGOGH:
1520                 adev->apu_flags |= AMD_APU_IS_VANGOGH;
1521                 break;
1522         case CHIP_YELLOW_CARP:
1523                 break;
1524         case CHIP_CYAN_SKILLFISH:
1525                 if ((adev->pdev->device == 0x13FE) ||
1526                     (adev->pdev->device == 0x143F))
1527                         adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2;
1528                 break;
1529         default:
1530                 break;
1531         }
1532
1533         return 0;
1534 }
1535
1536 /**
1537  * amdgpu_device_check_arguments - validate module params
1538  *
1539  * @adev: amdgpu_device pointer
1540  *
1541  * Validates certain module parameters and updates
1542  * the associated values used by the driver (all asics).
1543  */
1544 static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
1545 {
1546         if (amdgpu_sched_jobs < 4) {
1547                 dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
1548                          amdgpu_sched_jobs);
1549                 amdgpu_sched_jobs = 4;
1550         } else if (!is_power_of_2(amdgpu_sched_jobs)) {
1551                 dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
1552                          amdgpu_sched_jobs);
1553                 amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
1554         }
1555
1556         if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) {
1557                 /* gart size must be greater or equal to 32M */
1558                 dev_warn(adev->dev, "gart size (%d) too small\n",
1559                          amdgpu_gart_size);
1560                 amdgpu_gart_size = -1;
1561         }
1562
1563         if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) {
1564                 /* gtt size must be greater or equal to 32M */
1565                 dev_warn(adev->dev, "gtt size (%d) too small\n",
1566                                  amdgpu_gtt_size);
1567                 amdgpu_gtt_size = -1;
1568         }
1569
1570         /* valid range is between 4 and 9 inclusive */
1571         if (amdgpu_vm_fragment_size != -1 &&
1572             (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) {
1573                 dev_warn(adev->dev, "valid range is between 4 and 9\n");
1574                 amdgpu_vm_fragment_size = -1;
1575         }
1576
1577         if (amdgpu_sched_hw_submission < 2) {
1578                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
1579                          amdgpu_sched_hw_submission);
1580                 amdgpu_sched_hw_submission = 2;
1581         } else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
1582                 dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
1583                          amdgpu_sched_hw_submission);
1584                 amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
1585         }
1586
1587         if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) {
1588                 dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
1589                 amdgpu_reset_method = -1;
1590         }
1591
1592         amdgpu_device_check_smu_prv_buffer_size(adev);
1593
1594         amdgpu_device_check_vm_size(adev);
1595
1596         amdgpu_device_check_block_size(adev);
1597
1598         adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
1599
1600         return 0;
1601 }
1602
1603 /**
1604  * amdgpu_switcheroo_set_state - set switcheroo state
1605  *
1606  * @pdev: pci dev pointer
1607  * @state: vga_switcheroo state
1608  *
1609  * Callback for the switcheroo driver.  Suspends or resumes
1610  * the asics before or after it is powered up using ACPI methods.
1611  */
1612 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
1613                                         enum vga_switcheroo_state state)
1614 {
1615         struct drm_device *dev = pci_get_drvdata(pdev);
1616         int r;
1617
1618         if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
1619                 return;
1620
1621         if (state == VGA_SWITCHEROO_ON) {
1622                 pr_info("switched on\n");
1623                 /* don't suspend or resume card normally */
1624                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1625
1626                 pci_set_power_state(pdev, PCI_D0);
1627                 amdgpu_device_load_pci_state(pdev);
1628                 r = pci_enable_device(pdev);
1629                 if (r)
1630                         DRM_WARN("pci_enable_device failed (%d)\n", r);
1631                 amdgpu_device_resume(dev, true);
1632
1633                 dev->switch_power_state = DRM_SWITCH_POWER_ON;
1634         } else {
1635                 pr_info("switched off\n");
1636                 dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
1637                 amdgpu_device_suspend(dev, true);
1638                 amdgpu_device_cache_pci_state(pdev);
1639                 /* Shut down the device */
1640                 pci_disable_device(pdev);
1641                 pci_set_power_state(pdev, PCI_D3cold);
1642                 dev->switch_power_state = DRM_SWITCH_POWER_OFF;
1643         }
1644 }
1645
1646 /**
1647  * amdgpu_switcheroo_can_switch - see if switcheroo state can change
1648  *
1649  * @pdev: pci dev pointer
1650  *
1651  * Callback for the switcheroo driver.  Check of the switcheroo
1652  * state can be changed.
1653  * Returns true if the state can be changed, false if not.
1654  */
1655 static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
1656 {
1657         struct drm_device *dev = pci_get_drvdata(pdev);
1658
1659         /*
1660         * FIXME: open_count is protected by drm_global_mutex but that would lead to
1661         * locking inversion with the driver load path. And the access here is
1662         * completely racy anyway. So don't bother with locking for now.
1663         */
1664         return atomic_read(&dev->open_count) == 0;
1665 }
1666
1667 static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
1668         .set_gpu_state = amdgpu_switcheroo_set_state,
1669         .reprobe = NULL,
1670         .can_switch = amdgpu_switcheroo_can_switch,
1671 };
1672
1673 /**
1674  * amdgpu_device_ip_set_clockgating_state - set the CG state
1675  *
1676  * @dev: amdgpu_device pointer
1677  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1678  * @state: clockgating state (gate or ungate)
1679  *
1680  * Sets the requested clockgating state for all instances of
1681  * the hardware IP specified.
1682  * Returns the error code from the last instance.
1683  */
1684 int amdgpu_device_ip_set_clockgating_state(void *dev,
1685                                            enum amd_ip_block_type block_type,
1686                                            enum amd_clockgating_state state)
1687 {
1688         struct amdgpu_device *adev = dev;
1689         int i, r = 0;
1690
1691         for (i = 0; i < adev->num_ip_blocks; i++) {
1692                 if (!adev->ip_blocks[i].status.valid)
1693                         continue;
1694                 if (adev->ip_blocks[i].version->type != block_type)
1695                         continue;
1696                 if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
1697                         continue;
1698                 r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
1699                         (void *)adev, state);
1700                 if (r)
1701                         DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
1702                                   adev->ip_blocks[i].version->funcs->name, r);
1703         }
1704         return r;
1705 }
1706
1707 /**
1708  * amdgpu_device_ip_set_powergating_state - set the PG state
1709  *
1710  * @dev: amdgpu_device pointer
1711  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1712  * @state: powergating state (gate or ungate)
1713  *
1714  * Sets the requested powergating state for all instances of
1715  * the hardware IP specified.
1716  * Returns the error code from the last instance.
1717  */
1718 int amdgpu_device_ip_set_powergating_state(void *dev,
1719                                            enum amd_ip_block_type block_type,
1720                                            enum amd_powergating_state state)
1721 {
1722         struct amdgpu_device *adev = dev;
1723         int i, r = 0;
1724
1725         for (i = 0; i < adev->num_ip_blocks; i++) {
1726                 if (!adev->ip_blocks[i].status.valid)
1727                         continue;
1728                 if (adev->ip_blocks[i].version->type != block_type)
1729                         continue;
1730                 if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
1731                         continue;
1732                 r = adev->ip_blocks[i].version->funcs->set_powergating_state(
1733                         (void *)adev, state);
1734                 if (r)
1735                         DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
1736                                   adev->ip_blocks[i].version->funcs->name, r);
1737         }
1738         return r;
1739 }
1740
1741 /**
1742  * amdgpu_device_ip_get_clockgating_state - get the CG state
1743  *
1744  * @adev: amdgpu_device pointer
1745  * @flags: clockgating feature flags
1746  *
1747  * Walks the list of IPs on the device and updates the clockgating
1748  * flags for each IP.
1749  * Updates @flags with the feature flags for each hardware IP where
1750  * clockgating is enabled.
1751  */
1752 void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
1753                                             u64 *flags)
1754 {
1755         int i;
1756
1757         for (i = 0; i < adev->num_ip_blocks; i++) {
1758                 if (!adev->ip_blocks[i].status.valid)
1759                         continue;
1760                 if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
1761                         adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags);
1762         }
1763 }
1764
1765 /**
1766  * amdgpu_device_ip_wait_for_idle - wait for idle
1767  *
1768  * @adev: amdgpu_device pointer
1769  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1770  *
1771  * Waits for the request hardware IP to be idle.
1772  * Returns 0 for success or a negative error code on failure.
1773  */
1774 int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
1775                                    enum amd_ip_block_type block_type)
1776 {
1777         int i, r;
1778
1779         for (i = 0; i < adev->num_ip_blocks; i++) {
1780                 if (!adev->ip_blocks[i].status.valid)
1781                         continue;
1782                 if (adev->ip_blocks[i].version->type == block_type) {
1783                         r = adev->ip_blocks[i].version->funcs->wait_for_idle((void *)adev);
1784                         if (r)
1785                                 return r;
1786                         break;
1787                 }
1788         }
1789         return 0;
1790
1791 }
1792
1793 /**
1794  * amdgpu_device_ip_is_idle - is the hardware IP idle
1795  *
1796  * @adev: amdgpu_device pointer
1797  * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
1798  *
1799  * Check if the hardware IP is idle or not.
1800  * Returns true if it the IP is idle, false if not.
1801  */
1802 bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
1803                               enum amd_ip_block_type block_type)
1804 {
1805         int i;
1806
1807         for (i = 0; i < adev->num_ip_blocks; i++) {
1808                 if (!adev->ip_blocks[i].status.valid)
1809                         continue;
1810                 if (adev->ip_blocks[i].version->type == block_type)
1811                         return adev->ip_blocks[i].version->funcs->is_idle((void *)adev);
1812         }
1813         return true;
1814
1815 }
1816
1817 /**
1818  * amdgpu_device_ip_get_ip_block - get a hw IP pointer
1819  *
1820  * @adev: amdgpu_device pointer
1821  * @type: Type of hardware IP (SMU, GFX, UVD, etc.)
1822  *
1823  * Returns a pointer to the hardware IP block structure
1824  * if it exists for the asic, otherwise NULL.
1825  */
1826 struct amdgpu_ip_block *
1827 amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
1828                               enum amd_ip_block_type type)
1829 {
1830         int i;
1831
1832         for (i = 0; i < adev->num_ip_blocks; i++)
1833                 if (adev->ip_blocks[i].version->type == type)
1834                         return &adev->ip_blocks[i];
1835
1836         return NULL;
1837 }
1838
1839 /**
1840  * amdgpu_device_ip_block_version_cmp
1841  *
1842  * @adev: amdgpu_device pointer
1843  * @type: enum amd_ip_block_type
1844  * @major: major version
1845  * @minor: minor version
1846  *
1847  * return 0 if equal or greater
1848  * return 1 if smaller or the ip_block doesn't exist
1849  */
1850 int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
1851                                        enum amd_ip_block_type type,
1852                                        u32 major, u32 minor)
1853 {
1854         struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
1855
1856         if (ip_block && ((ip_block->version->major > major) ||
1857                         ((ip_block->version->major == major) &&
1858                         (ip_block->version->minor >= minor))))
1859                 return 0;
1860
1861         return 1;
1862 }
1863
1864 /**
1865  * amdgpu_device_ip_block_add
1866  *
1867  * @adev: amdgpu_device pointer
1868  * @ip_block_version: pointer to the IP to add
1869  *
1870  * Adds the IP block driver information to the collection of IPs
1871  * on the asic.
1872  */
1873 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
1874                                const struct amdgpu_ip_block_version *ip_block_version)
1875 {
1876         if (!ip_block_version)
1877                 return -EINVAL;
1878
1879         switch (ip_block_version->type) {
1880         case AMD_IP_BLOCK_TYPE_VCN:
1881                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
1882                         return 0;
1883                 break;
1884         case AMD_IP_BLOCK_TYPE_JPEG:
1885                 if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
1886                         return 0;
1887                 break;
1888         default:
1889                 break;
1890         }
1891
1892         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
1893                   ip_block_version->funcs->name);
1894
1895         adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
1896
1897         return 0;
1898 }
1899
1900 /**
1901  * amdgpu_device_enable_virtual_display - enable virtual display feature
1902  *
1903  * @adev: amdgpu_device pointer
1904  *
1905  * Enabled the virtual display feature if the user has enabled it via
1906  * the module parameter virtual_display.  This feature provides a virtual
1907  * display hardware on headless boards or in virtualized environments.
1908  * This function parses and validates the configuration string specified by
1909  * the user and configues the virtual display configuration (number of
1910  * virtual connectors, crtcs, etc.) specified.
1911  */
1912 static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
1913 {
1914         adev->enable_virtual_display = false;
1915
1916         if (amdgpu_virtual_display) {
1917                 const char *pci_address_name = pci_name(adev->pdev);
1918                 char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname;
1919
1920                 pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
1921                 pciaddstr_tmp = pciaddstr;
1922                 while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
1923                         pciaddname = strsep(&pciaddname_tmp, ",");
1924                         if (!strcmp("all", pciaddname)
1925                             || !strcmp(pci_address_name, pciaddname)) {
1926                                 long num_crtc;
1927                                 int res = -1;
1928
1929                                 adev->enable_virtual_display = true;
1930
1931                                 if (pciaddname_tmp)
1932                                         res = kstrtol(pciaddname_tmp, 10,
1933                                                       &num_crtc);
1934
1935                                 if (!res) {
1936                                         if (num_crtc < 1)
1937                                                 num_crtc = 1;
1938                                         if (num_crtc > 6)
1939                                                 num_crtc = 6;
1940                                         adev->mode_info.num_crtc = num_crtc;
1941                                 } else {
1942                                         adev->mode_info.num_crtc = 1;
1943                                 }
1944                                 break;
1945                         }
1946                 }
1947
1948                 DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
1949                          amdgpu_virtual_display, pci_address_name,
1950                          adev->enable_virtual_display, adev->mode_info.num_crtc);
1951
1952                 kfree(pciaddstr);
1953         }
1954 }
1955
1956 void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
1957 {
1958         if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
1959                 adev->mode_info.num_crtc = 1;
1960                 adev->enable_virtual_display = true;
1961                 DRM_INFO("virtual_display:%d, num_crtc:%d\n",
1962                          adev->enable_virtual_display, adev->mode_info.num_crtc);
1963         }
1964 }
1965
1966 /**
1967  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
1968  *
1969  * @adev: amdgpu_device pointer
1970  *
1971  * Parses the asic configuration parameters specified in the gpu info
1972  * firmware and makes them availale to the driver for use in configuring
1973  * the asic.
1974  * Returns 0 on success, -EINVAL on failure.
1975  */
1976 static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
1977 {
1978         const char *chip_name;
1979         char fw_name[40];
1980         int err;
1981         const struct gpu_info_firmware_header_v1_0 *hdr;
1982
1983         adev->firmware.gpu_info_fw = NULL;
1984
1985         if (adev->mman.discovery_bin) {
1986                 /*
1987                  * FIXME: The bounding box is still needed by Navi12, so
1988                  * temporarily read it from gpu_info firmware. Should be dropped
1989                  * when DAL no longer needs it.
1990                  */
1991                 if (adev->asic_type != CHIP_NAVI12)
1992                         return 0;
1993         }
1994
1995         switch (adev->asic_type) {
1996         default:
1997                 return 0;
1998         case CHIP_VEGA10:
1999                 chip_name = "vega10";
2000                 break;
2001         case CHIP_VEGA12:
2002                 chip_name = "vega12";
2003                 break;
2004         case CHIP_RAVEN:
2005                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2006                         chip_name = "raven2";
2007                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2008                         chip_name = "picasso";
2009                 else
2010                         chip_name = "raven";
2011                 break;
2012         case CHIP_ARCTURUS:
2013                 chip_name = "arcturus";
2014                 break;
2015         case CHIP_NAVI12:
2016                 chip_name = "navi12";
2017                 break;
2018         }
2019
2020         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
2021         err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, fw_name);
2022         if (err) {
2023                 dev_err(adev->dev,
2024                         "Failed to get gpu_info firmware \"%s\"\n",
2025                         fw_name);
2026                 goto out;
2027         }
2028
2029         hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2030         amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2031
2032         switch (hdr->version_major) {
2033         case 1:
2034         {
2035                 const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2036                         (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2037                                                                 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2038
2039                 /*
2040                  * Should be droped when DAL no longer needs it.
2041                  */
2042                 if (adev->asic_type == CHIP_NAVI12)
2043                         goto parse_soc_bounding_box;
2044
2045                 adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2046                 adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2047                 adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2048                 adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2049                 adev->gfx.config.max_texture_channel_caches =
2050                         le32_to_cpu(gpu_info_fw->gc_num_tccs);
2051                 adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2052                 adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2053                 adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2054                 adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2055                 adev->gfx.config.double_offchip_lds_buf =
2056                         le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2057                 adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2058                 adev->gfx.cu_info.max_waves_per_simd =
2059                         le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2060                 adev->gfx.cu_info.max_scratch_slots_per_cu =
2061                         le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2062                 adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2063                 if (hdr->version_minor >= 1) {
2064                         const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2065                                 (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2066                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2067                         adev->gfx.config.num_sc_per_sh =
2068                                 le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2069                         adev->gfx.config.num_packer_per_sc =
2070                                 le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2071                 }
2072
2073 parse_soc_bounding_box:
2074                 /*
2075                  * soc bounding box info is not integrated in disocovery table,
2076                  * we always need to parse it from gpu info firmware if needed.
2077                  */
2078                 if (hdr->version_minor == 2) {
2079                         const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2080                                 (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2081                                                                         le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2082                         adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2083                 }
2084                 break;
2085         }
2086         default:
2087                 dev_err(adev->dev,
2088                         "Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2089                 err = -EINVAL;
2090                 goto out;
2091         }
2092 out:
2093         return err;
2094 }
2095
2096 /**
2097  * amdgpu_device_ip_early_init - run early init for hardware IPs
2098  *
2099  * @adev: amdgpu_device pointer
2100  *
2101  * Early initialization pass for hardware IPs.  The hardware IPs that make
2102  * up each asic are discovered each IP's early_init callback is run.  This
2103  * is the first stage in initializing the asic.
2104  * Returns 0 on success, negative error code on failure.
2105  */
2106 static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2107 {
2108         struct drm_device *dev = adev_to_drm(adev);
2109         struct pci_dev *parent;
2110         int i, r;
2111         bool total;
2112
2113         amdgpu_device_enable_virtual_display(adev);
2114
2115         if (amdgpu_sriov_vf(adev)) {
2116                 r = amdgpu_virt_request_full_gpu(adev, true);
2117                 if (r)
2118                         return r;
2119         }
2120
2121         switch (adev->asic_type) {
2122 #ifdef CONFIG_DRM_AMDGPU_SI
2123         case CHIP_VERDE:
2124         case CHIP_TAHITI:
2125         case CHIP_PITCAIRN:
2126         case CHIP_OLAND:
2127         case CHIP_HAINAN:
2128                 adev->family = AMDGPU_FAMILY_SI;
2129                 r = si_set_ip_blocks(adev);
2130                 if (r)
2131                         return r;
2132                 break;
2133 #endif
2134 #ifdef CONFIG_DRM_AMDGPU_CIK
2135         case CHIP_BONAIRE:
2136         case CHIP_HAWAII:
2137         case CHIP_KAVERI:
2138         case CHIP_KABINI:
2139         case CHIP_MULLINS:
2140                 if (adev->flags & AMD_IS_APU)
2141                         adev->family = AMDGPU_FAMILY_KV;
2142                 else
2143                         adev->family = AMDGPU_FAMILY_CI;
2144
2145                 r = cik_set_ip_blocks(adev);
2146                 if (r)
2147                         return r;
2148                 break;
2149 #endif
2150         case CHIP_TOPAZ:
2151         case CHIP_TONGA:
2152         case CHIP_FIJI:
2153         case CHIP_POLARIS10:
2154         case CHIP_POLARIS11:
2155         case CHIP_POLARIS12:
2156         case CHIP_VEGAM:
2157         case CHIP_CARRIZO:
2158         case CHIP_STONEY:
2159                 if (adev->flags & AMD_IS_APU)
2160                         adev->family = AMDGPU_FAMILY_CZ;
2161                 else
2162                         adev->family = AMDGPU_FAMILY_VI;
2163
2164                 r = vi_set_ip_blocks(adev);
2165                 if (r)
2166                         return r;
2167                 break;
2168         default:
2169                 r = amdgpu_discovery_set_ip_blocks(adev);
2170                 if (r)
2171                         return r;
2172                 break;
2173         }
2174
2175         if (amdgpu_has_atpx() &&
2176             (amdgpu_is_atpx_hybrid() ||
2177              amdgpu_has_atpx_dgpu_power_cntl()) &&
2178             ((adev->flags & AMD_IS_APU) == 0) &&
2179             !pci_is_thunderbolt_attached(to_pci_dev(dev->dev)))
2180                 adev->flags |= AMD_IS_PX;
2181
2182         if (!(adev->flags & AMD_IS_APU)) {
2183                 parent = pci_upstream_bridge(adev->pdev);
2184                 adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2185         }
2186
2187
2188         adev->pm.pp_feature = amdgpu_pp_feature_mask;
2189         if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS)
2190                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2191         if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2192                 adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2193
2194         total = true;
2195         for (i = 0; i < adev->num_ip_blocks; i++) {
2196                 if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
2197                         DRM_ERROR("disabled ip block: %d <%s>\n",
2198                                   i, adev->ip_blocks[i].version->funcs->name);
2199                         adev->ip_blocks[i].status.valid = false;
2200                 } else {
2201                         if (adev->ip_blocks[i].version->funcs->early_init) {
2202                                 r = adev->ip_blocks[i].version->funcs->early_init((void *)adev);
2203                                 if (r == -ENOENT) {
2204                                         adev->ip_blocks[i].status.valid = false;
2205                                 } else if (r) {
2206                                         DRM_ERROR("early_init of IP block <%s> failed %d\n",
2207                                                   adev->ip_blocks[i].version->funcs->name, r);
2208                                         total = false;
2209                                 } else {
2210                                         adev->ip_blocks[i].status.valid = true;
2211                                 }
2212                         } else {
2213                                 adev->ip_blocks[i].status.valid = true;
2214                         }
2215                 }
2216                 /* get the vbios after the asic_funcs are set up */
2217                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2218                         r = amdgpu_device_parse_gpu_info_fw(adev);
2219                         if (r)
2220                                 return r;
2221
2222                         /* Read BIOS */
2223                         if (!amdgpu_get_bios(adev))
2224                                 return -EINVAL;
2225
2226                         r = amdgpu_atombios_init(adev);
2227                         if (r) {
2228                                 dev_err(adev->dev, "amdgpu_atombios_init failed\n");
2229                                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
2230                                 return r;
2231                         }
2232
2233                         /*get pf2vf msg info at it's earliest time*/
2234                         if (amdgpu_sriov_vf(adev))
2235                                 amdgpu_virt_init_data_exchange(adev);
2236
2237                 }
2238         }
2239         if (!total)
2240                 return -ENODEV;
2241
2242         amdgpu_amdkfd_device_probe(adev);
2243         adev->cg_flags &= amdgpu_cg_mask;
2244         adev->pg_flags &= amdgpu_pg_mask;
2245
2246         return 0;
2247 }
2248
2249 static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2250 {
2251         int i, r;
2252
2253         for (i = 0; i < adev->num_ip_blocks; i++) {
2254                 if (!adev->ip_blocks[i].status.sw)
2255                         continue;
2256                 if (adev->ip_blocks[i].status.hw)
2257                         continue;
2258                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
2259                     (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
2260                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2261                         r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2262                         if (r) {
2263                                 DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2264                                           adev->ip_blocks[i].version->funcs->name, r);
2265                                 return r;
2266                         }
2267                         adev->ip_blocks[i].status.hw = true;
2268                 }
2269         }
2270
2271         return 0;
2272 }
2273
2274 static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2275 {
2276         int i, r;
2277
2278         for (i = 0; i < adev->num_ip_blocks; i++) {
2279                 if (!adev->ip_blocks[i].status.sw)
2280                         continue;
2281                 if (adev->ip_blocks[i].status.hw)
2282                         continue;
2283                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2284                 if (r) {
2285                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2286                                   adev->ip_blocks[i].version->funcs->name, r);
2287                         return r;
2288                 }
2289                 adev->ip_blocks[i].status.hw = true;
2290         }
2291
2292         return 0;
2293 }
2294
2295 static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2296 {
2297         int r = 0;
2298         int i;
2299         uint32_t smu_version;
2300
2301         if (adev->asic_type >= CHIP_VEGA10) {
2302                 for (i = 0; i < adev->num_ip_blocks; i++) {
2303                         if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2304                                 continue;
2305
2306                         if (!adev->ip_blocks[i].status.sw)
2307                                 continue;
2308
2309                         /* no need to do the fw loading again if already done*/
2310                         if (adev->ip_blocks[i].status.hw == true)
2311                                 break;
2312
2313                         if (amdgpu_in_reset(adev) || adev->in_suspend) {
2314                                 r = adev->ip_blocks[i].version->funcs->resume(adev);
2315                                 if (r) {
2316                                         DRM_ERROR("resume of IP block <%s> failed %d\n",
2317                                                           adev->ip_blocks[i].version->funcs->name, r);
2318                                         return r;
2319                                 }
2320                         } else {
2321                                 r = adev->ip_blocks[i].version->funcs->hw_init(adev);
2322                                 if (r) {
2323                                         DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2324                                                           adev->ip_blocks[i].version->funcs->name, r);
2325                                         return r;
2326                                 }
2327                         }
2328
2329                         adev->ip_blocks[i].status.hw = true;
2330                         break;
2331                 }
2332         }
2333
2334         if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA)
2335                 r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2336
2337         return r;
2338 }
2339
2340 static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2341 {
2342         long timeout;
2343         int r, i;
2344
2345         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
2346                 struct amdgpu_ring *ring = adev->rings[i];
2347
2348                 /* No need to setup the GPU scheduler for rings that don't need it */
2349                 if (!ring || ring->no_scheduler)
2350                         continue;
2351
2352                 switch (ring->funcs->type) {
2353                 case AMDGPU_RING_TYPE_GFX:
2354                         timeout = adev->gfx_timeout;
2355                         break;
2356                 case AMDGPU_RING_TYPE_COMPUTE:
2357                         timeout = adev->compute_timeout;
2358                         break;
2359                 case AMDGPU_RING_TYPE_SDMA:
2360                         timeout = adev->sdma_timeout;
2361                         break;
2362                 default:
2363                         timeout = adev->video_timeout;
2364                         break;
2365                 }
2366
2367                 r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
2368                                    ring->num_hw_submission, 0,
2369                                    timeout, adev->reset_domain->wq,
2370                                    ring->sched_score, ring->name,
2371                                    adev->dev);
2372                 if (r) {
2373                         DRM_ERROR("Failed to create scheduler on ring %s.\n",
2374                                   ring->name);
2375                         return r;
2376                 }
2377         }
2378
2379         return 0;
2380 }
2381
2382
2383 /**
2384  * amdgpu_device_ip_init - run init for hardware IPs
2385  *
2386  * @adev: amdgpu_device pointer
2387  *
2388  * Main initialization pass for hardware IPs.  The list of all the hardware
2389  * IPs that make up the asic is walked and the sw_init and hw_init callbacks
2390  * are run.  sw_init initializes the software state associated with each IP
2391  * and hw_init initializes the hardware associated with each IP.
2392  * Returns 0 on success, negative error code on failure.
2393  */
2394 static int amdgpu_device_ip_init(struct amdgpu_device *adev)
2395 {
2396         int i, r;
2397
2398         r = amdgpu_ras_init(adev);
2399         if (r)
2400                 return r;
2401
2402         for (i = 0; i < adev->num_ip_blocks; i++) {
2403                 if (!adev->ip_blocks[i].status.valid)
2404                         continue;
2405                 r = adev->ip_blocks[i].version->funcs->sw_init((void *)adev);
2406                 if (r) {
2407                         DRM_ERROR("sw_init of IP block <%s> failed %d\n",
2408                                   adev->ip_blocks[i].version->funcs->name, r);
2409                         goto init_failed;
2410                 }
2411                 adev->ip_blocks[i].status.sw = true;
2412
2413                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2414                         /* need to do common hw init early so everything is set up for gmc */
2415                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2416                         if (r) {
2417                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2418                                 goto init_failed;
2419                         }
2420                         adev->ip_blocks[i].status.hw = true;
2421                 } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2422                         /* need to do gmc hw init early so we can allocate gpu mem */
2423                         /* Try to reserve bad pages early */
2424                         if (amdgpu_sriov_vf(adev))
2425                                 amdgpu_virt_exchange_data(adev);
2426
2427                         r = amdgpu_device_mem_scratch_init(adev);
2428                         if (r) {
2429                                 DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
2430                                 goto init_failed;
2431                         }
2432                         r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
2433                         if (r) {
2434                                 DRM_ERROR("hw_init %d failed %d\n", i, r);
2435                                 goto init_failed;
2436                         }
2437                         r = amdgpu_device_wb_init(adev);
2438                         if (r) {
2439                                 DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
2440                                 goto init_failed;
2441                         }
2442                         adev->ip_blocks[i].status.hw = true;
2443
2444                         /* right after GMC hw init, we create CSA */
2445                         if (amdgpu_mcbp) {
2446                                 r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
2447                                                                AMDGPU_GEM_DOMAIN_VRAM |
2448                                                                AMDGPU_GEM_DOMAIN_GTT,
2449                                                                AMDGPU_CSA_SIZE);
2450                                 if (r) {
2451                                         DRM_ERROR("allocate CSA failed %d\n", r);
2452                                         goto init_failed;
2453                                 }
2454                         }
2455                 }
2456         }
2457
2458         if (amdgpu_sriov_vf(adev))
2459                 amdgpu_virt_init_data_exchange(adev);
2460
2461         r = amdgpu_ib_pool_init(adev);
2462         if (r) {
2463                 dev_err(adev->dev, "IB initialization failed (%d).\n", r);
2464                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
2465                 goto init_failed;
2466         }
2467
2468         r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
2469         if (r)
2470                 goto init_failed;
2471
2472         r = amdgpu_device_ip_hw_init_phase1(adev);
2473         if (r)
2474                 goto init_failed;
2475
2476         r = amdgpu_device_fw_loading(adev);
2477         if (r)
2478                 goto init_failed;
2479
2480         r = amdgpu_device_ip_hw_init_phase2(adev);
2481         if (r)
2482                 goto init_failed;
2483
2484         /*
2485          * retired pages will be loaded from eeprom and reserved here,
2486          * it should be called after amdgpu_device_ip_hw_init_phase2  since
2487          * for some ASICs the RAS EEPROM code relies on SMU fully functioning
2488          * for I2C communication which only true at this point.
2489          *
2490          * amdgpu_ras_recovery_init may fail, but the upper only cares the
2491          * failure from bad gpu situation and stop amdgpu init process
2492          * accordingly. For other failed cases, it will still release all
2493          * the resource and print error message, rather than returning one
2494          * negative value to upper level.
2495          *
2496          * Note: theoretically, this should be called before all vram allocations
2497          * to protect retired page from abusing
2498          */
2499         r = amdgpu_ras_recovery_init(adev);
2500         if (r)
2501                 goto init_failed;
2502
2503         /**
2504          * In case of XGMI grab extra reference for reset domain for this device
2505          */
2506         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2507                 if (amdgpu_xgmi_add_device(adev) == 0) {
2508                         if (!amdgpu_sriov_vf(adev)) {
2509                                 struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
2510
2511                                 if (WARN_ON(!hive)) {
2512                                         r = -ENOENT;
2513                                         goto init_failed;
2514                                 }
2515
2516                                 if (!hive->reset_domain ||
2517                                     !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
2518                                         r = -ENOENT;
2519                                         amdgpu_put_xgmi_hive(hive);
2520                                         goto init_failed;
2521                                 }
2522
2523                                 /* Drop the early temporary reset domain we created for device */
2524                                 amdgpu_reset_put_reset_domain(adev->reset_domain);
2525                                 adev->reset_domain = hive->reset_domain;
2526                                 amdgpu_put_xgmi_hive(hive);
2527                         }
2528                 }
2529         }
2530
2531         r = amdgpu_device_init_schedulers(adev);
2532         if (r)
2533                 goto init_failed;
2534
2535         /* Don't init kfd if whole hive need to be reset during init */
2536         if (!adev->gmc.xgmi.pending_reset)
2537                 amdgpu_amdkfd_device_init(adev);
2538
2539         amdgpu_fru_get_product_info(adev);
2540
2541 init_failed:
2542
2543         return r;
2544 }
2545
2546 /**
2547  * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
2548  *
2549  * @adev: amdgpu_device pointer
2550  *
2551  * Writes a reset magic value to the gart pointer in VRAM.  The driver calls
2552  * this function before a GPU reset.  If the value is retained after a
2553  * GPU reset, VRAM has not been lost.  Some GPU resets may destry VRAM contents.
2554  */
2555 static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
2556 {
2557         memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
2558 }
2559
2560 /**
2561  * amdgpu_device_check_vram_lost - check if vram is valid
2562  *
2563  * @adev: amdgpu_device pointer
2564  *
2565  * Checks the reset magic value written to the gart pointer in VRAM.
2566  * The driver calls this after a GPU reset to see if the contents of
2567  * VRAM is lost or now.
2568  * returns true if vram is lost, false if not.
2569  */
2570 static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
2571 {
2572         if (memcmp(adev->gart.ptr, adev->reset_magic,
2573                         AMDGPU_RESET_MAGIC_NUM))
2574                 return true;
2575
2576         if (!amdgpu_in_reset(adev))
2577                 return false;
2578
2579         /*
2580          * For all ASICs with baco/mode1 reset, the VRAM is
2581          * always assumed to be lost.
2582          */
2583         switch (amdgpu_asic_reset_method(adev)) {
2584         case AMD_RESET_METHOD_BACO:
2585         case AMD_RESET_METHOD_MODE1:
2586                 return true;
2587         default:
2588                 return false;
2589         }
2590 }
2591
2592 /**
2593  * amdgpu_device_set_cg_state - set clockgating for amdgpu device
2594  *
2595  * @adev: amdgpu_device pointer
2596  * @state: clockgating state (gate or ungate)
2597  *
2598  * The list of all the hardware IPs that make up the asic is walked and the
2599  * set_clockgating_state callbacks are run.
2600  * Late initialization pass enabling clockgating for hardware IPs.
2601  * Fini or suspend, pass disabling clockgating for hardware IPs.
2602  * Returns 0 on success, negative error code on failure.
2603  */
2604
2605 int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
2606                                enum amd_clockgating_state state)
2607 {
2608         int i, j, r;
2609
2610         if (amdgpu_emu_mode == 1)
2611                 return 0;
2612
2613         for (j = 0; j < adev->num_ip_blocks; j++) {
2614                 i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2615                 if (!adev->ip_blocks[i].status.late_initialized)
2616                         continue;
2617                 /* skip CG for GFX, SDMA on S0ix */
2618                 if (adev->in_s0ix &&
2619                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2620                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2621                         continue;
2622                 /* skip CG for VCE/UVD, it's handled specially */
2623                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2624                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2625                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2626                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2627                     adev->ip_blocks[i].version->funcs->set_clockgating_state) {
2628                         /* enable clockgating to save power */
2629                         r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
2630                                                                                      state);
2631                         if (r) {
2632                                 DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
2633                                           adev->ip_blocks[i].version->funcs->name, r);
2634                                 return r;
2635                         }
2636                 }
2637         }
2638
2639         return 0;
2640 }
2641
2642 int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
2643                                enum amd_powergating_state state)
2644 {
2645         int i, j, r;
2646
2647         if (amdgpu_emu_mode == 1)
2648                 return 0;
2649
2650         for (j = 0; j < adev->num_ip_blocks; j++) {
2651                 i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
2652                 if (!adev->ip_blocks[i].status.late_initialized)
2653                         continue;
2654                 /* skip PG for GFX, SDMA on S0ix */
2655                 if (adev->in_s0ix &&
2656                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
2657                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
2658                         continue;
2659                 /* skip CG for VCE/UVD, it's handled specially */
2660                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
2661                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
2662                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
2663                     adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
2664                     adev->ip_blocks[i].version->funcs->set_powergating_state) {
2665                         /* enable powergating to save power */
2666                         r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
2667                                                                                         state);
2668                         if (r) {
2669                                 DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
2670                                           adev->ip_blocks[i].version->funcs->name, r);
2671                                 return r;
2672                         }
2673                 }
2674         }
2675         return 0;
2676 }
2677
2678 static int amdgpu_device_enable_mgpu_fan_boost(void)
2679 {
2680         struct amdgpu_gpu_instance *gpu_ins;
2681         struct amdgpu_device *adev;
2682         int i, ret = 0;
2683
2684         mutex_lock(&mgpu_info.mutex);
2685
2686         /*
2687          * MGPU fan boost feature should be enabled
2688          * only when there are two or more dGPUs in
2689          * the system
2690          */
2691         if (mgpu_info.num_dgpu < 2)
2692                 goto out;
2693
2694         for (i = 0; i < mgpu_info.num_dgpu; i++) {
2695                 gpu_ins = &(mgpu_info.gpu_ins[i]);
2696                 adev = gpu_ins->adev;
2697                 if (!(adev->flags & AMD_IS_APU) &&
2698                     !gpu_ins->mgpu_fan_enabled) {
2699                         ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
2700                         if (ret)
2701                                 break;
2702
2703                         gpu_ins->mgpu_fan_enabled = 1;
2704                 }
2705         }
2706
2707 out:
2708         mutex_unlock(&mgpu_info.mutex);
2709
2710         return ret;
2711 }
2712
2713 /**
2714  * amdgpu_device_ip_late_init - run late init for hardware IPs
2715  *
2716  * @adev: amdgpu_device pointer
2717  *
2718  * Late initialization pass for hardware IPs.  The list of all the hardware
2719  * IPs that make up the asic is walked and the late_init callbacks are run.
2720  * late_init covers any special initialization that an IP requires
2721  * after all of the have been initialized or something that needs to happen
2722  * late in the init process.
2723  * Returns 0 on success, negative error code on failure.
2724  */
2725 static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
2726 {
2727         struct amdgpu_gpu_instance *gpu_instance;
2728         int i = 0, r;
2729
2730         for (i = 0; i < adev->num_ip_blocks; i++) {
2731                 if (!adev->ip_blocks[i].status.hw)
2732                         continue;
2733                 if (adev->ip_blocks[i].version->funcs->late_init) {
2734                         r = adev->ip_blocks[i].version->funcs->late_init((void *)adev);
2735                         if (r) {
2736                                 DRM_ERROR("late_init of IP block <%s> failed %d\n",
2737                                           adev->ip_blocks[i].version->funcs->name, r);
2738                                 return r;
2739                         }
2740                 }
2741                 adev->ip_blocks[i].status.late_initialized = true;
2742         }
2743
2744         r = amdgpu_ras_late_init(adev);
2745         if (r) {
2746                 DRM_ERROR("amdgpu_ras_late_init failed %d", r);
2747                 return r;
2748         }
2749
2750         amdgpu_ras_set_error_query_ready(adev, true);
2751
2752         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
2753         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
2754
2755         amdgpu_device_fill_reset_magic(adev);
2756
2757         r = amdgpu_device_enable_mgpu_fan_boost();
2758         if (r)
2759                 DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
2760
2761         /* For passthrough configuration on arcturus and aldebaran, enable special handling SBR */
2762         if (amdgpu_passthrough(adev) &&
2763             ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) ||
2764              adev->asic_type == CHIP_ALDEBARAN))
2765                 amdgpu_dpm_handle_passthrough_sbr(adev, true);
2766
2767         if (adev->gmc.xgmi.num_physical_nodes > 1) {
2768                 mutex_lock(&mgpu_info.mutex);
2769
2770                 /*
2771                  * Reset device p-state to low as this was booted with high.
2772                  *
2773                  * This should be performed only after all devices from the same
2774                  * hive get initialized.
2775                  *
2776                  * However, it's unknown how many device in the hive in advance.
2777                  * As this is counted one by one during devices initializations.
2778                  *
2779                  * So, we wait for all XGMI interlinked devices initialized.
2780                  * This may bring some delays as those devices may come from
2781                  * different hives. But that should be OK.
2782                  */
2783                 if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
2784                         for (i = 0; i < mgpu_info.num_gpu; i++) {
2785                                 gpu_instance = &(mgpu_info.gpu_ins[i]);
2786                                 if (gpu_instance->adev->flags & AMD_IS_APU)
2787                                         continue;
2788
2789                                 r = amdgpu_xgmi_set_pstate(gpu_instance->adev,
2790                                                 AMDGPU_XGMI_PSTATE_MIN);
2791                                 if (r) {
2792                                         DRM_ERROR("pstate setting failed (%d).\n", r);
2793                                         break;
2794                                 }
2795                         }
2796                 }
2797
2798                 mutex_unlock(&mgpu_info.mutex);
2799         }
2800
2801         return 0;
2802 }
2803
2804 /**
2805  * amdgpu_device_smu_fini_early - smu hw_fini wrapper
2806  *
2807  * @adev: amdgpu_device pointer
2808  *
2809  * For ASICs need to disable SMC first
2810  */
2811 static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
2812 {
2813         int i, r;
2814
2815         if (adev->ip_versions[GC_HWIP][0] > IP_VERSION(9, 0, 0))
2816                 return;
2817
2818         for (i = 0; i < adev->num_ip_blocks; i++) {
2819                 if (!adev->ip_blocks[i].status.hw)
2820                         continue;
2821                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
2822                         r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2823                         /* XXX handle errors */
2824                         if (r) {
2825                                 DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2826                                           adev->ip_blocks[i].version->funcs->name, r);
2827                         }
2828                         adev->ip_blocks[i].status.hw = false;
2829                         break;
2830                 }
2831         }
2832 }
2833
2834 static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
2835 {
2836         int i, r;
2837
2838         for (i = 0; i < adev->num_ip_blocks; i++) {
2839                 if (!adev->ip_blocks[i].version->funcs->early_fini)
2840                         continue;
2841
2842                 r = adev->ip_blocks[i].version->funcs->early_fini((void *)adev);
2843                 if (r) {
2844                         DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
2845                                   adev->ip_blocks[i].version->funcs->name, r);
2846                 }
2847         }
2848
2849         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2850         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2851
2852         amdgpu_amdkfd_suspend(adev, false);
2853
2854         /* Workaroud for ASICs need to disable SMC first */
2855         amdgpu_device_smu_fini_early(adev);
2856
2857         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2858                 if (!adev->ip_blocks[i].status.hw)
2859                         continue;
2860
2861                 r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
2862                 /* XXX handle errors */
2863                 if (r) {
2864                         DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
2865                                   adev->ip_blocks[i].version->funcs->name, r);
2866                 }
2867
2868                 adev->ip_blocks[i].status.hw = false;
2869         }
2870
2871         if (amdgpu_sriov_vf(adev)) {
2872                 if (amdgpu_virt_release_full_gpu(adev, false))
2873                         DRM_ERROR("failed to release exclusive mode on fini\n");
2874         }
2875
2876         return 0;
2877 }
2878
2879 /**
2880  * amdgpu_device_ip_fini - run fini for hardware IPs
2881  *
2882  * @adev: amdgpu_device pointer
2883  *
2884  * Main teardown pass for hardware IPs.  The list of all the hardware
2885  * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
2886  * are run.  hw_fini tears down the hardware associated with each IP
2887  * and sw_fini tears down any software state associated with each IP.
2888  * Returns 0 on success, negative error code on failure.
2889  */
2890 static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
2891 {
2892         int i, r;
2893
2894         if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
2895                 amdgpu_virt_release_ras_err_handler_data(adev);
2896
2897         if (adev->gmc.xgmi.num_physical_nodes > 1)
2898                 amdgpu_xgmi_remove_device(adev);
2899
2900         amdgpu_amdkfd_device_fini_sw(adev);
2901
2902         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2903                 if (!adev->ip_blocks[i].status.sw)
2904                         continue;
2905
2906                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
2907                         amdgpu_ucode_free_bo(adev);
2908                         amdgpu_free_static_csa(&adev->virt.csa_obj);
2909                         amdgpu_device_wb_fini(adev);
2910                         amdgpu_device_mem_scratch_fini(adev);
2911                         amdgpu_ib_pool_fini(adev);
2912                 }
2913
2914                 r = adev->ip_blocks[i].version->funcs->sw_fini((void *)adev);
2915                 /* XXX handle errors */
2916                 if (r) {
2917                         DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
2918                                   adev->ip_blocks[i].version->funcs->name, r);
2919                 }
2920                 adev->ip_blocks[i].status.sw = false;
2921                 adev->ip_blocks[i].status.valid = false;
2922         }
2923
2924         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2925                 if (!adev->ip_blocks[i].status.late_initialized)
2926                         continue;
2927                 if (adev->ip_blocks[i].version->funcs->late_fini)
2928                         adev->ip_blocks[i].version->funcs->late_fini((void *)adev);
2929                 adev->ip_blocks[i].status.late_initialized = false;
2930         }
2931
2932         amdgpu_ras_fini(adev);
2933
2934         return 0;
2935 }
2936
2937 /**
2938  * amdgpu_device_delayed_init_work_handler - work handler for IB tests
2939  *
2940  * @work: work_struct.
2941  */
2942 static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
2943 {
2944         struct amdgpu_device *adev =
2945                 container_of(work, struct amdgpu_device, delayed_init_work.work);
2946         int r;
2947
2948         r = amdgpu_ib_ring_tests(adev);
2949         if (r)
2950                 DRM_ERROR("ib ring test failed (%d).\n", r);
2951 }
2952
2953 static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
2954 {
2955         struct amdgpu_device *adev =
2956                 container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
2957
2958         WARN_ON_ONCE(adev->gfx.gfx_off_state);
2959         WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
2960
2961         if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true))
2962                 adev->gfx.gfx_off_state = true;
2963 }
2964
2965 /**
2966  * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
2967  *
2968  * @adev: amdgpu_device pointer
2969  *
2970  * Main suspend function for hardware IPs.  The list of all the hardware
2971  * IPs that make up the asic is walked, clockgating is disabled and the
2972  * suspend callbacks are run.  suspend puts the hardware and software state
2973  * in each IP into a state suitable for suspend.
2974  * Returns 0 on success, negative error code on failure.
2975  */
2976 static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
2977 {
2978         int i, r;
2979
2980         amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
2981         amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
2982
2983         /*
2984          * Per PMFW team's suggestion, driver needs to handle gfxoff
2985          * and df cstate features disablement for gpu reset(e.g. Mode1Reset)
2986          * scenario. Add the missing df cstate disablement here.
2987          */
2988         if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW))
2989                 dev_warn(adev->dev, "Failed to disallow df cstate");
2990
2991         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
2992                 if (!adev->ip_blocks[i].status.valid)
2993                         continue;
2994
2995                 /* displays are handled separately */
2996                 if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
2997                         continue;
2998
2999                 /* XXX handle errors */
3000                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3001                 /* XXX handle errors */
3002                 if (r) {
3003                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3004                                   adev->ip_blocks[i].version->funcs->name, r);
3005                         return r;
3006                 }
3007
3008                 adev->ip_blocks[i].status.hw = false;
3009         }
3010
3011         return 0;
3012 }
3013
3014 /**
3015  * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3016  *
3017  * @adev: amdgpu_device pointer
3018  *
3019  * Main suspend function for hardware IPs.  The list of all the hardware
3020  * IPs that make up the asic is walked, clockgating is disabled and the
3021  * suspend callbacks are run.  suspend puts the hardware and software state
3022  * in each IP into a state suitable for suspend.
3023  * Returns 0 on success, negative error code on failure.
3024  */
3025 static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3026 {
3027         int i, r;
3028
3029         if (adev->in_s0ix)
3030                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry);
3031
3032         for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
3033                 if (!adev->ip_blocks[i].status.valid)
3034                         continue;
3035                 /* displays are handled in phase1 */
3036                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3037                         continue;
3038                 /* PSP lost connection when err_event_athub occurs */
3039                 if (amdgpu_ras_intr_triggered() &&
3040                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3041                         adev->ip_blocks[i].status.hw = false;
3042                         continue;
3043                 }
3044
3045                 /* skip unnecessary suspend if we do not initialize them yet */
3046                 if (adev->gmc.xgmi.pending_reset &&
3047                     !(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3048                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC ||
3049                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3050                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH)) {
3051                         adev->ip_blocks[i].status.hw = false;
3052                         continue;
3053                 }
3054
3055                 /* skip suspend of gfx/mes and psp for S0ix
3056                  * gfx is in gfxoff state, so on resume it will exit gfxoff just
3057                  * like at runtime. PSP is also part of the always on hardware
3058                  * so no need to suspend it.
3059                  */
3060                 if (adev->in_s0ix &&
3061                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
3062                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
3063                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3064                         continue;
3065
3066                 /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
3067                 if (adev->in_s0ix &&
3068                     (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0)) &&
3069                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3070                         continue;
3071
3072                 /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
3073                  * These are in TMR, hence are expected to be reused by PSP-TOS to reload
3074                  * from this location and RLC Autoload automatically also gets loaded
3075                  * from here based on PMFW -> PSP message during re-init sequence.
3076                  * Therefore, the psp suspend & resume should be skipped to avoid destroy
3077                  * the TMR and reload FWs again for IMU enabled APU ASICs.
3078                  */
3079                 if (amdgpu_in_reset(adev) &&
3080                     (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3081                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3082                         continue;
3083
3084                 /* XXX handle errors */
3085                 r = adev->ip_blocks[i].version->funcs->suspend(adev);
3086                 /* XXX handle errors */
3087                 if (r) {
3088                         DRM_ERROR("suspend of IP block <%s> failed %d\n",
3089                                   adev->ip_blocks[i].version->funcs->name, r);
3090                 }
3091                 adev->ip_blocks[i].status.hw = false;
3092                 /* handle putting the SMC in the appropriate state */
3093                 if (!amdgpu_sriov_vf(adev)) {
3094                         if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3095                                 r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state);
3096                                 if (r) {
3097                                         DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3098                                                         adev->mp1_state, r);
3099                                         return r;
3100                                 }
3101                         }
3102                 }
3103         }
3104
3105         return 0;
3106 }
3107
3108 /**
3109  * amdgpu_device_ip_suspend - run suspend for hardware IPs
3110  *
3111  * @adev: amdgpu_device pointer
3112  *
3113  * Main suspend function for hardware IPs.  The list of all the hardware
3114  * IPs that make up the asic is walked, clockgating is disabled and the
3115  * suspend callbacks are run.  suspend puts the hardware and software state
3116  * in each IP into a state suitable for suspend.
3117  * Returns 0 on success, negative error code on failure.
3118  */
3119 int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3120 {
3121         int r;
3122
3123         if (amdgpu_sriov_vf(adev)) {
3124                 amdgpu_virt_fini_data_exchange(adev);
3125                 amdgpu_virt_request_full_gpu(adev, false);
3126         }
3127
3128         r = amdgpu_device_ip_suspend_phase1(adev);
3129         if (r)
3130                 return r;
3131         r = amdgpu_device_ip_suspend_phase2(adev);
3132
3133         if (amdgpu_sriov_vf(adev))
3134                 amdgpu_virt_release_full_gpu(adev, false);
3135
3136         return r;
3137 }
3138
3139 static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3140 {
3141         int i, r;
3142
3143         static enum amd_ip_block_type ip_order[] = {
3144                 AMD_IP_BLOCK_TYPE_COMMON,
3145                 AMD_IP_BLOCK_TYPE_GMC,
3146                 AMD_IP_BLOCK_TYPE_PSP,
3147                 AMD_IP_BLOCK_TYPE_IH,
3148         };
3149
3150         for (i = 0; i < adev->num_ip_blocks; i++) {
3151                 int j;
3152                 struct amdgpu_ip_block *block;
3153
3154                 block = &adev->ip_blocks[i];
3155                 block->status.hw = false;
3156
3157                 for (j = 0; j < ARRAY_SIZE(ip_order); j++) {
3158
3159                         if (block->version->type != ip_order[j] ||
3160                                 !block->status.valid)
3161                                 continue;
3162
3163                         r = block->version->funcs->hw_init(adev);
3164                         DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3165                         if (r)
3166                                 return r;
3167                         block->status.hw = true;
3168                 }
3169         }
3170
3171         return 0;
3172 }
3173
3174 static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3175 {
3176         int i, r;
3177
3178         static enum amd_ip_block_type ip_order[] = {
3179                 AMD_IP_BLOCK_TYPE_SMC,
3180                 AMD_IP_BLOCK_TYPE_DCE,
3181                 AMD_IP_BLOCK_TYPE_GFX,
3182                 AMD_IP_BLOCK_TYPE_SDMA,
3183                 AMD_IP_BLOCK_TYPE_MES,
3184                 AMD_IP_BLOCK_TYPE_UVD,
3185                 AMD_IP_BLOCK_TYPE_VCE,
3186                 AMD_IP_BLOCK_TYPE_VCN,
3187                 AMD_IP_BLOCK_TYPE_JPEG
3188         };
3189
3190         for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
3191                 int j;
3192                 struct amdgpu_ip_block *block;
3193
3194                 for (j = 0; j < adev->num_ip_blocks; j++) {
3195                         block = &adev->ip_blocks[j];
3196
3197                         if (block->version->type != ip_order[i] ||
3198                                 !block->status.valid ||
3199                                 block->status.hw)
3200                                 continue;
3201
3202                         if (block->version->type == AMD_IP_BLOCK_TYPE_SMC)
3203                                 r = block->version->funcs->resume(adev);
3204                         else
3205                                 r = block->version->funcs->hw_init(adev);
3206
3207                         DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
3208                         if (r)
3209                                 return r;
3210                         block->status.hw = true;
3211                 }
3212         }
3213
3214         return 0;
3215 }
3216
3217 /**
3218  * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3219  *
3220  * @adev: amdgpu_device pointer
3221  *
3222  * First resume function for hardware IPs.  The list of all the hardware
3223  * IPs that make up the asic is walked and the resume callbacks are run for
3224  * COMMON, GMC, and IH.  resume puts the hardware into a functional state
3225  * after a suspend and updates the software state as necessary.  This
3226  * function is also used for restoring the GPU after a GPU reset.
3227  * Returns 0 on success, negative error code on failure.
3228  */
3229 static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3230 {
3231         int i, r;
3232
3233         for (i = 0; i < adev->num_ip_blocks; i++) {
3234                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3235                         continue;
3236                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3237                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3238                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3239                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3240
3241                         r = adev->ip_blocks[i].version->funcs->resume(adev);
3242                         if (r) {
3243                                 DRM_ERROR("resume of IP block <%s> failed %d\n",
3244                                           adev->ip_blocks[i].version->funcs->name, r);
3245                                 return r;
3246                         }
3247                         adev->ip_blocks[i].status.hw = true;
3248                 }
3249         }
3250
3251         return 0;
3252 }
3253
3254 /**
3255  * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3256  *
3257  * @adev: amdgpu_device pointer
3258  *
3259  * First resume function for hardware IPs.  The list of all the hardware
3260  * IPs that make up the asic is walked and the resume callbacks are run for
3261  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
3262  * functional state after a suspend and updates the software state as
3263  * necessary.  This function is also used for restoring the GPU after a GPU
3264  * reset.
3265  * Returns 0 on success, negative error code on failure.
3266  */
3267 static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3268 {
3269         int i, r;
3270
3271         for (i = 0; i < adev->num_ip_blocks; i++) {
3272                 if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
3273                         continue;
3274                 if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3275                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3276                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3277                     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3278                         continue;
3279                 r = adev->ip_blocks[i].version->funcs->resume(adev);
3280                 if (r) {
3281                         DRM_ERROR("resume of IP block <%s> failed %d\n",
3282                                   adev->ip_blocks[i].version->funcs->name, r);
3283                         return r;
3284                 }
3285                 adev->ip_blocks[i].status.hw = true;
3286         }
3287
3288         return 0;
3289 }
3290
3291 /**
3292  * amdgpu_device_ip_resume - run resume for hardware IPs
3293  *
3294  * @adev: amdgpu_device pointer
3295  *
3296  * Main resume function for hardware IPs.  The hardware IPs
3297  * are split into two resume functions because they are
3298  * are also used in in recovering from a GPU reset and some additional
3299  * steps need to be take between them.  In this case (S3/S4) they are
3300  * run sequentially.
3301  * Returns 0 on success, negative error code on failure.
3302  */
3303 static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3304 {
3305         int r;
3306
3307         if (!adev->in_s0ix) {
3308                 r = amdgpu_amdkfd_resume_iommu(adev);
3309                 if (r)
3310                         return r;
3311         }
3312
3313         r = amdgpu_device_ip_resume_phase1(adev);
3314         if (r)
3315                 return r;
3316
3317         r = amdgpu_device_fw_loading(adev);
3318         if (r)
3319                 return r;
3320
3321         r = amdgpu_device_ip_resume_phase2(adev);
3322
3323         return r;
3324 }
3325
3326 /**
3327  * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
3328  *
3329  * @adev: amdgpu_device pointer
3330  *
3331  * Query the VBIOS data tables to determine if the board supports SR-IOV.
3332  */
3333 static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
3334 {
3335         if (amdgpu_sriov_vf(adev)) {
3336                 if (adev->is_atom_fw) {
3337                         if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
3338                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3339                 } else {
3340                         if (amdgpu_atombios_has_gpu_virtualization_table(adev))
3341                                 adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
3342                 }
3343
3344                 if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
3345                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
3346         }
3347 }
3348
3349 /**
3350  * amdgpu_device_asic_has_dc_support - determine if DC supports the asic
3351  *
3352  * @asic_type: AMD asic type
3353  *
3354  * Check if there is DC (new modesetting infrastructure) support for an asic.
 *
 * The answer depends on the asic, on the amdgpu_dc setting and on the build
 * configuration:
 *  - CHIP_TOPAZ (and CHIP_HAINAN when CONFIG_DRM_AMDGPU_SI is set): never.
 *  - without CONFIG_DRM_AMD_DC: never, for every other asic as well.
 *  - TAHITI/PITCAIRN/VERDE/OLAND: only with CONFIG_DRM_AMD_DC_SI and
 *    amdgpu_dc > 0.
 *  - BONAIRE/KAVERI/KABINI/MULLINS: only when amdgpu_dc > 0.
 *  - everything else: whenever amdgpu_dc is non-zero.
 *
3355  * Returns true if DC has support, false if not.
3356  */
3357 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
3358 {
3359         switch (asic_type) {
3360 #ifdef CONFIG_DRM_AMDGPU_SI
3361         case CHIP_HAINAN:
3362 #endif
3363         case CHIP_TOPAZ:
3364                 /* chips with no display hardware */
3365                 return false;
3366 #if defined(CONFIG_DRM_AMD_DC)
3367         case CHIP_TAHITI:
3368         case CHIP_PITCAIRN:
3369         case CHIP_VERDE:
3370         case CHIP_OLAND:
3371                 /*
3372                  * We have systems in the wild with these ASICs that require
3373                  * LVDS and VGA support which is not supported with DC.
3374                  *
3375                  * Fallback to the non-DC driver here by default so as not to
3376                  * cause regressions.
3377                  */
3378 #if defined(CONFIG_DRM_AMD_DC_SI)
                /* DC only when explicitly requested (amdgpu_dc > 0) */
3379                 return amdgpu_dc > 0;
3380 #else
3381                 return false;
3382 #endif
3383         case CHIP_BONAIRE:
3384         case CHIP_KAVERI:
3385         case CHIP_KABINI:
3386         case CHIP_MULLINS:
3387                 /*
3388                  * We have systems in the wild with these ASICs that require
3389                  * VGA support which is not supported with DC.
3390                  *
3391                  * Fallback to the non-DC driver here by default so as not to
3392                  * cause regressions.
3393                  */
3394                 return amdgpu_dc > 0;
3395         default:
                /* all remaining asics: DC unless amdgpu_dc is 0 */
3396                 return amdgpu_dc != 0;
3397 #else
3398         default:
                /* DC is not built in: report an explicit request once and refuse */
3399                 if (amdgpu_dc > 0)
3400                         DRM_INFO_ONCE("Display Core has been requested via kernel parameter "
3401                                          "but isn't supported by ASIC, ignoring\n");
3402                 return false;
3403 #endif
3404         }
3405 }
3406
3407 /**
3408  * amdgpu_device_has_dc_support - check if dc is supported
3409  *
3410  * @adev: amdgpu_device pointer
3411  *
3412  * Returns true for supported, false for not supported
3413  */
3414 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
3415 {
3416         if (adev->enable_virtual_display ||
3417             (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
3418                 return false;
3419
3420         return amdgpu_device_asic_has_dc_support(adev->asic_type);
3421 }
3422
3423 static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
3424 {
3425         struct amdgpu_device *adev =
3426                 container_of(__work, struct amdgpu_device, xgmi_reset_work);
3427         struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3428
3429         /* It's a bug to not have a hive within this function */
3430         if (WARN_ON(!hive))
3431                 return;
3432
3433         /*
3434          * Use task barrier to synchronize all xgmi reset works across the
3435          * hive. task_barrier_enter and task_barrier_exit will block
3436          * until all the threads running the xgmi reset works reach
3437          * those points. task_barrier_full will do both blocks.
3438          */
3439         if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
3440
3441                 task_barrier_enter(&hive->tb);
3442                 adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev));
3443
3444                 if (adev->asic_reset_res)
3445                         goto fail;
3446
3447                 task_barrier_exit(&hive->tb);
3448                 adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev));
3449
3450                 if (adev->asic_reset_res)
3451                         goto fail;
3452
3453                 if (adev->mmhub.ras && adev->mmhub.ras->ras_block.hw_ops &&
3454                     adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
3455                         adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(adev);
3456         } else {
3457
3458                 task_barrier_full(&hive->tb);
3459                 adev->asic_reset_res =  amdgpu_asic_reset(adev);
3460         }
3461
3462 fail:
3463         if (adev->asic_reset_res)
3464                 DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
3465                          adev->asic_reset_res, adev_to_drm(adev)->unique);
3466         amdgpu_put_xgmi_hive(hive);
3467 }
3468
3469 static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
3470 {
3471         char *input = amdgpu_lockup_timeout;
3472         char *timeout_setting = NULL;
3473         int index = 0;
3474         long timeout;
3475         int ret = 0;
3476
3477         /*
3478          * By default timeout for non compute jobs is 10000
3479          * and 60000 for compute jobs.
3480          * In SR-IOV or passthrough mode, timeout for compute
3481          * jobs are 60000 by default.
3482          */
3483         adev->gfx_timeout = msecs_to_jiffies(10000);
3484         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3485         if (amdgpu_sriov_vf(adev))
3486                 adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
3487                                         msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
3488         else
3489                 adev->compute_timeout =  msecs_to_jiffies(60000);
3490
3491         if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3492                 while ((timeout_setting = strsep(&input, ",")) &&
3493                                 strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
3494                         ret = kstrtol(timeout_setting, 0, &timeout);
3495                         if (ret)
3496                                 return ret;
3497
3498                         if (timeout == 0) {
3499                                 index++;
3500                                 continue;
3501                         } else if (timeout < 0) {
3502                                 timeout = MAX_SCHEDULE_TIMEOUT;
3503                                 dev_warn(adev->dev, "lockup timeout disabled");
3504                                 add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
3505                         } else {
3506                                 timeout = msecs_to_jiffies(timeout);
3507                         }
3508
3509                         switch (index++) {
3510                         case 0:
3511                                 adev->gfx_timeout = timeout;
3512                                 break;
3513                         case 1:
3514                                 adev->compute_timeout = timeout;
3515                                 break;
3516                         case 2:
3517                                 adev->sdma_timeout = timeout;
3518                                 break;
3519                         case 3:
3520                                 adev->video_timeout = timeout;
3521                                 break;
3522                         default:
3523                                 break;
3524                         }
3525                 }
3526                 /*
3527                  * There is only one value specified and
3528                  * it should apply to all non-compute jobs.
3529                  */
3530                 if (index == 1) {
3531                         adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
3532                         if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev))
3533                                 adev->compute_timeout = adev->gfx_timeout;
3534                 }
3535         }
3536
3537         return ret;
3538 }
3539
3540 /**
3541  * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
3542  *
3543  * @adev: amdgpu_device pointer
3544  *
3545  * RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
3546  */
3547 static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
3548 {
3549         struct iommu_domain *domain;
3550
3551         domain = iommu_get_domain_for_dev(adev->dev);
3552         if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
3553                 adev->ram_is_direct_mapped = true;
3554 }
3555
/*
 * Device attributes created on the device's kobject by amdgpu_device_init()
 * through sysfs_create_files().  The list is NULL terminated.
 */
3556 static const struct attribute *amdgpu_dev_attributes[] = {
3557         &dev_attr_product_name.attr,
3558         &dev_attr_product_number.attr,
3559         &dev_attr_serial_number.attr,
3560         &dev_attr_pcie_replay_count.attr,
3561         NULL
3562 };
3563
3564 /**
3565  * amdgpu_device_init - initialize the driver
3566  *
3567  * @adev: amdgpu_device pointer
3568  * @flags: driver flags
3569  *
3570  * Initializes the driver info and hw (all asics).
3571  * Returns 0 for success or an error on failure.
3572  * Called at driver startup.
3573  */
3574 int amdgpu_device_init(struct amdgpu_device *adev,
3575                        uint32_t flags)
3576 {
3577         struct drm_device *ddev = adev_to_drm(adev);
3578         struct pci_dev *pdev = adev->pdev;
3579         int r, i;
3580         bool px = false;
3581         u32 max_MBps;
3582         int tmp;
3583
3584         adev->shutdown = false;
3585         adev->flags = flags;
3586
3587         if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST)
3588                 adev->asic_type = amdgpu_force_asic_type;
3589         else
3590                 adev->asic_type = flags & AMD_ASIC_MASK;
3591
3592         adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
3593         if (amdgpu_emu_mode == 1)
3594                 adev->usec_timeout *= 10;
3595         adev->gmc.gart_size = 512 * 1024 * 1024;
3596         adev->accel_working = false;
3597         adev->num_rings = 0;
3598         RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
3599         adev->mman.buffer_funcs = NULL;
3600         adev->mman.buffer_funcs_ring = NULL;
3601         adev->vm_manager.vm_pte_funcs = NULL;
3602         adev->vm_manager.vm_pte_num_scheds = 0;
3603         adev->gmc.gmc_funcs = NULL;
3604         adev->harvest_ip_mask = 0x0;
3605         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
3606         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
3607
3608         adev->smc_rreg = &amdgpu_invalid_rreg;
3609         adev->smc_wreg = &amdgpu_invalid_wreg;
3610         adev->pcie_rreg = &amdgpu_invalid_rreg;
3611         adev->pcie_wreg = &amdgpu_invalid_wreg;
3612         adev->pciep_rreg = &amdgpu_invalid_rreg;
3613         adev->pciep_wreg = &amdgpu_invalid_wreg;
3614         adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
3615         adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
3616         adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
3617         adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
3618         adev->didt_rreg = &amdgpu_invalid_rreg;
3619         adev->didt_wreg = &amdgpu_invalid_wreg;
3620         adev->gc_cac_rreg = &amdgpu_invalid_rreg;
3621         adev->gc_cac_wreg = &amdgpu_invalid_wreg;
3622         adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
3623         adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
3624
3625         DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
3626                  amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
3627                  pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
3628
3629         /* mutex initialization are all done here so we
3630          * can recall function without having locking issues */
3631         mutex_init(&adev->firmware.mutex);
3632         mutex_init(&adev->pm.mutex);
3633         mutex_init(&adev->gfx.gpu_clock_mutex);
3634         mutex_init(&adev->srbm_mutex);
3635         mutex_init(&adev->gfx.pipe_reserve_mutex);
3636         mutex_init(&adev->gfx.gfx_off_mutex);
3637         mutex_init(&adev->grbm_idx_mutex);
3638         mutex_init(&adev->mn_lock);
3639         mutex_init(&adev->virt.vf_errors.lock);
3640         hash_init(adev->mn_hash);
3641         mutex_init(&adev->psp.mutex);
3642         mutex_init(&adev->notifier_lock);
3643         mutex_init(&adev->pm.stable_pstate_ctx_lock);
3644         mutex_init(&adev->benchmark_mutex);
3645
3646         amdgpu_device_init_apu_flags(adev);
3647
3648         r = amdgpu_device_check_arguments(adev);
3649         if (r)
3650                 return r;
3651
3652         spin_lock_init(&adev->mmio_idx_lock);
3653         spin_lock_init(&adev->smc_idx_lock);
3654         spin_lock_init(&adev->pcie_idx_lock);
3655         spin_lock_init(&adev->uvd_ctx_idx_lock);
3656         spin_lock_init(&adev->didt_idx_lock);
3657         spin_lock_init(&adev->gc_cac_idx_lock);
3658         spin_lock_init(&adev->se_cac_idx_lock);
3659         spin_lock_init(&adev->audio_endpt_idx_lock);
3660         spin_lock_init(&adev->mm_stats.lock);
3661
3662         INIT_LIST_HEAD(&adev->shadow_list);
3663         mutex_init(&adev->shadow_list_lock);
3664
3665         INIT_LIST_HEAD(&adev->reset_list);
3666
3667         INIT_LIST_HEAD(&adev->ras_list);
3668
3669         INIT_DELAYED_WORK(&adev->delayed_init_work,
3670                           amdgpu_device_delayed_init_work_handler);
3671         INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
3672                           amdgpu_device_delay_enable_gfx_off);
3673
3674         INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
3675
3676         adev->gfx.gfx_off_req_count = 1;
3677         adev->gfx.gfx_off_residency = 0;
3678         adev->gfx.gfx_off_entrycount = 0;
3679         adev->pm.ac_power = power_supply_is_system_supplied() > 0;
3680
3681         atomic_set(&adev->throttling_logging_enabled, 1);
3682         /*
3683          * If throttling continues, logging will be performed every minute
3684          * to avoid log flooding. "-1" is subtracted since the thermal
3685          * throttling interrupt comes every second. Thus, the total logging
3686          * interval is 59 seconds(retelimited printk interval) + 1(waiting
3687          * for throttling interrupt) = 60 seconds.
3688          */
3689         ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
3690         ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
3691
3692         /* Registers mapping */
3693         /* TODO: block userspace mapping of io register */
3694         if (adev->asic_type >= CHIP_BONAIRE) {
3695                 adev->rmmio_base = pci_resource_start(adev->pdev, 5);
3696                 adev->rmmio_size = pci_resource_len(adev->pdev, 5);
3697         } else {
3698                 adev->rmmio_base = pci_resource_start(adev->pdev, 2);
3699                 adev->rmmio_size = pci_resource_len(adev->pdev, 2);
3700         }
3701
3702         for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++)
3703                 atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN);
3704
3705         adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size);
3706         if (adev->rmmio == NULL) {
3707                 return -ENOMEM;
3708         }
3709         DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
3710         DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size);
3711
3712         amdgpu_device_get_pcie_info(adev);
3713
3714         if (amdgpu_mcbp)
3715                 DRM_INFO("MCBP is enabled\n");
3716
3717         /*
3718          * Reset domain needs to be present early, before XGMI hive discovered
3719          * (if any) and intitialized to use reset sem and in_gpu reset flag
3720          * early on during init and before calling to RREG32.
3721          */
3722         adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev");
3723         if (!adev->reset_domain)
3724                 return -ENOMEM;
3725
3726         /* detect hw virtualization here */
3727         amdgpu_detect_virtualization(adev);
3728
3729         r = amdgpu_device_get_job_timeout_settings(adev);
3730         if (r) {
3731                 dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
3732                 return r;
3733         }
3734
3735         /* early init functions */
3736         r = amdgpu_device_ip_early_init(adev);
3737         if (r)
3738                 return r;
3739
3740         /* Get rid of things like offb */
3741         r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
3742         if (r)
3743                 return r;
3744
3745         /* Enable TMZ based on IP_VERSION */
3746         amdgpu_gmc_tmz_set(adev);
3747
3748         amdgpu_gmc_noretry_set(adev);
3749         /* Need to get xgmi info early to decide the reset behavior*/
3750         if (adev->gmc.xgmi.supported) {
3751                 r = adev->gfxhub.funcs->get_xgmi_info(adev);
3752                 if (r)
3753                         return r;
3754         }
3755
3756         /* enable PCIE atomic ops */
3757         if (amdgpu_sriov_vf(adev))
3758                 adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
3759                         adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
3760                         (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3761         else
3762                 adev->have_atomics_support =
3763                         !pci_enable_atomic_ops_to_root(adev->pdev,
3764                                           PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
3765                                           PCI_EXP_DEVCAP2_ATOMIC_COMP64);
3766         if (!adev->have_atomics_support)
3767                 dev_info(adev->dev, "PCIE atomic ops is not supported\n");
3768
3769         /* doorbell bar mapping and doorbell index init*/
3770         amdgpu_device_doorbell_init(adev);
3771
3772         if (amdgpu_emu_mode == 1) {
3773                 /* post the asic on emulation mode */
3774                 emu_soc_asic_init(adev);
3775                 goto fence_driver_init;
3776         }
3777
3778         amdgpu_reset_init(adev);
3779
3780         /* detect if we are with an SRIOV vbios */
3781         amdgpu_device_detect_sriov_bios(adev);
3782
3783         /* check if we need to reset the asic
3784          *  E.g., driver was not cleanly unloaded previously, etc.
3785          */
3786         if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
3787                 if (adev->gmc.xgmi.num_physical_nodes) {
3788                         dev_info(adev->dev, "Pending hive reset.\n");
3789                         adev->gmc.xgmi.pending_reset = true;
3790                         /* Only need to init necessary block for SMU to handle the reset */
3791                         for (i = 0; i < adev->num_ip_blocks; i++) {
3792                                 if (!adev->ip_blocks[i].status.valid)
3793                                         continue;
3794                                 if (!(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
3795                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
3796                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
3797                                       adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC)) {
3798                                         DRM_DEBUG("IP %s disabled for hw_init.\n",
3799                                                 adev->ip_blocks[i].version->funcs->name);
3800                                         adev->ip_blocks[i].status.hw = true;
3801                                 }
3802                         }
3803                 } else {
3804                         tmp = amdgpu_reset_method;
3805                         /* It should do a default reset when loading or reloading the driver,
3806                          * regardless of the module parameter reset_method.
3807                          */
3808                         amdgpu_reset_method = AMD_RESET_METHOD_NONE;
3809                         r = amdgpu_asic_reset(adev);
3810                         amdgpu_reset_method = tmp;
3811                         if (r) {
3812                                 dev_err(adev->dev, "asic reset on init failed\n");
3813                                 goto failed;
3814                         }
3815                 }
3816         }
3817
3818         /* Post card if necessary */
3819         if (amdgpu_device_need_post(adev)) {
3820                 if (!adev->bios) {
3821                         dev_err(adev->dev, "no vBIOS found\n");
3822                         r = -EINVAL;
3823                         goto failed;
3824                 }
3825                 DRM_INFO("GPU posting now...\n");
3826                 r = amdgpu_device_asic_init(adev);
3827                 if (r) {
3828                         dev_err(adev->dev, "gpu post error!\n");
3829                         goto failed;
3830                 }
3831         }
3832
3833         if (adev->is_atom_fw) {
3834                 /* Initialize clocks */
3835                 r = amdgpu_atomfirmware_get_clock_info(adev);
3836                 if (r) {
3837                         dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
3838                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3839                         goto failed;
3840                 }
3841         } else {
3842                 /* Initialize clocks */
3843                 r = amdgpu_atombios_get_clock_info(adev);
3844                 if (r) {
3845                         dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
3846                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0);
3847                         goto failed;
3848                 }
3849                 /* init i2c buses */
3850                 if (!amdgpu_device_has_dc_support(adev))
3851                         amdgpu_atombios_i2c_init(adev);
3852         }
3853
3854 fence_driver_init:
3855         /* Fence driver */
3856         r = amdgpu_fence_driver_sw_init(adev);
3857         if (r) {
3858                 dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
3859                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0);
3860                 goto failed;
3861         }
3862
3863         /* init the mode config */
3864         drm_mode_config_init(adev_to_drm(adev));
3865
3866         r = amdgpu_device_ip_init(adev);
3867         if (r) {
3868                 dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
3869                 amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
3870                 goto release_ras_con;
3871         }
3872
3873         amdgpu_fence_driver_hw_init(adev);
3874
3875         dev_info(adev->dev,
3876                 "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
3877                         adev->gfx.config.max_shader_engines,
3878                         adev->gfx.config.max_sh_per_se,
3879                         adev->gfx.config.max_cu_per_sh,
3880                         adev->gfx.cu_info.number);
3881
3882         adev->accel_working = true;
3883
3884         amdgpu_vm_check_compute_bug(adev);
3885
3886         /* Initialize the buffer migration limit. */
3887         if (amdgpu_moverate >= 0)
3888                 max_MBps = amdgpu_moverate;
3889         else
3890                 max_MBps = 8; /* Allow 8 MB/s. */
3891         /* Get a log2 for easy divisions. */
3892         adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps));
3893
3894         r = amdgpu_pm_sysfs_init(adev);
3895         if (r)
3896                 DRM_ERROR("registering pm sysfs failed (%d).\n", r);
3897
3898         r = amdgpu_ucode_sysfs_init(adev);
3899         if (r) {
3900                 adev->ucode_sysfs_en = false;
3901                 DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
3902         } else
3903                 adev->ucode_sysfs_en = true;
3904
3905         r = amdgpu_psp_sysfs_init(adev);
3906         if (r) {
3907                 adev->psp_sysfs_en = false;
3908                 if (!amdgpu_sriov_vf(adev))
3909                         DRM_ERROR("Creating psp sysfs failed\n");
3910         } else
3911                 adev->psp_sysfs_en = true;
3912
3913         /*
3914          * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
3915          * Otherwise the mgpu fan boost feature will be skipped due to the
3916          * gpu instance is counted less.
3917          */
3918         amdgpu_register_gpu_instance(adev);
3919
3920         /* enable clockgating, etc. after ib tests, etc. since some blocks require
3921          * explicit gating rather than handling it automatically.
3922          */
3923         if (!adev->gmc.xgmi.pending_reset) {
3924                 r = amdgpu_device_ip_late_init(adev);
3925                 if (r) {
3926                         dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
3927                         amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r);
3928                         goto release_ras_con;
3929                 }
3930                 /* must succeed. */
3931                 amdgpu_ras_resume(adev);
3932                 queue_delayed_work(system_wq, &adev->delayed_init_work,
3933                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
3934         }
3935
3936         if (amdgpu_sriov_vf(adev)) {
3937                 amdgpu_virt_release_full_gpu(adev, true);
3938                 flush_delayed_work(&adev->delayed_init_work);
3939         }
3940
3941         r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes);
3942         if (r)
3943                 dev_err(adev->dev, "Could not create amdgpu device attr\n");
3944
3945         if (IS_ENABLED(CONFIG_PERF_EVENTS))
3946                 r = amdgpu_pmu_init(adev);
3947         if (r)
3948                 dev_err(adev->dev, "amdgpu_pmu_init failed\n");
3949
3950         /* Have stored pci confspace at hand for restore in sudden PCI error */
3951         if (amdgpu_device_cache_pci_state(adev->pdev))
3952                 pci_restore_state(pdev);
3953
3954         /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */
3955         /* this will fail for cards that aren't VGA class devices, just
3956          * ignore it */
3957         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
3958                 vga_client_register(adev->pdev, amdgpu_device_vga_set_decode);
3959
3960         px = amdgpu_device_supports_px(ddev);
3961
3962         if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
3963                                 apple_gmux_detect(NULL, NULL)))
3964                 vga_switcheroo_register_client(adev->pdev,
3965                                                &amdgpu_switcheroo_ops, px);
3966
3967         if (px)
3968                 vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
3969
3970         if (adev->gmc.xgmi.pending_reset)
3971                 queue_delayed_work(system_wq, &mgpu_info.delayed_reset_work,
3972                                    msecs_to_jiffies(AMDGPU_RESUME_MS));
3973
3974         amdgpu_device_check_iommu_direct_map(adev);
3975
3976         return 0;
3977
3978 release_ras_con:
3979         if (amdgpu_sriov_vf(adev))
3980                 amdgpu_virt_release_full_gpu(adev, true);
3981
3982         /* failed in exclusive mode due to timeout */
3983         if (amdgpu_sriov_vf(adev) &&
3984                 !amdgpu_sriov_runtime(adev) &&
3985                 amdgpu_virt_mmio_blocked(adev) &&
3986                 !amdgpu_virt_wait_reset(adev)) {
3987                 dev_err(adev->dev, "VF exclusive mode timeout\n");
3988                 /* Don't send request since VF is inactive. */
3989                 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
3990                 adev->virt.ops = NULL;
3991                 r = -EAGAIN;
3992         }
3993         amdgpu_release_ras_context(adev);
3994
3995 failed:
3996         amdgpu_vf_error_trans_all(adev);
3997
3998         return r;
3999 }
4000
4001 static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4002 {
4003
4004         /* Clear all CPU mappings pointing to this device */
4005         unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1);
4006
4007         /* Unmap all mapped bars - Doorbell, registers and VRAM */
4008         amdgpu_device_doorbell_fini(adev);
4009
4010         iounmap(adev->rmmio);
4011         adev->rmmio = NULL;
4012         if (adev->mman.aper_base_kaddr)
4013                 iounmap(adev->mman.aper_base_kaddr);
4014         adev->mman.aper_base_kaddr = NULL;
4015
4016         /* Memory manager related */
4017         if (!adev->gmc.xgmi.connected_to_cpu) {
4018                 arch_phys_wc_del(adev->gmc.vram_mtrr);
4019                 arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
4020         }
4021 }
4022
4023 /**
4024  * amdgpu_device_fini_hw - tear down the driver
4025  *
4026  * @adev: amdgpu_device pointer
4027  *
4028  * Tear down the driver info (all asics).
4029  * Called at driver shutdown.
4030  */
4031 void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4032 {
4033         dev_info(adev->dev, "amdgpu: finishing device.\n");
4034         flush_delayed_work(&adev->delayed_init_work);
4035         adev->shutdown = true;
4036
4037         /* make sure IB test finished before entering exclusive mode
4038          * to avoid preemption on IB test
4039          * */
4040         if (amdgpu_sriov_vf(adev)) {
4041                 amdgpu_virt_request_full_gpu(adev, false);
4042                 amdgpu_virt_fini_data_exchange(adev);
4043         }
4044
4045         /* disable all interrupts */
4046         amdgpu_irq_disable_all(adev);
4047         if (adev->mode_info.mode_config_initialized) {
4048                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(adev)))
4049                         drm_helper_force_disable_all(adev_to_drm(adev));
4050                 else
4051                         drm_atomic_helper_shutdown(adev_to_drm(adev));
4052         }
4053         amdgpu_fence_driver_hw_fini(adev);
4054
4055         if (adev->mman.initialized)
4056                 drain_workqueue(adev->mman.bdev.wq);
4057
4058         if (adev->pm.sysfs_initialized)
4059                 amdgpu_pm_sysfs_fini(adev);
4060         if (adev->ucode_sysfs_en)
4061                 amdgpu_ucode_sysfs_fini(adev);
4062         if (adev->psp_sysfs_en)
4063                 amdgpu_psp_sysfs_fini(adev);
4064         sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes);
4065
4066         /* disable ras feature must before hw fini */
4067         amdgpu_ras_pre_fini(adev);
4068
4069         amdgpu_device_ip_fini_early(adev);
4070
4071         amdgpu_irq_fini_hw(adev);
4072
4073         if (adev->mman.initialized)
4074                 ttm_device_clear_dma_mappings(&adev->mman.bdev);
4075
4076         amdgpu_gart_dummy_page_fini(adev);
4077
4078         if (drm_dev_is_unplugged(adev_to_drm(adev)))
4079                 amdgpu_device_unmap_mmio(adev);
4080
4081 }
4082
4083 void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4084 {
4085         int idx;
4086         bool px;
4087
4088         amdgpu_fence_driver_sw_fini(adev);
4089         amdgpu_device_ip_fini(adev);
4090         amdgpu_ucode_release(&adev->firmware.gpu_info_fw);
4091         adev->accel_working = false;
4092         dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4093
4094         amdgpu_reset_fini(adev);
4095
4096         /* free i2c buses */
4097         if (!amdgpu_device_has_dc_support(adev))
4098                 amdgpu_i2c_fini(adev);
4099
4100         if (amdgpu_emu_mode != 1)
4101                 amdgpu_atombios_fini(adev);
4102
4103         kfree(adev->bios);
4104         adev->bios = NULL;
4105
4106         px = amdgpu_device_supports_px(adev_to_drm(adev));
4107
4108         if (px || (!pci_is_thunderbolt_attached(adev->pdev) &&
4109                                 apple_gmux_detect(NULL, NULL)))
4110                 vga_switcheroo_unregister_client(adev->pdev);
4111
4112         if (px)
4113                 vga_switcheroo_fini_domain_pm_ops(adev->dev);
4114
4115         if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
4116                 vga_client_unregister(adev->pdev);
4117
4118         if (drm_dev_enter(adev_to_drm(adev), &idx)) {
4119
4120                 iounmap(adev->rmmio);
4121                 adev->rmmio = NULL;
4122                 amdgpu_device_doorbell_fini(adev);
4123                 drm_dev_exit(idx);
4124         }
4125
4126         if (IS_ENABLED(CONFIG_PERF_EVENTS))
4127                 amdgpu_pmu_fini(adev);
4128         if (adev->mman.discovery_bin)
4129                 amdgpu_discovery_fini(adev);
4130
4131         amdgpu_reset_put_reset_domain(adev->reset_domain);
4132         adev->reset_domain = NULL;
4133
4134         kfree(adev->pci_state);
4135
4136 }
4137
4138 /**
4139  * amdgpu_device_evict_resources - evict device resources
4140  * @adev: amdgpu device object
4141  *
4142  * Evicts all ttm device resources(vram BOs, gart table) from the lru list
4143  * of the vram memory type. Mainly used for evicting device resources
4144  * at suspend time.
4145  *
4146  */
4147 static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4148 {
4149         int ret;
4150
4151         /* No need to evict vram on APUs for suspend to ram or s2idle */
4152         if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
4153                 return 0;
4154
4155         ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4156         if (ret)
4157                 DRM_WARN("evicting device resources failed\n");
4158         return ret;
4159 }
4160
4161 /*
4162  * Suspend & resume.
4163  */
4164 /**
4165  * amdgpu_device_suspend - initiate device suspend
4166  *
4167  * @dev: drm dev pointer
4168  * @fbcon : notify the fbdev of suspend
4169  *
4170  * Puts the hw in the suspend state (all asics).
4171  * Returns 0 for success or an error on failure.
4172  * Called at driver suspend.
4173  */
4174 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
4175 {
4176         struct amdgpu_device *adev = drm_to_adev(dev);
4177         int r = 0;
4178
4179         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4180                 return 0;
4181
4182         adev->in_suspend = true;
4183
4184         /* Evict the majority of BOs before grabbing the full access */
4185         r = amdgpu_device_evict_resources(adev);
4186         if (r)
4187                 return r;
4188
4189         if (amdgpu_sriov_vf(adev)) {
4190                 amdgpu_virt_fini_data_exchange(adev);
4191                 r = amdgpu_virt_request_full_gpu(adev, false);
4192                 if (r)
4193                         return r;
4194         }
4195
4196         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
4197                 DRM_WARN("smart shift update failed\n");
4198
4199         if (fbcon)
4200                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
4201
4202         cancel_delayed_work_sync(&adev->delayed_init_work);
4203
4204         amdgpu_ras_suspend(adev);
4205
4206         amdgpu_device_ip_suspend_phase1(adev);
4207
4208         if (!adev->in_s0ix)
4209                 amdgpu_amdkfd_suspend(adev, adev->in_runpm);
4210
4211         r = amdgpu_device_evict_resources(adev);
4212         if (r)
4213                 return r;
4214
4215         amdgpu_fence_driver_hw_fini(adev);
4216
4217         amdgpu_device_ip_suspend_phase2(adev);
4218
4219         if (amdgpu_sriov_vf(adev))
4220                 amdgpu_virt_release_full_gpu(adev, false);
4221
4222         return 0;
4223 }
4224
4225 /**
4226  * amdgpu_device_resume - initiate device resume
4227  *
4228  * @dev: drm dev pointer
4229  * @fbcon : notify the fbdev of resume
4230  *
4231  * Bring the hw back to operating state (all asics).
4232  * Returns 0 for success or an error on failure.
4233  * Called at driver resume.
4234  */
4235 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
4236 {
4237         struct amdgpu_device *adev = drm_to_adev(dev);
4238         int r = 0;
4239
4240         if (amdgpu_sriov_vf(adev)) {
4241                 r = amdgpu_virt_request_full_gpu(adev, true);
4242                 if (r)
4243                         return r;
4244         }
4245
4246         if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
4247                 return 0;
4248
4249         if (adev->in_s0ix)
4250                 amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D0Entry);
4251
4252         /* post card */
4253         if (amdgpu_device_need_post(adev)) {
4254                 r = amdgpu_device_asic_init(adev);
4255                 if (r)
4256                         dev_err(adev->dev, "amdgpu asic init failed\n");
4257         }
4258
4259         r = amdgpu_device_ip_resume(adev);
4260
4261         if (r) {
4262                 dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
4263                 goto exit;
4264         }
4265         amdgpu_fence_driver_hw_init(adev);
4266
4267         r = amdgpu_device_ip_late_init(adev);
4268         if (r)
4269                 goto exit;
4270
4271         queue_delayed_work(system_wq, &adev->delayed_init_work,
4272                            msecs_to_jiffies(AMDGPU_RESUME_MS));
4273
4274         if (!adev->in_s0ix) {
4275                 r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
4276                 if (r)
4277                         goto exit;
4278         }
4279
4280 exit:
4281         if (amdgpu_sriov_vf(adev)) {
4282                 amdgpu_virt_init_data_exchange(adev);
4283                 amdgpu_virt_release_full_gpu(adev, true);
4284         }
4285
4286         if (r)
4287                 return r;
4288
4289         /* Make sure IB tests flushed */
4290         flush_delayed_work(&adev->delayed_init_work);
4291
4292         if (fbcon)
4293                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
4294
4295         amdgpu_ras_resume(adev);
4296
4297         if (adev->mode_info.num_crtc) {
4298                 /*
4299                  * Most of the connector probing functions try to acquire runtime pm
4300                  * refs to ensure that the GPU is powered on when connector polling is
4301                  * performed. Since we're calling this from a runtime PM callback,
4302                  * trying to acquire rpm refs will cause us to deadlock.
4303                  *
4304                  * Since we're guaranteed to be holding the rpm lock, it's safe to
4305                  * temporarily disable the rpm helpers so this doesn't deadlock us.
4306                  */
4307 #ifdef CONFIG_PM
4308                 dev->dev->power.disable_depth++;
4309 #endif
4310                 if (!adev->dc_enabled)
4311                         drm_helper_hpd_irq_event(dev);
4312                 else
4313                         drm_kms_helper_hotplug_event(dev);
4314 #ifdef CONFIG_PM
4315                 dev->dev->power.disable_depth--;
4316 #endif
4317         }
4318         adev->in_suspend = false;
4319
4320         if (adev->enable_mes)
4321                 amdgpu_mes_self_test(adev);
4322
4323         if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
4324                 DRM_WARN("smart shift update failed\n");
4325
4326         return 0;
4327 }
4328
4329 /**
4330  * amdgpu_device_ip_check_soft_reset - did soft reset succeed
4331  *
4332  * @adev: amdgpu_device pointer
4333  *
4334  * The list of all the hardware IPs that make up the asic is walked and
4335  * the check_soft_reset callbacks are run.  check_soft_reset determines
4336  * if the asic is still hung or not.
4337  * Returns true if any of the IPs are still in a hung state, false if not.
4338  */
4339 static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
4340 {
4341         int i;
4342         bool asic_hang = false;
4343
4344         if (amdgpu_sriov_vf(adev))
4345                 return true;
4346
4347         if (amdgpu_asic_need_full_reset(adev))
4348                 return true;
4349
4350         for (i = 0; i < adev->num_ip_blocks; i++) {
4351                 if (!adev->ip_blocks[i].status.valid)
4352                         continue;
4353                 if (adev->ip_blocks[i].version->funcs->check_soft_reset)
4354                         adev->ip_blocks[i].status.hang =
4355                                 adev->ip_blocks[i].version->funcs->check_soft_reset(adev);
4356                 if (adev->ip_blocks[i].status.hang) {
4357                         dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
4358                         asic_hang = true;
4359                 }
4360         }
4361         return asic_hang;
4362 }
4363
4364 /**
4365  * amdgpu_device_ip_pre_soft_reset - prepare for soft reset
4366  *
4367  * @adev: amdgpu_device pointer
4368  *
4369  * The list of all the hardware IPs that make up the asic is walked and the
4370  * pre_soft_reset callbacks are run if the block is hung.  pre_soft_reset
4371  * handles any IP specific hardware or software state changes that are
4372  * necessary for a soft reset to succeed.
4373  * Returns 0 on success, negative error code on failure.
4374  */
4375 static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
4376 {
4377         int i, r = 0;
4378
4379         for (i = 0; i < adev->num_ip_blocks; i++) {
4380                 if (!adev->ip_blocks[i].status.valid)
4381                         continue;
4382                 if (adev->ip_blocks[i].status.hang &&
4383                     adev->ip_blocks[i].version->funcs->pre_soft_reset) {
4384                         r = adev->ip_blocks[i].version->funcs->pre_soft_reset(adev);
4385                         if (r)
4386                                 return r;
4387                 }
4388         }
4389
4390         return 0;
4391 }
4392
4393 /**
4394  * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
4395  *
4396  * @adev: amdgpu_device pointer
4397  *
4398  * Some hardware IPs cannot be soft reset.  If they are hung, a full gpu
4399  * reset is necessary to recover.
4400  * Returns true if a full asic reset is required, false if not.
4401  */
4402 static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
4403 {
4404         int i;
4405
4406         if (amdgpu_asic_need_full_reset(adev))
4407                 return true;
4408
4409         for (i = 0; i < adev->num_ip_blocks; i++) {
4410                 if (!adev->ip_blocks[i].status.valid)
4411                         continue;
4412                 if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) ||
4413                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) ||
4414                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) ||
4415                     (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) ||
4416                      adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
4417                         if (adev->ip_blocks[i].status.hang) {
4418                                 dev_info(adev->dev, "Some block need full reset!\n");
4419                                 return true;
4420                         }
4421                 }
4422         }
4423         return false;
4424 }
4425
4426 /**
4427  * amdgpu_device_ip_soft_reset - do a soft reset
4428  *
4429  * @adev: amdgpu_device pointer
4430  *
4431  * The list of all the hardware IPs that make up the asic is walked and the
4432  * soft_reset callbacks are run if the block is hung.  soft_reset handles any
4433  * IP specific hardware or software state changes that are necessary to soft
4434  * reset the IP.
4435  * Returns 0 on success, negative error code on failure.
4436  */
4437 static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
4438 {
4439         int i, r = 0;
4440
4441         for (i = 0; i < adev->num_ip_blocks; i++) {
4442                 if (!adev->ip_blocks[i].status.valid)
4443                         continue;
4444                 if (adev->ip_blocks[i].status.hang &&
4445                     adev->ip_blocks[i].version->funcs->soft_reset) {
4446                         r = adev->ip_blocks[i].version->funcs->soft_reset(adev);
4447                         if (r)
4448                                 return r;
4449                 }
4450         }
4451
4452         return 0;
4453 }
4454
4455 /**
4456  * amdgpu_device_ip_post_soft_reset - clean up from soft reset
4457  *
4458  * @adev: amdgpu_device pointer
4459  *
4460  * The list of all the hardware IPs that make up the asic is walked and the
4461  * post_soft_reset callbacks are run if the asic was hung.  post_soft_reset
4462  * handles any IP specific hardware or software state changes that are
4463  * necessary after the IP has been soft reset.
4464  * Returns 0 on success, negative error code on failure.
4465  */
4466 static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
4467 {
4468         int i, r = 0;
4469
4470         for (i = 0; i < adev->num_ip_blocks; i++) {
4471                 if (!adev->ip_blocks[i].status.valid)
4472                         continue;
4473                 if (adev->ip_blocks[i].status.hang &&
4474                     adev->ip_blocks[i].version->funcs->post_soft_reset)
4475                         r = adev->ip_blocks[i].version->funcs->post_soft_reset(adev);
4476                 if (r)
4477                         return r;
4478         }
4479
4480         return 0;
4481 }
4482
4483 /**
4484  * amdgpu_device_recover_vram - Recover some VRAM contents
4485  *
4486  * @adev: amdgpu_device pointer
4487  *
4488  * Restores the contents of VRAM buffers from the shadows in GTT.  Used to
4489  * restore things like GPUVM page tables after a GPU reset where
4490  * the contents of VRAM might be lost.
4491  *
4492  * Returns:
4493  * 0 on success, negative error code on failure.
4494  */
4495 static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
4496 {
4497         struct dma_fence *fence = NULL, *next = NULL;
4498         struct amdgpu_bo *shadow;
4499         struct amdgpu_bo_vm *vmbo;
4500         long r = 1, tmo;
4501
4502         if (amdgpu_sriov_runtime(adev))
4503                 tmo = msecs_to_jiffies(8000);
4504         else
4505                 tmo = msecs_to_jiffies(100);
4506
4507         dev_info(adev->dev, "recover vram bo from shadow start\n");
4508         mutex_lock(&adev->shadow_list_lock);
4509         list_for_each_entry(vmbo, &adev->shadow_list, shadow_list) {
4510                 shadow = &vmbo->bo;
4511                 /* No need to recover an evicted BO */
4512                 if (shadow->tbo.resource->mem_type != TTM_PL_TT ||
4513                     shadow->tbo.resource->start == AMDGPU_BO_INVALID_OFFSET ||
4514                     shadow->parent->tbo.resource->mem_type != TTM_PL_VRAM)
4515                         continue;
4516
4517                 r = amdgpu_bo_restore_shadow(shadow, &next);
4518                 if (r)
4519                         break;
4520
4521                 if (fence) {
4522                         tmo = dma_fence_wait_timeout(fence, false, tmo);
4523                         dma_fence_put(fence);
4524                         fence = next;
4525                         if (tmo == 0) {
4526                                 r = -ETIMEDOUT;
4527                                 break;
4528                         } else if (tmo < 0) {
4529                                 r = tmo;
4530                                 break;
4531                         }
4532                 } else {
4533                         fence = next;
4534                 }
4535         }
4536         mutex_unlock(&adev->shadow_list_lock);
4537
4538         if (fence)
4539                 tmo = dma_fence_wait_timeout(fence, false, tmo);
4540         dma_fence_put(fence);
4541
4542         if (r < 0 || tmo <= 0) {
4543                 dev_err(adev->dev, "recover vram bo from shadow failed, r is %ld, tmo is %ld\n", r, tmo);
4544                 return -EIO;
4545         }
4546
4547         dev_info(adev->dev, "recover vram bo from shadow done\n");
4548         return 0;
4549 }
4550
4551
4552 /**
4553  * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
4554  *
4555  * @adev: amdgpu_device pointer
4556  * @from_hypervisor: request from hypervisor
4557  *
4558  * do VF FLR and reinitialize Asic
4559  * return 0 means succeeded otherwise failed
4560  */
4561 static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
4562                                      bool from_hypervisor)
4563 {
4564         int r;
4565         struct amdgpu_hive_info *hive = NULL;
4566         int retry_limit = 0;
4567
4568 retry:
4569         amdgpu_amdkfd_pre_reset(adev);
4570
4571         if (from_hypervisor)
4572                 r = amdgpu_virt_request_full_gpu(adev, true);
4573         else
4574                 r = amdgpu_virt_reset_gpu(adev);
4575         if (r)
4576                 return r;
4577
4578         /* Resume IP prior to SMC */
4579         r = amdgpu_device_ip_reinit_early_sriov(adev);
4580         if (r)
4581                 goto error;
4582
4583         amdgpu_virt_init_data_exchange(adev);
4584
4585         r = amdgpu_device_fw_loading(adev);
4586         if (r)
4587                 return r;
4588
4589         /* now we are okay to resume SMC/CP/SDMA */
4590         r = amdgpu_device_ip_reinit_late_sriov(adev);
4591         if (r)
4592                 goto error;
4593
4594         hive = amdgpu_get_xgmi_hive(adev);
4595         /* Update PSP FW topology after reset */
4596         if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
4597                 r = amdgpu_xgmi_update_topology(hive, adev);
4598
4599         if (hive)
4600                 amdgpu_put_xgmi_hive(hive);
4601
4602         if (!r) {
4603                 amdgpu_irq_gpu_reset_resume_helper(adev);
4604                 r = amdgpu_ib_ring_tests(adev);
4605
4606                 amdgpu_amdkfd_post_reset(adev);
4607         }
4608
4609 error:
4610         if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
4611                 amdgpu_inc_vram_lost(adev);
4612                 r = amdgpu_device_recover_vram(adev);
4613         }
4614         amdgpu_virt_release_full_gpu(adev, true);
4615
4616         if (AMDGPU_RETRY_SRIOV_RESET(r)) {
4617                 if (retry_limit < AMDGPU_MAX_RETRY_LIMIT) {
4618                         retry_limit++;
4619                         goto retry;
4620                 } else
4621                         DRM_ERROR("GPU reset retry is beyond the retry limit\n");
4622         }
4623
4624         return r;
4625 }
4626
4627 /**
4628  * amdgpu_device_has_job_running - check if there is any job in mirror list
4629  *
4630  * @adev: amdgpu_device pointer
4631  *
4632  * check if there is any job in mirror list
4633  */
4634 bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
4635 {
4636         int i;
4637         struct drm_sched_job *job;
4638
4639         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4640                 struct amdgpu_ring *ring = adev->rings[i];
4641
4642                 if (!ring || !ring->sched.thread)
4643                         continue;
4644
4645                 spin_lock(&ring->sched.job_list_lock);
4646                 job = list_first_entry_or_null(&ring->sched.pending_list,
4647                                                struct drm_sched_job, list);
4648                 spin_unlock(&ring->sched.job_list_lock);
4649                 if (job)
4650                         return true;
4651         }
4652         return false;
4653 }
4654
4655 /**
4656  * amdgpu_device_should_recover_gpu - check if we should try GPU recovery
4657  *
4658  * @adev: amdgpu_device pointer
4659  *
4660  * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
4661  * a hung GPU.
4662  */
4663 bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
4664 {
4665
4666         if (amdgpu_gpu_recovery == 0)
4667                 goto disabled;
4668
4669         /* Skip soft reset check in fatal error mode */
4670         if (!amdgpu_ras_is_poison_mode_supported(adev))
4671                 return true;
4672
4673         if (amdgpu_sriov_vf(adev))
4674                 return true;
4675
4676         if (amdgpu_gpu_recovery == -1) {
4677                 switch (adev->asic_type) {
4678 #ifdef CONFIG_DRM_AMDGPU_SI
4679                 case CHIP_VERDE:
4680                 case CHIP_TAHITI:
4681                 case CHIP_PITCAIRN:
4682                 case CHIP_OLAND:
4683                 case CHIP_HAINAN:
4684 #endif
4685 #ifdef CONFIG_DRM_AMDGPU_CIK
4686                 case CHIP_KAVERI:
4687                 case CHIP_KABINI:
4688                 case CHIP_MULLINS:
4689 #endif
4690                 case CHIP_CARRIZO:
4691                 case CHIP_STONEY:
4692                 case CHIP_CYAN_SKILLFISH:
4693                         goto disabled;
4694                 default:
4695                         break;
4696                 }
4697         }
4698
4699         return true;
4700
4701 disabled:
4702                 dev_info(adev->dev, "GPU recovery disabled.\n");
4703                 return false;
4704 }
4705
4706 int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
4707 {
4708         u32 i;
4709         int ret = 0;
4710
4711         amdgpu_atombios_scratch_regs_engine_hung(adev, true);
4712
4713         dev_info(adev->dev, "GPU mode1 reset\n");
4714
4715         /* disable BM */
4716         pci_clear_master(adev->pdev);
4717
4718         amdgpu_device_cache_pci_state(adev->pdev);
4719
4720         if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
4721                 dev_info(adev->dev, "GPU smu mode1 reset\n");
4722                 ret = amdgpu_dpm_mode1_reset(adev);
4723         } else {
4724                 dev_info(adev->dev, "GPU psp mode1 reset\n");
4725                 ret = psp_gpu_reset(adev);
4726         }
4727
4728         if (ret)
4729                 dev_err(adev->dev, "GPU mode1 reset failed\n");
4730
4731         amdgpu_device_load_pci_state(adev->pdev);
4732
4733         /* wait for asic to come out of reset */
4734         for (i = 0; i < adev->usec_timeout; i++) {
4735                 u32 memsize = adev->nbio.funcs->get_memsize(adev);
4736
4737                 if (memsize != 0xffffffff)
4738                         break;
4739                 udelay(1);
4740         }
4741
4742         amdgpu_atombios_scratch_regs_engine_hung(adev, false);
4743         return ret;
4744 }
4745
4746 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
4747                                  struct amdgpu_reset_context *reset_context)
4748 {
4749         int i, r = 0;
4750         struct amdgpu_job *job = NULL;
4751         bool need_full_reset =
4752                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4753
4754         if (reset_context->reset_req_dev == adev)
4755                 job = reset_context->job;
4756
4757         if (amdgpu_sriov_vf(adev)) {
4758                 /* stop the data exchange thread */
4759                 amdgpu_virt_fini_data_exchange(adev);
4760         }
4761
4762         amdgpu_fence_driver_isr_toggle(adev, true);
4763
4764         /* block all schedulers and reset given job's ring */
4765         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
4766                 struct amdgpu_ring *ring = adev->rings[i];
4767
4768                 if (!ring || !ring->sched.thread)
4769                         continue;
4770
4771                 /*clear job fence from fence drv to avoid force_completion
4772                  *leave NULL and vm flush fence in fence drv */
4773                 amdgpu_fence_driver_clear_job_fences(ring);
4774
4775                 /* after all hw jobs are reset, hw fence is meaningless, so force_completion */
4776                 amdgpu_fence_driver_force_completion(ring);
4777         }
4778
4779         amdgpu_fence_driver_isr_toggle(adev, false);
4780
4781         if (job && job->vm)
4782                 drm_sched_increase_karma(&job->base);
4783
4784         r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
4785         /* If reset handler not implemented, continue; otherwise return */
4786         if (r == -ENOSYS)
4787                 r = 0;
4788         else
4789                 return r;
4790
4791         /* Don't suspend on bare metal if we are not going to HW reset the ASIC */
4792         if (!amdgpu_sriov_vf(adev)) {
4793
4794                 if (!need_full_reset)
4795                         need_full_reset = amdgpu_device_ip_need_full_reset(adev);
4796
4797                 if (!need_full_reset && amdgpu_gpu_recovery &&
4798                     amdgpu_device_ip_check_soft_reset(adev)) {
4799                         amdgpu_device_ip_pre_soft_reset(adev);
4800                         r = amdgpu_device_ip_soft_reset(adev);
4801                         amdgpu_device_ip_post_soft_reset(adev);
4802                         if (r || amdgpu_device_ip_check_soft_reset(adev)) {
4803                                 dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
4804                                 need_full_reset = true;
4805                         }
4806                 }
4807
4808                 if (need_full_reset)
4809                         r = amdgpu_device_ip_suspend(adev);
4810                 if (need_full_reset)
4811                         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4812                 else
4813                         clear_bit(AMDGPU_NEED_FULL_RESET,
4814                                   &reset_context->flags);
4815         }
4816
4817         return r;
4818 }
4819
4820 static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
4821 {
4822         int i;
4823
4824         lockdep_assert_held(&adev->reset_domain->sem);
4825
4826         for (i = 0; i < adev->num_regs; i++) {
4827                 adev->reset_dump_reg_value[i] = RREG32(adev->reset_dump_reg_list[i]);
4828                 trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i],
4829                                              adev->reset_dump_reg_value[i]);
4830         }
4831
4832         return 0;
4833 }
4834
4835 #ifdef CONFIG_DEV_COREDUMP
4836 static ssize_t amdgpu_devcoredump_read(char *buffer, loff_t offset,
4837                 size_t count, void *data, size_t datalen)
4838 {
4839         struct drm_printer p;
4840         struct amdgpu_device *adev = data;
4841         struct drm_print_iterator iter;
4842         int i;
4843
4844         iter.data = buffer;
4845         iter.offset = 0;
4846         iter.start = offset;
4847         iter.remain = count;
4848
4849         p = drm_coredump_printer(&iter);
4850
4851         drm_printf(&p, "**** AMDGPU Device Coredump ****\n");
4852         drm_printf(&p, "kernel: " UTS_RELEASE "\n");
4853         drm_printf(&p, "module: " KBUILD_MODNAME "\n");
4854         drm_printf(&p, "time: %lld.%09ld\n", adev->reset_time.tv_sec, adev->reset_time.tv_nsec);
4855         if (adev->reset_task_info.pid)
4856                 drm_printf(&p, "process_name: %s PID: %d\n",
4857                            adev->reset_task_info.process_name,
4858                            adev->reset_task_info.pid);
4859
4860         if (adev->reset_vram_lost)
4861                 drm_printf(&p, "VRAM is lost due to GPU reset!\n");
4862         if (adev->num_regs) {
4863                 drm_printf(&p, "AMDGPU register dumps:\nOffset:     Value:\n");
4864
4865                 for (i = 0; i < adev->num_regs; i++)
4866                         drm_printf(&p, "0x%08x: 0x%08x\n",
4867                                    adev->reset_dump_reg_list[i],
4868                                    adev->reset_dump_reg_value[i]);
4869         }
4870
4871         return count - iter.remain;
4872 }
4873
/*
 * Free callback registered with dev_coredumpm(). Intentionally empty: the
 * data pointer handed to dev_coredumpm() is the amdgpu_device itself, not a
 * buffer allocated for the dump, so there is nothing to release here.
 */
static void amdgpu_devcoredump_free(void *data)
{
}
4877
4878 static void amdgpu_reset_capture_coredumpm(struct amdgpu_device *adev)
4879 {
4880         struct drm_device *dev = adev_to_drm(adev);
4881
4882         ktime_get_ts64(&adev->reset_time);
4883         dev_coredumpm(dev->dev, THIS_MODULE, adev, 0, GFP_KERNEL,
4884                       amdgpu_devcoredump_read, amdgpu_devcoredump_free);
4885 }
4886 #endif
4887
4888 int amdgpu_do_asic_reset(struct list_head *device_list_handle,
4889                          struct amdgpu_reset_context *reset_context)
4890 {
4891         struct amdgpu_device *tmp_adev = NULL;
4892         bool need_full_reset, skip_hw_reset, vram_lost = false;
4893         int r = 0;
4894         bool gpu_reset_for_dev_remove = 0;
4895
4896         /* Try reset handler method first */
4897         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
4898                                     reset_list);
4899         amdgpu_reset_reg_dumps(tmp_adev);
4900
4901         reset_context->reset_device_list = device_list_handle;
4902         r = amdgpu_reset_perform_reset(tmp_adev, reset_context);
4903         /* If reset handler not implemented, continue; otherwise return */
4904         if (r == -ENOSYS)
4905                 r = 0;
4906         else
4907                 return r;
4908
4909         /* Reset handler not implemented, use the default method */
4910         need_full_reset =
4911                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4912         skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
4913
4914         gpu_reset_for_dev_remove =
4915                 test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
4916                         test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
4917
4918         /*
4919          * ASIC reset has to be done on all XGMI hive nodes ASAP
4920          * to allow proper links negotiation in FW (within 1 sec)
4921          */
4922         if (!skip_hw_reset && need_full_reset) {
4923                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4924                         /* For XGMI run all resets in parallel to speed up the process */
4925                         if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4926                                 tmp_adev->gmc.xgmi.pending_reset = false;
4927                                 if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
4928                                         r = -EALREADY;
4929                         } else
4930                                 r = amdgpu_asic_reset(tmp_adev);
4931
4932                         if (r) {
4933                                 dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
4934                                          r, adev_to_drm(tmp_adev)->unique);
4935                                 break;
4936                         }
4937                 }
4938
4939                 /* For XGMI wait for all resets to complete before proceed */
4940                 if (!r) {
4941                         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4942                                 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
4943                                         flush_work(&tmp_adev->xgmi_reset_work);
4944                                         r = tmp_adev->asic_reset_res;
4945                                         if (r)
4946                                                 break;
4947                                 }
4948                         }
4949                 }
4950         }
4951
4952         if (!r && amdgpu_ras_intr_triggered()) {
4953                 list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4954                         if (tmp_adev->mmhub.ras && tmp_adev->mmhub.ras->ras_block.hw_ops &&
4955                             tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count)
4956                                 tmp_adev->mmhub.ras->ras_block.hw_ops->reset_ras_error_count(tmp_adev);
4957                 }
4958
4959                 amdgpu_ras_intr_cleared();
4960         }
4961
4962         /* Since the mode1 reset affects base ip blocks, the
4963          * phase1 ip blocks need to be resumed. Otherwise there
4964          * will be a BIOS signature error and the psp bootloader
4965          * can't load kdb on the next amdgpu install.
4966          */
4967         if (gpu_reset_for_dev_remove) {
4968                 list_for_each_entry(tmp_adev, device_list_handle, reset_list)
4969                         amdgpu_device_ip_resume_phase1(tmp_adev);
4970
4971                 goto end;
4972         }
4973
4974         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
4975                 if (need_full_reset) {
4976                         /* post card */
4977                         r = amdgpu_device_asic_init(tmp_adev);
4978                         if (r) {
4979                                 dev_warn(tmp_adev->dev, "asic atom init failed!");
4980                         } else {
4981                                 dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
4982                                 r = amdgpu_amdkfd_resume_iommu(tmp_adev);
4983                                 if (r)
4984                                         goto out;
4985
4986                                 r = amdgpu_device_ip_resume_phase1(tmp_adev);
4987                                 if (r)
4988                                         goto out;
4989
4990                                 vram_lost = amdgpu_device_check_vram_lost(tmp_adev);
4991 #ifdef CONFIG_DEV_COREDUMP
4992                                 tmp_adev->reset_vram_lost = vram_lost;
4993                                 memset(&tmp_adev->reset_task_info, 0,
4994                                                 sizeof(tmp_adev->reset_task_info));
4995                                 if (reset_context->job && reset_context->job->vm)
4996                                         tmp_adev->reset_task_info =
4997                                                 reset_context->job->vm->task_info;
4998                                 amdgpu_reset_capture_coredumpm(tmp_adev);
4999 #endif
5000                                 if (vram_lost) {
5001                                         DRM_INFO("VRAM is lost due to GPU reset!\n");
5002                                         amdgpu_inc_vram_lost(tmp_adev);
5003                                 }
5004
5005                                 r = amdgpu_device_fw_loading(tmp_adev);
5006                                 if (r)
5007                                         return r;
5008
5009                                 r = amdgpu_device_ip_resume_phase2(tmp_adev);
5010                                 if (r)
5011                                         goto out;
5012
5013                                 if (vram_lost)
5014                                         amdgpu_device_fill_reset_magic(tmp_adev);
5015
5016                                 /*
5017                                  * Add this ASIC as tracked as reset was already
5018                                  * complete successfully.
5019                                  */
5020                                 amdgpu_register_gpu_instance(tmp_adev);
5021
5022                                 if (!reset_context->hive &&
5023                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5024                                         amdgpu_xgmi_add_device(tmp_adev);
5025
5026                                 r = amdgpu_device_ip_late_init(tmp_adev);
5027                                 if (r)
5028                                         goto out;
5029
5030                                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, false);
5031
5032                                 /*
5033                                  * The GPU enters bad state once faulty pages
5034                                  * by ECC has reached the threshold, and ras
5035                                  * recovery is scheduled next. So add one check
5036                                  * here to break recovery if it indeed exceeds
5037                                  * bad page threshold, and remind user to
5038                                  * retire this GPU or setting one bigger
5039                                  * bad_page_threshold value to fix this once
5040                                  * probing driver again.
5041                                  */
5042                                 if (!amdgpu_ras_eeprom_check_err_threshold(tmp_adev)) {
5043                                         /* must succeed. */
5044                                         amdgpu_ras_resume(tmp_adev);
5045                                 } else {
5046                                         r = -EINVAL;
5047                                         goto out;
5048                                 }
5049
5050                                 /* Update PSP FW topology after reset */
5051                                 if (reset_context->hive &&
5052                                     tmp_adev->gmc.xgmi.num_physical_nodes > 1)
5053                                         r = amdgpu_xgmi_update_topology(
5054                                                 reset_context->hive, tmp_adev);
5055                         }
5056                 }
5057
5058 out:
5059                 if (!r) {
5060                         amdgpu_irq_gpu_reset_resume_helper(tmp_adev);
5061                         r = amdgpu_ib_ring_tests(tmp_adev);
5062                         if (r) {
5063                                 dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5064                                 need_full_reset = true;
5065                                 r = -EAGAIN;
5066                                 goto end;
5067                         }
5068                 }
5069
5070                 if (!r)
5071                         r = amdgpu_device_recover_vram(tmp_adev);
5072                 else
5073                         tmp_adev->asic_reset_res = r;
5074         }
5075
5076 end:
5077         if (need_full_reset)
5078                 set_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5079         else
5080                 clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5081         return r;
5082 }
5083
5084 static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5085 {
5086
5087         switch (amdgpu_asic_reset_method(adev)) {
5088         case AMD_RESET_METHOD_MODE1:
5089                 adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5090                 break;
5091         case AMD_RESET_METHOD_MODE2:
5092                 adev->mp1_state = PP_MP1_STATE_RESET;
5093                 break;
5094         default:
5095                 adev->mp1_state = PP_MP1_STATE_NONE;
5096                 break;
5097         }
5098 }
5099
5100 static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5101 {
5102         amdgpu_vf_error_trans_all(adev);
5103         adev->mp1_state = PP_MP1_STATE_NONE;
5104 }
5105
5106 static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5107 {
5108         struct pci_dev *p = NULL;
5109
5110         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5111                         adev->pdev->bus->number, 1);
5112         if (p) {
5113                 pm_runtime_enable(&(p->dev));
5114                 pm_runtime_resume(&(p->dev));
5115         }
5116
5117         pci_dev_put(p);
5118 }
5119
5120 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5121 {
5122         enum amd_reset_method reset_method;
5123         struct pci_dev *p = NULL;
5124         u64 expires;
5125
5126         /*
5127          * For now, only BACO and mode1 reset are confirmed
5128          * to suffer the audio issue without proper suspended.
5129          */
5130         reset_method = amdgpu_asic_reset_method(adev);
5131         if ((reset_method != AMD_RESET_METHOD_BACO) &&
5132              (reset_method != AMD_RESET_METHOD_MODE1))
5133                 return -EINVAL;
5134
5135         p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
5136                         adev->pdev->bus->number, 1);
5137         if (!p)
5138                 return -ENODEV;
5139
5140         expires = pm_runtime_autosuspend_expiration(&(p->dev));
5141         if (!expires)
5142                 /*
5143                  * If we cannot get the audio device autosuspend delay,
5144                  * a fixed 4S interval will be used. Considering 3S is
5145                  * the audio controller default autosuspend delay setting.
5146                  * 4S used here is guaranteed to cover that.
5147                  */
5148                 expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL;
5149
5150         while (!pm_runtime_status_suspended(&(p->dev))) {
5151                 if (!pm_runtime_suspend(&(p->dev)))
5152                         break;
5153
5154                 if (expires < ktime_get_mono_fast_ns()) {
5155                         dev_warn(adev->dev, "failed to suspend display audio\n");
5156                         pci_dev_put(p);
5157                         /* TODO: abort the succeeding gpu reset? */
5158                         return -ETIMEDOUT;
5159                 }
5160         }
5161
5162         pm_runtime_disable(&(p->dev));
5163
5164         pci_dev_put(p);
5165         return 0;
5166 }
5167
5168 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5169 {
5170         struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5171
5172 #if defined(CONFIG_DEBUG_FS)
5173         if (!amdgpu_sriov_vf(adev))
5174                 cancel_work(&adev->reset_work);
5175 #endif
5176
5177         if (adev->kfd.dev)
5178                 cancel_work(&adev->kfd.reset_work);
5179
5180         if (amdgpu_sriov_vf(adev))
5181                 cancel_work(&adev->virt.flr_work);
5182
5183         if (con && adev->ras_enabled)
5184                 cancel_work(&con->recovery_work);
5185
5186 }
5187
5188 /**
5189  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
5190  *
5191  * @adev: amdgpu_device pointer
5192  * @job: which job trigger hang
5193  * @reset_context: amdgpu reset context pointer
5194  *
5195  * Attempt to reset the GPU if it has hung (all asics).
5196  * Attempt to do soft-reset or full-reset and reinitialize Asic
5197  * Returns 0 for success or an error on failure.
5198  */
5199
5200 int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
5201                               struct amdgpu_job *job,
5202                               struct amdgpu_reset_context *reset_context)
5203 {
5204         struct list_head device_list, *device_list_handle =  NULL;
5205         bool job_signaled = false;
5206         struct amdgpu_hive_info *hive = NULL;
5207         struct amdgpu_device *tmp_adev = NULL;
5208         int i, r = 0;
5209         bool need_emergency_restart = false;
5210         bool audio_suspended = false;
5211         bool gpu_reset_for_dev_remove = false;
5212
5213         gpu_reset_for_dev_remove =
5214                         test_bit(AMDGPU_RESET_FOR_DEVICE_REMOVE, &reset_context->flags) &&
5215                                 test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5216
5217         /*
5218          * Special case: RAS triggered and full reset isn't supported
5219          */
5220         need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
5221
5222         /*
5223          * Flush RAM to disk so that after reboot
5224          * the user can read log and see why the system rebooted.
5225          */
5226         if (need_emergency_restart && amdgpu_ras_get_context(adev)->reboot) {
5227                 DRM_WARN("Emergency reboot.");
5228
5229                 ksys_sync_helper();
5230                 emergency_restart();
5231         }
5232
5233         dev_info(adev->dev, "GPU %s begin!\n",
5234                 need_emergency_restart ? "jobs stop":"reset");
5235
5236         if (!amdgpu_sriov_vf(adev))
5237                 hive = amdgpu_get_xgmi_hive(adev);
5238         if (hive)
5239                 mutex_lock(&hive->hive_lock);
5240
5241         reset_context->job = job;
5242         reset_context->hive = hive;
5243         /*
5244          * Build list of devices to reset.
5245          * In case we are in XGMI hive mode, resort the device list
5246          * to put adev in the 1st position.
5247          */
5248         INIT_LIST_HEAD(&device_list);
5249         if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1)) {
5250                 list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
5251                         list_add_tail(&tmp_adev->reset_list, &device_list);
5252                         if (gpu_reset_for_dev_remove && adev->shutdown)
5253                                 tmp_adev->shutdown = true;
5254                 }
5255                 if (!list_is_first(&adev->reset_list, &device_list))
5256                         list_rotate_to_front(&adev->reset_list, &device_list);
5257                 device_list_handle = &device_list;
5258         } else {
5259                 list_add_tail(&adev->reset_list, &device_list);
5260                 device_list_handle = &device_list;
5261         }
5262
5263         /* We need to lock reset domain only once both for XGMI and single device */
5264         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5265                                     reset_list);
5266         amdgpu_device_lock_reset_domain(tmp_adev->reset_domain);
5267
5268         /* block all schedulers and reset given job's ring */
5269         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5270
5271                 amdgpu_device_set_mp1_state(tmp_adev);
5272
5273                 /*
5274                  * Try to put the audio codec into suspend state
5275                  * before gpu reset started.
5276                  *
5277                  * Due to the power domain of the graphics device
5278                  * is shared with AZ power domain. Without this,
5279                  * we may change the audio hardware from behind
5280                  * the audio driver's back. That will trigger
5281                  * some audio codec errors.
5282                  */
5283                 if (!amdgpu_device_suspend_display_audio(tmp_adev))
5284                         audio_suspended = true;
5285
5286                 amdgpu_ras_set_error_query_ready(tmp_adev, false);
5287
5288                 cancel_delayed_work_sync(&tmp_adev->delayed_init_work);
5289
5290                 if (!amdgpu_sriov_vf(tmp_adev))
5291                         amdgpu_amdkfd_pre_reset(tmp_adev);
5292
5293                 /*
5294                  * Mark these ASICs to be reseted as untracked first
5295                  * And add them back after reset completed
5296                  */
5297                 amdgpu_unregister_gpu_instance(tmp_adev);
5298
5299                 drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
5300
5301                 /* disable ras on ALL IPs */
5302                 if (!need_emergency_restart &&
5303                       amdgpu_device_ip_need_full_reset(tmp_adev))
5304                         amdgpu_ras_suspend(tmp_adev);
5305
5306                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5307                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5308
5309                         if (!ring || !ring->sched.thread)
5310                                 continue;
5311
5312                         drm_sched_stop(&ring->sched, job ? &job->base : NULL);
5313
5314                         if (need_emergency_restart)
5315                                 amdgpu_job_stop_all_jobs_on_sched(&ring->sched);
5316                 }
5317                 atomic_inc(&tmp_adev->gpu_reset_counter);
5318         }
5319
5320         if (need_emergency_restart)
5321                 goto skip_sched_resume;
5322
5323         /*
5324          * Must check guilty signal here since after this point all old
5325          * HW fences are force signaled.
5326          *
5327          * job->base holds a reference to parent fence
5328          */
5329         if (job && dma_fence_is_signaled(&job->hw_fence)) {
5330                 job_signaled = true;
5331                 dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
5332                 goto skip_hw_reset;
5333         }
5334
5335 retry:  /* Rest of adevs pre asic reset from XGMI hive. */
5336         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5337                 if (gpu_reset_for_dev_remove) {
5338                         /* Workaroud for ASICs need to disable SMC first */
5339                         amdgpu_device_smu_fini_early(tmp_adev);
5340                 }
5341                 r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
5342                 /*TODO Should we stop ?*/
5343                 if (r) {
5344                         dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
5345                                   r, adev_to_drm(tmp_adev)->unique);
5346                         tmp_adev->asic_reset_res = r;
5347                 }
5348
5349                 /*
5350                  * Drop all pending non scheduler resets. Scheduler resets
5351                  * were already dropped during drm_sched_stop
5352                  */
5353                 amdgpu_device_stop_pending_resets(tmp_adev);
5354         }
5355
5356         /* Actual ASIC resets if needed.*/
5357         /* Host driver will handle XGMI hive reset for SRIOV */
5358         if (amdgpu_sriov_vf(adev)) {
5359                 r = amdgpu_device_reset_sriov(adev, job ? false : true);
5360                 if (r)
5361                         adev->asic_reset_res = r;
5362
5363                 /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */
5364                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
5365                     adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
5366                         amdgpu_ras_resume(adev);
5367         } else {
5368                 r = amdgpu_do_asic_reset(device_list_handle, reset_context);
5369                 if (r && r == -EAGAIN)
5370                         goto retry;
5371
5372                 if (!r && gpu_reset_for_dev_remove)
5373                         goto recover_end;
5374         }
5375
5376 skip_hw_reset:
5377
5378         /* Post ASIC reset for all devs .*/
5379         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5380
5381                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5382                         struct amdgpu_ring *ring = tmp_adev->rings[i];
5383
5384                         if (!ring || !ring->sched.thread)
5385                                 continue;
5386
5387                         drm_sched_start(&ring->sched, true);
5388                 }
5389
5390                 if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
5391                         amdgpu_mes_self_test(tmp_adev);
5392
5393                 if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
5394                         drm_helper_resume_force_mode(adev_to_drm(tmp_adev));
5395                 }
5396
5397                 if (tmp_adev->asic_reset_res)
5398                         r = tmp_adev->asic_reset_res;
5399
5400                 tmp_adev->asic_reset_res = 0;
5401
5402                 if (r) {
5403                         /* bad news, how to tell it to userspace ? */
5404                         dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&tmp_adev->gpu_reset_counter));
5405                         amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
5406                 } else {
5407                         dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
5408                         if (amdgpu_acpi_smart_shift_update(adev_to_drm(tmp_adev), AMDGPU_SS_DEV_D0))
5409                                 DRM_WARN("smart shift update failed\n");
5410                 }
5411         }
5412
5413 skip_sched_resume:
5414         list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5415                 /* unlock kfd: SRIOV would do it separately */
5416                 if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
5417                         amdgpu_amdkfd_post_reset(tmp_adev);
5418
5419                 /* kfd_post_reset will do nothing if kfd device is not initialized,
5420                  * need to bring up kfd here if it's not be initialized before
5421                  */
5422                 if (!adev->kfd.init_complete)
5423                         amdgpu_amdkfd_device_init(adev);
5424
5425                 if (audio_suspended)
5426                         amdgpu_device_resume_display_audio(tmp_adev);
5427
5428                 amdgpu_device_unset_mp1_state(tmp_adev);
5429
5430                 amdgpu_ras_set_error_query_ready(tmp_adev, true);
5431         }
5432
5433 recover_end:
5434         tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5435                                             reset_list);
5436         amdgpu_device_unlock_reset_domain(tmp_adev->reset_domain);
5437
5438         if (hive) {
5439                 mutex_unlock(&hive->hive_lock);
5440                 amdgpu_put_xgmi_hive(hive);
5441         }
5442
5443         if (r)
5444                 dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
5445
5446         atomic_set(&adev->reset_domain->reset_res, r);
5447         return r;
5448 }
5449
5450 /**
5451  * amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
5452  *
5453  * @adev: amdgpu_device pointer
5454  *
5455  * Fetches and stores in the driver the PCIE capabilities (gen speed
5456  * and lanes) of the slot the device is in. Handles APUs and
5457  * virtualized environments where PCIE config space may not be available.
5458  */
5459 static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
5460 {
5461         struct pci_dev *pdev;
5462         enum pci_bus_speed speed_cap, platform_speed_cap;
5463         enum pcie_link_width platform_link_width;
5464
5465         if (amdgpu_pcie_gen_cap)
5466                 adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
5467
5468         if (amdgpu_pcie_lane_cap)
5469                 adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
5470
5471         /* covers APUs as well */
5472         if (pci_is_root_bus(adev->pdev->bus)) {
5473                 if (adev->pm.pcie_gen_mask == 0)
5474                         adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
5475                 if (adev->pm.pcie_mlw_mask == 0)
5476                         adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
5477                 return;
5478         }
5479
5480         if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
5481                 return;
5482
5483         pcie_bandwidth_available(adev->pdev, NULL,
5484                                  &platform_speed_cap, &platform_link_width);
5485
5486         if (adev->pm.pcie_gen_mask == 0) {
5487                 /* asic caps */
5488                 pdev = adev->pdev;
5489                 speed_cap = pcie_get_speed_cap(pdev);
5490                 if (speed_cap == PCI_SPEED_UNKNOWN) {
5491                         adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5492                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5493                                                   CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5494                 } else {
5495                         if (speed_cap == PCIE_SPEED_32_0GT)
5496                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5497                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5498                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5499                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5500                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
5501                         else if (speed_cap == PCIE_SPEED_16_0GT)
5502                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5503                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5504                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5505                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
5506                         else if (speed_cap == PCIE_SPEED_8_0GT)
5507                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5508                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5509                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
5510                         else if (speed_cap == PCIE_SPEED_5_0GT)
5511                                 adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5512                                                           CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
5513                         else
5514                                 adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
5515                 }
5516                 /* platform caps */
5517                 if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
5518                         adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5519                                                    CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5520                 } else {
5521                         if (platform_speed_cap == PCIE_SPEED_32_0GT)
5522                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5523                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5524                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5525                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 |
5526                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
5527                         else if (platform_speed_cap == PCIE_SPEED_16_0GT)
5528                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5529                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5530                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
5531                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
5532                         else if (platform_speed_cap == PCIE_SPEED_8_0GT)
5533                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5534                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
5535                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
5536                         else if (platform_speed_cap == PCIE_SPEED_5_0GT)
5537                                 adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
5538                                                            CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
5539                         else
5540                                 adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
5541
5542                 }
5543         }
5544         if (adev->pm.pcie_mlw_mask == 0) {
5545                 if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
5546                         adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
5547                 } else {
5548                         switch (platform_link_width) {
5549                         case PCIE_LNK_X32:
5550                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
5551                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5552                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5553                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5554                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5555                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5556                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5557                                 break;
5558                         case PCIE_LNK_X16:
5559                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
5560                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5561                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5562                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5563                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5564                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5565                                 break;
5566                         case PCIE_LNK_X12:
5567                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
5568                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5569                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5570                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5571                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5572                                 break;
5573                         case PCIE_LNK_X8:
5574                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
5575                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5576                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5577                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5578                                 break;
5579                         case PCIE_LNK_X4:
5580                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
5581                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5582                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5583                                 break;
5584                         case PCIE_LNK_X2:
5585                                 adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
5586                                                           CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
5587                                 break;
5588                         case PCIE_LNK_X1:
5589                                 adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
5590                                 break;
5591                         default:
5592                                 break;
5593                         }
5594                 }
5595         }
5596 }
5597
5598 /**
5599  * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
5600  *
5601  * @adev: amdgpu_device pointer
5602  * @peer_adev: amdgpu_device pointer for peer device trying to access @adev
5603  *
5604  * Return true if @peer_adev can access (DMA) @adev through the PCIe
5605  * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
5606  * @peer_adev.
5607  */
5608 bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
5609                                       struct amdgpu_device *peer_adev)
5610 {
5611 #ifdef CONFIG_HSA_AMD_P2P
5612         uint64_t address_mask = peer_adev->dev->dma_mask ?
5613                 ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1);
5614         resource_size_t aper_limit =
5615                 adev->gmc.aper_base + adev->gmc.aper_size - 1;
5616         bool p2p_access =
5617                 !adev->gmc.xgmi.connected_to_cpu &&
5618                 !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0);
5619
5620         return pcie_p2p && p2p_access && (adev->gmc.visible_vram_size &&
5621                 adev->gmc.real_vram_size == adev->gmc.visible_vram_size &&
5622                 !(adev->gmc.aper_base & address_mask ||
5623                   aper_limit & address_mask));
5624 #else
5625         return false;
5626 #endif
5627 }
5628
5629 int amdgpu_device_baco_enter(struct drm_device *dev)
5630 {
5631         struct amdgpu_device *adev = drm_to_adev(dev);
5632         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5633
5634         if (!amdgpu_device_supports_baco(dev))
5635                 return -ENOTSUPP;
5636
5637         if (ras && adev->ras_enabled &&
5638             adev->nbio.funcs->enable_doorbell_interrupt)
5639                 adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
5640
5641         return amdgpu_dpm_baco_enter(adev);
5642 }
5643
5644 int amdgpu_device_baco_exit(struct drm_device *dev)
5645 {
5646         struct amdgpu_device *adev = drm_to_adev(dev);
5647         struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
5648         int ret = 0;
5649
5650         if (!amdgpu_device_supports_baco(dev))
5651                 return -ENOTSUPP;
5652
5653         ret = amdgpu_dpm_baco_exit(adev);
5654         if (ret)
5655                 return ret;
5656
5657         if (ras && adev->ras_enabled &&
5658             adev->nbio.funcs->enable_doorbell_interrupt)
5659                 adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
5660
5661         if (amdgpu_passthrough(adev) &&
5662             adev->nbio.funcs->clear_doorbell_interrupt)
5663                 adev->nbio.funcs->clear_doorbell_interrupt(adev);
5664
5665         return 0;
5666 }
5667
5668 /**
5669  * amdgpu_pci_error_detected - Called when a PCI error is detected.
5670  * @pdev: PCI device struct
5671  * @state: PCI channel state
5672  *
5673  * Description: Called when a PCI error is detected.
5674  *
5675  * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
5676  */
5677 pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
5678 {
5679         struct drm_device *dev = pci_get_drvdata(pdev);
5680         struct amdgpu_device *adev = drm_to_adev(dev);
5681         int i;
5682
5683         DRM_INFO("PCI error: detected callback, state(%d)!!\n", state);
5684
5685         if (adev->gmc.xgmi.num_physical_nodes > 1) {
5686                 DRM_WARN("No support for XGMI hive yet...");
5687                 return PCI_ERS_RESULT_DISCONNECT;
5688         }
5689
5690         adev->pci_channel_state = state;
5691
5692         switch (state) {
5693         case pci_channel_io_normal:
5694                 return PCI_ERS_RESULT_CAN_RECOVER;
5695         /* Fatal error, prepare for slot reset */
5696         case pci_channel_io_frozen:
5697                 /*
5698                  * Locking adev->reset_domain->sem will prevent any external access
5699                  * to GPU during PCI error recovery
5700                  */
5701                 amdgpu_device_lock_reset_domain(adev->reset_domain);
5702                 amdgpu_device_set_mp1_state(adev);
5703
5704                 /*
5705                  * Block any work scheduling as we do for regular GPU reset
5706                  * for the duration of the recovery
5707                  */
5708                 for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5709                         struct amdgpu_ring *ring = adev->rings[i];
5710
5711                         if (!ring || !ring->sched.thread)
5712                                 continue;
5713
5714                         drm_sched_stop(&ring->sched, NULL);
5715                 }
5716                 atomic_inc(&adev->gpu_reset_counter);
5717                 return PCI_ERS_RESULT_NEED_RESET;
5718         case pci_channel_io_perm_failure:
5719                 /* Permanent error, prepare for device removal */
5720                 return PCI_ERS_RESULT_DISCONNECT;
5721         }
5722
5723         return PCI_ERS_RESULT_NEED_RESET;
5724 }
5725
5726 /**
5727  * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
5728  * @pdev: pointer to PCI device
5729  */
5730 pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
5731 {
5732
5733         DRM_INFO("PCI error: mmio enabled callback!!\n");
5734
5735         /* TODO - dump whatever for debugging purposes */
5736
5737         /* This called only if amdgpu_pci_error_detected returns
5738          * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
5739          * works, no need to reset slot.
5740          */
5741
5742         return PCI_ERS_RESULT_RECOVERED;
5743 }
5744
5745 /**
5746  * amdgpu_pci_slot_reset - Called when PCI slot has been reset.
5747  * @pdev: PCI device struct
5748  *
5749  * Description: This routine is called by the pci error recovery
5750  * code after the PCI slot has been reset, just before we
5751  * should resume normal operations.
5752  */
5753 pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
5754 {
5755         struct drm_device *dev = pci_get_drvdata(pdev);
5756         struct amdgpu_device *adev = drm_to_adev(dev);
5757         int r, i;
5758         struct amdgpu_reset_context reset_context;
5759         u32 memsize;
5760         struct list_head device_list;
5761
5762         DRM_INFO("PCI error: slot reset callback!!\n");
5763
5764         memset(&reset_context, 0, sizeof(reset_context));
5765
5766         INIT_LIST_HEAD(&device_list);
5767         list_add_tail(&adev->reset_list, &device_list);
5768
5769         /* wait for asic to come out of reset */
5770         msleep(500);
5771
5772         /* Restore PCI confspace */
5773         amdgpu_device_load_pci_state(pdev);
5774
5775         /* confirm  ASIC came out of reset */
5776         for (i = 0; i < adev->usec_timeout; i++) {
5777                 memsize = amdgpu_asic_get_config_memsize(adev);
5778
5779                 if (memsize != 0xffffffff)
5780                         break;
5781                 udelay(1);
5782         }
5783         if (memsize == 0xffffffff) {
5784                 r = -ETIME;
5785                 goto out;
5786         }
5787
5788         reset_context.method = AMD_RESET_METHOD_NONE;
5789         reset_context.reset_req_dev = adev;
5790         set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
5791         set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
5792
5793         adev->no_hw_access = true;
5794         r = amdgpu_device_pre_asic_reset(adev, &reset_context);
5795         adev->no_hw_access = false;
5796         if (r)
5797                 goto out;
5798
5799         r = amdgpu_do_asic_reset(&device_list, &reset_context);
5800
5801 out:
5802         if (!r) {
5803                 if (amdgpu_device_cache_pci_state(adev->pdev))
5804                         pci_restore_state(adev->pdev);
5805
5806                 DRM_INFO("PCIe error recovery succeeded\n");
5807         } else {
5808                 DRM_ERROR("PCIe error recovery failed, err:%d", r);
5809                 amdgpu_device_unset_mp1_state(adev);
5810                 amdgpu_device_unlock_reset_domain(adev->reset_domain);
5811         }
5812
5813         return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
5814 }
5815
5816 /**
5817  * amdgpu_pci_resume() - resume normal ops after PCI reset
5818  * @pdev: pointer to PCI device
5819  *
5820  * Called when the error recovery driver tells us that its
5821  * OK to resume normal operation.
5822  */
5823 void amdgpu_pci_resume(struct pci_dev *pdev)
5824 {
5825         struct drm_device *dev = pci_get_drvdata(pdev);
5826         struct amdgpu_device *adev = drm_to_adev(dev);
5827         int i;
5828
5829
5830         DRM_INFO("PCI error: resume callback!!\n");
5831
5832         /* Only continue execution for the case of pci_channel_io_frozen */
5833         if (adev->pci_channel_state != pci_channel_io_frozen)
5834                 return;
5835
5836         for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
5837                 struct amdgpu_ring *ring = adev->rings[i];
5838
5839                 if (!ring || !ring->sched.thread)
5840                         continue;
5841
5842                 drm_sched_start(&ring->sched, true);
5843         }
5844
5845         amdgpu_device_unset_mp1_state(adev);
5846         amdgpu_device_unlock_reset_domain(adev->reset_domain);
5847 }
5848
5849 bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
5850 {
5851         struct drm_device *dev = pci_get_drvdata(pdev);
5852         struct amdgpu_device *adev = drm_to_adev(dev);
5853         int r;
5854
5855         r = pci_save_state(pdev);
5856         if (!r) {
5857                 kfree(adev->pci_state);
5858
5859                 adev->pci_state = pci_store_saved_state(pdev);
5860
5861                 if (!adev->pci_state) {
5862                         DRM_ERROR("Failed to store PCI saved state");
5863                         return false;
5864                 }
5865         } else {
5866                 DRM_WARN("Failed to save PCI state, err:%d\n", r);
5867                 return false;
5868         }
5869
5870         return true;
5871 }
5872
5873 bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
5874 {
5875         struct drm_device *dev = pci_get_drvdata(pdev);
5876         struct amdgpu_device *adev = drm_to_adev(dev);
5877         int r;
5878
5879         if (!adev->pci_state)
5880                 return false;
5881
5882         r = pci_load_saved_state(pdev, adev->pci_state);
5883
5884         if (!r) {
5885                 pci_restore_state(pdev);
5886         } else {
5887                 DRM_WARN("Failed to load PCI state, err:%d\n", r);
5888                 return false;
5889         }
5890
5891         return true;
5892 }
5893
5894 void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
5895                 struct amdgpu_ring *ring)
5896 {
5897 #ifdef CONFIG_X86_64
5898         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5899                 return;
5900 #endif
5901         if (adev->gmc.xgmi.connected_to_cpu)
5902                 return;
5903
5904         if (ring && ring->funcs->emit_hdp_flush)
5905                 amdgpu_ring_emit_hdp_flush(ring);
5906         else
5907                 amdgpu_asic_flush_hdp(adev, ring);
5908 }
5909
5910 void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
5911                 struct amdgpu_ring *ring)
5912 {
5913 #ifdef CONFIG_X86_64
5914         if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
5915                 return;
5916 #endif
5917         if (adev->gmc.xgmi.connected_to_cpu)
5918                 return;
5919
5920         amdgpu_asic_invalidate_hdp(adev, ring);
5921 }
5922
5923 int amdgpu_in_reset(struct amdgpu_device *adev)
5924 {
5925         return atomic_read(&adev->reset_domain->in_gpu_reset);
5926 }
5927
5928 /**
5929  * amdgpu_device_halt() - bring hardware to some kind of halt state
5930  *
5931  * @adev: amdgpu_device pointer
5932  *
5933  * Bring hardware to some kind of halt state so that no one can touch it
5934  * any more. It will help to maintain error context when error occurred.
5935  * Compare to a simple hang, the system will keep stable at least for SSH
5936  * access. Then it should be trivial to inspect the hardware state and
5937  * see what's going on. Implemented as following:
5938  *
5939  * 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
5940  *    clears all CPU mappings to device, disallows remappings through page faults
5941  * 2. amdgpu_irq_disable_all() disables all interrupts
5942  * 3. amdgpu_fence_driver_hw_fini() signals all HW fences
5943  * 4. set adev->no_hw_access to avoid potential crashes after setp 5
5944  * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
5945  * 6. pci_disable_device() and pci_wait_for_pending_transaction()
5946  *    flush any in flight DMA operations
5947  */
5948 void amdgpu_device_halt(struct amdgpu_device *adev)
5949 {
5950         struct pci_dev *pdev = adev->pdev;
5951         struct drm_device *ddev = adev_to_drm(adev);
5952
5953         drm_dev_unplug(ddev);
5954
5955         amdgpu_irq_disable_all(adev);
5956
5957         amdgpu_fence_driver_hw_fini(adev);
5958
5959         adev->no_hw_access = true;
5960
5961         amdgpu_device_unmap_mmio(adev);
5962
5963         pci_disable_device(pdev);
5964         pci_wait_for_pending_transaction(pdev);
5965 }
5966
5967 u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
5968                                 u32 reg)
5969 {
5970         unsigned long flags, address, data;
5971         u32 r;
5972
5973         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5974         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5975
5976         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5977         WREG32(address, reg * 4);
5978         (void)RREG32(address);
5979         r = RREG32(data);
5980         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5981         return r;
5982 }
5983
5984 void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
5985                                 u32 reg, u32 v)
5986 {
5987         unsigned long flags, address, data;
5988
5989         address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
5990         data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
5991
5992         spin_lock_irqsave(&adev->pcie_idx_lock, flags);
5993         WREG32(address, reg * 4);
5994         (void)RREG32(address);
5995         WREG32(data, v);
5996         (void)RREG32(data);
5997         spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
5998 }
5999
6000 /**
6001  * amdgpu_device_switch_gang - switch to a new gang
6002  * @adev: amdgpu_device pointer
6003  * @gang: the gang to switch to
6004  *
6005  * Try to switch to a new gang.
6006  * Returns: NULL if we switched to the new gang or a reference to the current
6007  * gang leader.
6008  */
6009 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
6010                                             struct dma_fence *gang)
6011 {
6012         struct dma_fence *old = NULL;
6013
6014         do {
6015                 dma_fence_put(old);
6016                 rcu_read_lock();
6017                 old = dma_fence_get_rcu_safe(&adev->gang_submit);
6018                 rcu_read_unlock();
6019
6020                 if (old == gang)
6021                         break;
6022
6023                 if (!dma_fence_is_signaled(old))
6024                         return old;
6025
6026         } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
6027                          old, gang) != old);
6028
6029         dma_fence_put(old);
6030         return NULL;
6031 }
6032
6033 bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
6034 {
6035         switch (adev->asic_type) {
6036 #ifdef CONFIG_DRM_AMDGPU_SI
6037         case CHIP_HAINAN:
6038 #endif
6039         case CHIP_TOPAZ:
6040                 /* chips with no display hardware */
6041                 return false;
6042 #ifdef CONFIG_DRM_AMDGPU_SI
6043         case CHIP_TAHITI:
6044         case CHIP_PITCAIRN:
6045         case CHIP_VERDE:
6046         case CHIP_OLAND:
6047 #endif
6048 #ifdef CONFIG_DRM_AMDGPU_CIK
6049         case CHIP_BONAIRE:
6050         case CHIP_HAWAII:
6051         case CHIP_KAVERI:
6052         case CHIP_KABINI:
6053         case CHIP_MULLINS:
6054 #endif
6055         case CHIP_TONGA:
6056         case CHIP_FIJI:
6057         case CHIP_POLARIS10:
6058         case CHIP_POLARIS11:
6059         case CHIP_POLARIS12:
6060         case CHIP_VEGAM:
6061         case CHIP_CARRIZO:
6062         case CHIP_STONEY:
6063                 /* chips with display hardware */
6064                 return true;
6065         default:
6066                 /* IP discovery */
6067                 if (!adev->ip_versions[DCE_HWIP][0] ||
6068                     (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
6069                         return false;
6070                 return true;
6071         }
6072 }