drm/amdgpu/ras: fix and update the documentation for RAS

author Alex Deucher <alexander.deucher@amd.com>

Thu, 19 Sep 2019 20:09:56 +0000 (15:09 -0500)

committer Alex Deucher <alexander.deucher@amd.com>

Thu, 3 Oct 2019 14:11:00 +0000 (09:11 -0500)
author Alex Deucher <alexander.deucher@amd.com>
Thu, 19 Sep 2019 20:09:56 +0000 (15:09 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 3 Oct 2019 14:11:00 +0000 (09:11 -0500)
diff --git a/Documentation/gpu/amdgpu.rst b/Documentation/gpu/amdgpu.rst

index 80db5d8..5b9eaf2 100644 (file)
--- a/Documentation/gpu/amdgpu.rst
+++ b/Documentation/gpu/amdgpu.rst
@@ -79,12 +79,32 @@ AMDGPU XGMI Support
  .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
     :internal:
  
-AMDGPU RAS debugfs control interface
-====================================
+AMDGPU RAS Support
+==================
+
+RAS debugfs/sysfs Control and Error Injection Interfaces
+--------------------------------------------------------
  
  .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
     :doc: AMDGPU RAS debugfs control interface
  
+RAS Error Count sysfs Interface
+-------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS sysfs Error Count Interface
+
+RAS EEPROM debugfs Interface
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS debugfs EEPROM table reset interface
+
+RAS VRAM Bad Pages sysfs Interface
+----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+   :doc: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
  
  .. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
     :internal:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 83b681a..14f3f8d 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -310,7 +310,18 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *
  /**
   * DOC: AMDGPU RAS debugfs EEPROM table reset interface
   *
- * Usage: echo 1 > ../ras/ras_eeprom_reset will reset EEPROM table to 0 entries.
+ * Some boards contain an EEPROM which is used to persistently store a list of
+ * bad pages containing ECC errors detected in vram.  This interface provides
+ * a way to reset the EEPROM, e.g., after testing error injection.
+ *
+ * Usage:
+ *
+ * .. code-block:: bash
+ *
+ *     echo 1 > ../ras/ras_eeprom_reset
+ *
+ * This resets the EEPROM table to 0 entries.
+ *
   */
  static ssize_t amdgpu_ras_debugfs_eeprom_write(struct file *f, const char __user *buf,
                 size_t size, loff_t *pos)
@@ -337,6 +348,27 @@ static const struct file_operations amdgpu_ras_debugfs_eeprom_ops = {
         .llseek = default_llseek
  };
  
+/**
+ * DOC: AMDGPU RAS sysfs Error Count Interface
+ *
+ * It allows the user to read the error count for each IP block on the gpu via
+ * /sys/class/drm/card[0/1/2...]/device/ras/[gfx/sdma/...]_err_count
+ *
+ * It outputs multiple lines which report the uncorrected (ue) and corrected
+ * (ce) error counts.
+ *
+ * The format of each line is as follows:
+ *
+ * [ce|ue]: count
+ *
+ * Example:
+ *
+ * .. code-block:: bash
+ *
+ *     ue: 0
+ *     ce: 1
+ *
+ */
  static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
                 struct device_attribute *attr, char *buf)
  {
@@ -781,8 +813,8 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
         };
  }
  
-/*
- * DOC: ras sysfs gpu_vram_bad_pages interface
+/**
+ * DOC: AMDGPU RAS sysfs gpu_vram_bad_pages Interface
   *
   * It allows user to read the bad pages of vram on the gpu through
   * /sys/class/drm/card[0/1/2...]/device/ras/gpu_vram_bad_pages
@@ -794,14 +826,21 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags)
   *
   * gpu pfn and gpu page size are printed in hex format.
   * flags can be one of below character,
+ *
   * R: reserved, this gpu page is reserved and not able to use.
+ *
   * P: pending for reserve, this gpu page is marked as bad, will be reserved
- *    in next window of page_reserve.
+ * in next window of page_reserve.
+ *
   * F: unable to reserve. this gpu page can't be reserved due to some reasons.
   *
- * examples:
- * 0x00000001 : 0x00001000 : R
- * 0x00000002 : 0x00001000 : P
+ * Examples:
+ *
+ * .. code-block:: bash
+ *
+ *     0x00000001 : 0x00001000 : R
+ *     0x00000002 : 0x00001000 : P
+ *
   */
  
  static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f,
author	Alex Deucher <alexander.deucher@amd.com>
	Thu, 19 Sep 2019 20:09:56 +0000 (15:09 -0500)
committer	Alex Deucher <alexander.deucher@amd.com>
	Thu, 3 Oct 2019 14:11:00 +0000 (09:11 -0500)
Documentation/gpu/amdgpu.rst		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c		patch \| blob \| history