tools/testing/nvdimm: smart alarm/threshold control
author Dan Williams <dan.j.williams@intel.com>
Fri, 24 Nov 2017 22:32:27 +0000 (14:32 -0800)
committer Dan Williams <dan.j.williams@intel.com>
Mon, 4 Dec 2017 18:19:31 +0000 (10:19 -0800)
Allow the smart_threshold values to be changed via the 'set smart
threshold' command, and trigger notifications when the thresholds are
met.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
tools/testing/nvdimm/test/nfit.c
tools/testing/nvdimm/test/nfit_test.h

index 640c02b..2b57254 100644 (file)
@@ -168,6 +168,8 @@ struct nfit_test {
                spinlock_t lock;
        } ars_state;
        struct device *dimm_dev[NUM_DCR];
+       struct nd_intel_smart *smart;
+       struct nd_intel_smart_threshold *smart_threshold;
        struct badrange badrange;
        struct work_struct work;
 };
@@ -440,50 +442,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
        return 0;
 }
 
-static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len)
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+               struct nd_intel_smart *smart_data) /* record to report; supplied by the caller */
 {
-       static const struct nd_intel_smart smart_data = {
-               .flags = ND_INTEL_SMART_HEALTH_VALID
-                       | ND_INTEL_SMART_SPARES_VALID
-                       | ND_INTEL_SMART_ALARM_VALID
-                       | ND_INTEL_SMART_USED_VALID
-                       | ND_INTEL_SMART_SHUTDOWN_VALID
-                       | ND_INTEL_SMART_MTEMP_VALID,
-               .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
-               .media_temperature = 23 * 16,
-               .ctrl_temperature = 30 * 16,
-               .pmic_temperature = 40 * 16,
-               .spares = 75,
-               .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
-                       | ND_INTEL_SMART_TEMP_TRIP,
-               .ait_status = 1,
-               .life_used = 5,
-               .shutdown_state = 0,
-               .vendor_size = 0,
-               .shutdown_count = 100,
-       };
-
         if (buf_len < sizeof(*smart))
                 return -EINVAL;
-       memcpy(smart, &smart_data, sizeof(smart_data));
+       memcpy(smart, smart_data, sizeof(*smart)); /* smart is the output buffer; returns 0 after the copy */
         return 0;
 }
 
 static int nfit_test_cmd_smart_threshold(
-               struct nd_intel_smart_threshold *smart_t,
-               unsigned int buf_len)
+               struct nd_intel_smart_threshold *out, /* destination of the copy */
+               unsigned int buf_len,
+               struct nd_intel_smart_threshold *smart_t) /* threshold record to report */
 {
-       static const struct nd_intel_smart_threshold smart_t_data = {
-               .alarm_control = ND_INTEL_SMART_SPARE_TRIP
-                       | ND_INTEL_SMART_TEMP_TRIP,
-               .media_temperature = 40 * 16,
-               .ctrl_temperature = 30 * 16,
-               .spares = 5,
-       };
-
         if (buf_len < sizeof(*smart_t))
                 return -EINVAL;
-       memcpy(smart_t, &smart_t_data, sizeof(smart_t_data));
+       memcpy(out, smart_t, sizeof(*smart_t)); /* out and smart_t share a type, so one size covers both */
+       return 0;
+}
+
+static void smart_notify(struct device *bus_dev, /* notify dimm_dev if an enabled alarm has tripped */
+               struct device *dimm_dev, struct nd_intel_smart *smart,
+               struct nd_intel_smart_threshold *thresh)
+{
+       dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+                       __func__, thresh->alarm_control, thresh->spares,
+                       smart->spares, thresh->media_temperature,
+                       smart->media_temperature, thresh->ctrl_temperature,
+                       smart->ctrl_temperature); /* each pair: threshold, then current reading in parentheses */
+       if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+                               && smart->spares
+                               <= thresh->spares) /* spares trip at or below the threshold */
+                       || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+                               && smart->media_temperature
+                               >= thresh->media_temperature) /* temperatures trip at or above the threshold */
+                       || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+                               && smart->ctrl_temperature
+                               >= thresh->ctrl_temperature)) {
+               device_lock(bus_dev); /* the notify call runs with bus_dev locked */
+               __acpi_nvdimm_notify(dimm_dev, 0x81); /* NOTE(review): 0x81 looks like the dimm health event code -- confirm */
+               device_unlock(bus_dev);
+       }
+}
+
+static int nfit_test_cmd_smart_set_threshold( /* store new thresholds, then re-check the alarms */
+               struct nd_intel_smart_set_threshold *in,
+               unsigned int buf_len,
+               struct nd_intel_smart_threshold *thresh,
+               struct nd_intel_smart *smart,
+               struct device *bus_dev, struct device *dimm_dev)
+{
+       unsigned int size;
+
+       size = sizeof(*in) - 4; /* input payload only; the 4 bytes are presumably the trailing __u32 status -- confirm */
+       if (buf_len < size)
+               return -EINVAL;
+       memcpy(thresh->data, in, size); /* NOTE(review): assumes thresh->data has the same layout as the payload -- confirm */
+       in->status = 0; /* status is written back into the command buffer */
+       smart_notify(bus_dev, dimm_dev, smart, thresh); /* may notify immediately if the new values are already met */
+
         return 0;
 }
 
@@ -608,7 +626,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                                || !test_bit(func, &nfit_mem->dsm_mask))
                        return -ENOTTY;
 
-               /* lookup label space for the given dimm */
+               /* lookup per-dimm data */
                for (i = 0; i < ARRAY_SIZE(handle); i++)
                        if (__to_nfit_memdev(nfit_mem)->device_handle ==
                                        handle[i])
@@ -631,14 +649,19 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
                        rc = nfit_test_cmd_set_config_data(buf, buf_len,
                                t->label[i - t->dcr_idx]);
                        break;
-               case ND_CMD_SMART:
-                       rc = nfit_test_cmd_smart(buf, buf_len);
+               case ND_INTEL_SMART:
+                       rc = nfit_test_cmd_smart(buf, buf_len,
+                                       &t->smart[i - t->dcr_idx]);
+                       break;
+               case ND_INTEL_SMART_THRESHOLD:
+                       rc = nfit_test_cmd_smart_threshold(buf, buf_len,
+                                       &t->smart_threshold[i - t->dcr_idx]);
                        break;
-               case ND_CMD_SMART_THRESHOLD:
-                       rc = nfit_test_cmd_smart_threshold(buf, buf_len);
-                       device_lock(&t->pdev.dev);
-                       __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
-                       device_unlock(&t->pdev.dev);
+               case ND_INTEL_SMART_SET_THRESHOLD:
+                       rc = nfit_test_cmd_smart_set_threshold(buf, buf_len,
+                                       &t->smart_threshold[i - t->dcr_idx],
+                                       &t->smart[i - t->dcr_idx],
+                                       &t->pdev.dev, t->dimm_dev[i]);
                        break;
                default:
                        return -ENOTTY;
@@ -883,6 +906,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
        NULL,
 };
 
+static void smart_init(struct nfit_test *t) /* seed t->smart[] and t->smart_threshold[] with default values */
+{
+       int i;
+       const struct nd_intel_smart_threshold smart_t_data = { /* template for every threshold entry */
+               .alarm_control = ND_INTEL_SMART_SPARE_TRIP
+                       | ND_INTEL_SMART_TEMP_TRIP,
+               .media_temperature = 40 * 16,
+               .ctrl_temperature = 30 * 16,
+               .spares = 5,
+       };
+       const struct nd_intel_smart smart_data = { /* template for every smart entry */
+               .flags = ND_INTEL_SMART_HEALTH_VALID
+                       | ND_INTEL_SMART_SPARES_VALID
+                       | ND_INTEL_SMART_ALARM_VALID
+                       | ND_INTEL_SMART_USED_VALID
+                       | ND_INTEL_SMART_SHUTDOWN_VALID
+                       | ND_INTEL_SMART_MTEMP_VALID,
+               .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+               .media_temperature = 23 * 16,
+               .ctrl_temperature = 30 * 16,
+               .pmic_temperature = 40 * 16,
+               .spares = 75,
+               .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+                       | ND_INTEL_SMART_TEMP_TRIP,
+               .ait_status = 1,
+               .life_used = 5,
+               .shutdown_state = 0,
+               .vendor_size = 0,
+               .shutdown_count = 100,
+       };
+
+       for (i = 0; i < t->num_dcr; i++) { /* fills num_dcr entries; both arrays must already be allocated */
+               memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
+               memcpy(&t->smart_threshold[i], &smart_t_data,
+                               sizeof(smart_t_data));
+       }
+}
+
 static int nfit_test0_alloc(struct nfit_test *t)
 {
        size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -950,6 +1011,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
                        return -ENOMEM;
        }
 
+       smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -980,6 +1042,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
        if (!t->spa_set[1])
                return -ENOMEM;
 
+       smart_init(t);
        return ars_state_init(&t->pdev.dev, &t->ars_state);
 }
 
@@ -1653,13 +1716,14 @@ static void nfit_test0_setup(struct nfit_test *t)
        set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
-       set_bit(ND_CMD_SMART, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+       set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
        set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
        set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
-       set_bit(ND_CMD_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
        set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
        set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
        set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
@@ -2065,6 +2129,11 @@ static int nfit_test_probe(struct platform_device *pdev)
                                sizeof(struct nfit_test_dcr *), GFP_KERNEL);
                nfit_test->dcr_dma = devm_kcalloc(dev, num,
                                sizeof(dma_addr_t), GFP_KERNEL);
+               nfit_test->smart = devm_kcalloc(dev, num,
+                               sizeof(struct nd_intel_smart), GFP_KERNEL);
+               nfit_test->smart_threshold = devm_kcalloc(dev, num,
+                               sizeof(struct nd_intel_smart_threshold),
+                               GFP_KERNEL);
                if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
                                && nfit_test->label_dma && nfit_test->dcr
                                && nfit_test->dcr_dma && nfit_test->flush
index b85fba2..ba230f6 100644 (file)
@@ -86,6 +86,7 @@ struct nd_cmd_ars_err_inj_stat {
 
 #define ND_INTEL_SMART 1
 #define ND_INTEL_SMART_THRESHOLD 2
+#define ND_INTEL_SMART_SET_THRESHOLD 17
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -143,6 +144,14 @@ struct nd_intel_smart_threshold {
        };
 } __packed;
 
+struct nd_intel_smart_set_threshold { /* buffer for the ND_INTEL_SMART_SET_THRESHOLD command */
+       __u16 alarm_control; /* tested against the ND_INTEL_SMART_*_TRIP bits */
+       __u8 spares;
+       __u16 media_temperature;
+       __u16 ctrl_temperature;
+       __u32 status; /* last field; the members above occupy 7 packed bytes */
+} __packed;
+
 union acpi_object;
 typedef void *acpi_handle;