RAS/CEC: Check count_threshold unconditionally
author: Borislav Petkov <bp@suse.de>
Sat, 20 Apr 2019 12:06:37 +0000 (14:06 +0200)
committer: Borislav Petkov <bp@suse.de>
Sat, 8 Jun 2019 15:33:10 +0000 (17:33 +0200)
The count_threshold should be checked unconditionally, after insertion
too, so that a count_threshold value of 1 can cause an immediate
offlining. I.e., offline the page on the *first* error encountered.

Add comments to make it clear what cec_add_elem() does, while at it.

Reported-by: WANG Chao <chao.wang@ucloud.cn>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: linux-edac@vger.kernel.org
Link: https://lkml.kernel.org/r/20190418034115.75954-3-chao.wang@ucloud.cn
drivers/ras/cec.c

index f5795ad..73a975c 100644 (file)
@@ -294,6 +294,7 @@ int cec_add_elem(u64 pfn)
 
        ca->ces_entered++;
 
+       /* Array full, free the LRU slot. */
        if (ca->n == MAX_ELEMS)
                WARN_ON(!del_lru_elem_unlocked(ca));
 
@@ -306,24 +307,17 @@ int cec_add_elem(u64 pfn)
                        (void *)&ca->array[to],
                        (ca->n - to) * sizeof(u64));
 
-               ca->array[to] = (pfn << PAGE_SHIFT) |
-                               (DECAY_MASK << COUNT_BITS) | 1;
-
+               ca->array[to] = pfn << PAGE_SHIFT;
                ca->n++;
-
-               ret = 0;
-
-               goto decay;
        }
 
-       count = COUNT(ca->array[to]);
-
-       if (count < count_threshold) {
-               ca->array[to] |= (DECAY_MASK << COUNT_BITS);
-               ca->array[to]++;
+       /* Add/refresh element generation and increment count */
+       ca->array[to] |= DECAY_MASK << COUNT_BITS;
+       ca->array[to]++;
 
-               ret = 0;
-       } else {
+       /* Check action threshold and soft-offline, if reached. */
+       count = COUNT(ca->array[to]);
+       if (count >= count_threshold) {
                u64 pfn = ca->array[to] >> PAGE_SHIFT;
 
                if (!pfn_valid(pfn)) {
@@ -338,15 +332,14 @@ int cec_add_elem(u64 pfn)
                del_elem(ca, to);
 
                /*
-                * Return a >0 value to denote that we've reached the offlining
-                * threshold.
+                * Return a >0 value to callers, to denote that we've reached
+                * the offlining threshold.
                 */
                ret = 1;
 
                goto unlock;
        }
 
-decay:
        ca->decay_count++;
 
        if (ca->decay_count >= CLEAN_ELEMS)