RISC-V: rework comments in ISA string parser
author: Conor Dooley <conor.dooley@microchip.com>
Wed, 7 Jun 2023 20:28:28 +0000 (21:28 +0100)
committer: Palmer Dabbelt <palmer@rivosinc.com>
Wed, 21 Jun 2023 14:45:16 +0000 (07:45 -0700)
I have found these comments to not be at all helpful whenever I look at
the parser. Further, the comments in the default case (single letter
parser) are not quite right either.
Group the comments into a larger one at the start of each case, that
attempts to explain things at a higher level.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230607-headpiece-tannery-83ed5cc4856a@spud
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/kernel/cpufeature.c

index c3851c8cfa9c963ad36498058c08af35ca2b2eb0..7dd4589e79a4b4a59fd974b25c4e3f57fc320e19 100644 (file)
@@ -164,7 +164,7 @@ void __init riscv_fill_hwcap(void)
 
                        switch (*ext) {
                        case 's':
-                               /**
+                               /*
                                 * Workaround for invalid single-letter 's' & 'u'(QEMU).
                                 * No need to set the bit in riscv_isa as 's' & 'u' are
                                 * not valid ISA extensions. It works until multi-letter
@@ -181,53 +181,101 @@ void __init riscv_fill_hwcap(void)
                        case 'X':
                        case 'z':
                        case 'Z':
+                               /*
+                                * Before attempting to parse the extension itself, we find its end.
+                                * As multi-letter extensions must be split from other multi-letter
+                                * extensions with an "_", the end of a multi-letter extension will
+                                * either be the null character or the "_" at the start of the next
+                                * multi-letter extension.
+                                *
+                                * Next, as the extension's version is currently ignored, we
+                                * eliminate that portion. This is done by parsing backwards from
+                                * the end of the extension, removing any numbers. This may be a
+                                * major or minor number however, so the process is repeated if a
+                                * minor number was found.
+                                *
+                                * ext_end is intended to represent the first character *after* the
+                                * name portion of an extension, but will be decremented to the last
+                                * character itself while eliminating the extension's version number.
+                                * A simple re-increment solves this problem.
+                                */
                                ext_long = true;
-                               /* Multi-letter extension must be delimited */
                                for (; *isa && *isa != '_'; ++isa)
                                        if (unlikely(!isalnum(*isa)))
                                                ext_err = true;
-                               /* Parse backwards */
+
                                ext_end = isa;
                                if (unlikely(ext_err))
                                        break;
+
                                if (!isdigit(ext_end[-1]))
                                        break;
-                               /* Skip the minor version */
+
                                while (isdigit(*--ext_end))
                                        ;
-                               if (tolower(ext_end[0]) != 'p'
-                                   || !isdigit(ext_end[-1])) {
-                                       /* Advance it to offset the pre-decrement */
+
+                               if (tolower(ext_end[0]) != 'p' || !isdigit(ext_end[-1])) {
                                        ++ext_end;
                                        break;
                                }
-                               /* Skip the major version */
+
                                while (isdigit(*--ext_end))
                                        ;
+
                                ++ext_end;
                                break;
                        default:
+                               /*
+                                * Things are a little easier for single-letter extensions, as they
+                                * are parsed forwards.
+                                *
+                                * After checking that our starting position is valid, we need to
+                                * ensure that, when isa was incremented at the start of the loop,
+                                * it arrived at the start of the next extension.
+                                *
+                                * If we are already on a non-digit, there is nothing to do. Either
+                                * we have a multi-letter extension's _, or the start of an
+                                * extension.
+                                *
+                                * Otherwise we have found the current extension's major version
+                                * number. Parse past it, and a subsequent p/minor version number
+                                * if present. The `p` extension must not appear immediately after
+                                * a number, so there is no fear of missing it.
+                                *
+                                */
                                if (unlikely(!isalpha(*ext))) {
                                        ext_err = true;
                                        break;
                                }
-                               /* Find next extension */
+
                                if (!isdigit(*isa))
                                        break;
-                               /* Skip the minor version */
+
                                while (isdigit(*++isa))
                                        ;
+
                                if (tolower(*isa) != 'p')
                                        break;
+
                                if (!isdigit(*++isa)) {
                                        --isa;
                                        break;
                                }
-                               /* Skip the major version */
+
                                while (isdigit(*++isa))
                                        ;
+
                                break;
                        }
+
+                       /*
+                        * The parser expects that at the start of an iteration isa points to the
+                        * character before the start of the next extension. This will not be the
+                        * case if we have just parsed a single-letter extension and the next
+                        * extension is not a multi-letter extension prefixed with an "_". It is
+                        * also not the case at the end of the string, where it will point to the
+                        * terminating null character.
+                        */
                        if (*isa != '_')
                                --isa;