scripts/sort-makefile-lines.py

   1 #!/usr/bin/python3
   2 # Sort Makefile lines as expected by project policy.
   3 # Copyright (C) 2023-2024 Free Software Foundation, Inc.
   4 # This file is part of the GNU C Library.
   5 #
   6 # The GNU C Library is free software; you can redistribute it and/or
   7 # modify it under the terms of the GNU Lesser General Public
   8 # License as published by the Free Software Foundation; either
   9 # version 2.1 of the License, or (at your option) any later version.
  10 #
  11 # The GNU C Library is distributed in the hope that it will be useful,
  12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # Lesser General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU Lesser General Public
  17 # License along with the GNU C Library; if not, see
  18 # <https://www.gnu.org/licenses/>.
  19
  20 # The project consensus is to split Makefile variable assignment
  21 # across multiple lines with one value per line.  The values are
  22 # then sorted as described below, and terminated with a special
  23 # list termination marker.  This splitting makes it much easier
  24 # to add new tests to the list since they become just a single
  25 # line insertion.  It also makes backports and merges easier
  26 # since the new test may not conflict due to the ordering.
  27 #
  28 # Consensus discussion:
  29 # https://inbox.sourceware.org/libc-alpha/f6406204-84f5-adb1-d00e-979ebeebbbde@redhat.com/
  30 #
  31 # To support cleaning up Makefiles we created this program to
  32 # help sort existing lists converted to the new format.
  33 #
# The program reads the Makefile to sort from standard input
# and writes the correctly sorted result to standard output
# (redirect the output to a temporary file, not to the input file).
  37 #
  38 # Sorting is only carried out between two special markers:
  39 # (a) Marker start is '<variable> += \' (or '= \', or ':= \')
  40 # (b) Marker end is '  # <variable>' (whitespace matters)
  41 # With everything between (a) and (b) being sorted accordingly.
  42 #
  43 # You can use it like this:
  44 # $ scripts/sort-makefile-lines.py < elf/Makefile > elf/Makefile.tmp
  45 # $ mv elf/Makefile.tmp elf/Makefile
  46 #
  47 # The Makefile lines in the project are sorted using the
  48 # following rules:
  49 # - All lines are sorted as-if `LC_COLLATE=C sort`
  50 # - Lines that have a numeric suffix and whose leading prefix
#   matches exactly are sorted according to the numeric suffix
  52 #   in increasing numerical order.
  53 #
  54 # For example:
  55 # ~~~
  56 # tests += \
  57 #   test-a \
  58 #   test-b \
  59 #   test-b1 \
  60 #   test-b2 \
  61 #   test-b10 \
  62 #   test-b20 \
  63 #   test-b100 \
  64 #   # tests
  65 # ~~~
  66 # This example shows tests sorted alphabetically, followed
  67 # by a numeric suffix sort in increasing numeric order.
  68 #
  69 # Cleanups:
  70 # - Tests that end in "a" or "b" variants should be renamed to
  71 #   end in just the numerical value. For example 'tst-mutex7robust'
  72 #   should be renamed to 'tst-mutex12' (the highest numbered test)
  73 #   or 'tst-robust11' (the highest numbered test) in order to get
  74 #   reasonable ordering.
  75 # - Modules that end in "mod" or "mod1" should be renamed. For
  76 #   example 'tst-atfork2mod' should be renamed to 'tst-mod-atfork2'
#   (test module for atfork2). If there is more than one module
  78 #   then they should be named with a suffix that uses [0-9] first
  79 #   then [A-Z] next for a total of 36 possible modules per test.
  80 #   No manually listed test currently uses more than that (though
  81 #   automatically generated tests may; they don't need sorting).
  82 # - Avoid including another test and instead refactor into common
  83 #   code with all tests including the common code, then give the
  84 #   tests unique names.
  85 #
  86 # If you have a Makefile that needs converting, then you can
  87 # quickly split the values into one-per-line, ensure the start
  88 # and end markers are in place, and then run the script to
  89 # sort the values.
  90
  91 import sys
  92 import locale
  93 import re
  94 import functools
  95
  96 def glibc_makefile_numeric(string1, string2):
  97     # Check if string1 has a numeric suffix.
  98     var1 = re.search(r'([0-9]+) \\$', string1)
  99     var2 = re.search(r'([0-9]+) \\$', string2)
 100     if var1 and var2:
 101         if string1[0:var1.span()[0]] == string2[0:var2.span()[0]]:
 102             # string1 and string2 both share a prefix and
 103             # have a numeric suffix that can be compared.
 104             # Sort order is based on the numeric suffix.
 105             # If the suffix is the same return 0, otherwise
 106             # > 0 for greater-than, and < 0 for less-than.
 107             # This is equivalent to the numerical difference.
 108             return int(var1.group(1)) - int(var2.group(1))
 109     # Default to strcoll.
 110     return locale.strcoll(string1, string2)
 111
 112 def sort_lines(lines):
 113
 114     # Use the C locale for language independent collation.
 115     locale.setlocale (locale.LC_ALL, "C")
 116
 117     # Sort using a glibc-specific sorting function.
 118     lines = sorted(lines, key=functools.cmp_to_key(glibc_makefile_numeric))
 119
 120     return lines
 121
 122 def sort_makefile_lines():
 123
 124     # Read the whole Makefile.
 125     lines = sys.stdin.readlines()
 126
 127     # Build a list of all start markers (tuple includes name).
 128     startmarks = []
 129     for i in range(len(lines)):
 130         # Look for things like "var = \", "var := \" or "var += \"
 131         # to start the sorted list.
 132         var = re.search(r'^([a-zA-Z0-9-]*) [\+:]?\= \\$', lines[i])
 133         if var:
 134             # Remember the index and the name.
 135             startmarks.append((i, var.group(1)))
 136
 137     # For each start marker try to find a matching end mark
 138     # and build a block that needs sorting.  The end marker
 139     # must have the matching comment name for it to be valid.
 140     rangemarks = []
 141     for sm in startmarks:
 142         # Look for things like "  # var" to end the sorted list.
 143         reg = r'^  # ' + sm[1] + r'$'
 144         for j in range(sm[0] + 1, len(lines)):
 145             if re.search(reg, lines[j]):
 146                 # Remember the block to sort (inclusive).
 147                 rangemarks.append((sm[0] + 1, j))
 148                 break
 149
 150     # We now have a list of all ranges that need sorting.
 151     # Sort those ranges (inclusive).
 152     for r in rangemarks:
 153         lines[r[0]:r[1]] = sort_lines(lines[r[0]:r[1]])
 154
 155     # Output the whole list with sorted lines to stdout.
 156     [sys.stdout.write(line) for line in lines]
 157
 158
 159 def main(argv):
 160     sort_makefile_lines ()
 161
 162 if __name__ == '__main__':
 163     main(sys.argv[1:])