bin/repo-font-audit

   1 #!/bin/sh
   2 # Quick and dirty script to audit font use in a yum package repository
   3 #
   4 # It is slow, it is ugly, and it requires a good network connection
   5
   6 # Function declarations
   7
   8 usage() {
   9 cat >&2 << EOF_USAGE
  10 Usage: $0 <id> <location>
  11 <id>:       identifier of the package repository to check
  12 <location>: location of the package repository to check
  13
  14 Examples:
  15 $0 tmp /tmp/rpm
  16 $0 tmp file://tmp/rpm
  17 $0 rawhide http://example.com/mirrors/fedora.redhat.com/fedora/linux/development/x86_64/os/
  18
  19 EOF_USAGE
  20 exit 1
  21 }
  22
  23 DATADIR="$(dirname $0)/../private"
  24
  25 # Filenames with whitespace or & + makefile = !!!
  26 bad_pattern="[[:space:]&\:]"
  27
  28 # Tools and humans make mistakes; try to fix some obvious ones
  29 fix_url() {
  30 sed 's=^file:\(\/*\)=/=' | sed 's=^\(ftp\|http\|https\):\(\/*\)=\1://='
  31 }
  32
  33 pretty_indent() {
  34   fold -s -w $(($(tput cols) - 2)) \
  35   | while read line ; do echo "  $line" ; done
  36 }
  37
  38 # $1 = CSV file to evaluate
  39 tally() {
  40
  41 awk -F "|" '
  42   {
  43     filesize   += $17
  44     filenumber += 1
  45     rpmsz[$3 "-" $4 "." $5] = $6
  46     srpms[$2] = 1
  47   }
  48   END {
  49     for (rpm in rpmsz) {
  50       rpmsize   += rpmsz[rpm]
  51       rpmnumber +=1
  52     }
  53     for (srpm in srpms) {
  54       srpmnumber +=1
  55     }
  56     rpmsize  = rpmsize  / (1024*1024)
  57     filesize = filesize / (1024*1024)
  58     print filenumber "|" rpmnumber "|" srpmnumber "|" filesize "|" rpmsize
  59   }' "$1"
  60 }
  61
  62
  63 summary() {
  64 if [ $(cat "$1" | wc -l) -gt 0 ] ; then
  65   tally "$1" | awk -F '|' '{ print "⇒ " $1 " file(s) (" $4 " MiB) in " $2 \
  66         " package(s) (" $5 " MiB) generated from " $3 " source package(s)." }'\
  67         | pretty_indent
  68 else
  69   echo "⇒  None!" | pretty_indent
  70 fi
  71 echo ""
  72 }
  73
  74
  75 # $1 = summary file
  76 # $2 = temporary data file
  77 # $3 = test id
  78 complete_csv_summary() {
  79   awk -F '|' -v t_datafile="$2" -v testlabel="$3" \
  80       'FILENAME==t_datafile { fail[$1"|"$2"|"$3"|"$4"|"$5"|"$10] = 1 }
  81        FILENAME!=t_datafile && FNR==1 {
  82         max = NF + 1
  83         header = $1
  84         for (i = 2 ; i <= max ; i++) {
  85            header = header "|" $i
  86          }
  87         print header testlabel
  88        }
  89        FILENAME!=t_datafile && FNR>1 {
  90          line = $1 "|" $2 "|" $3 "|" $4 "|" $5 "|" $6
  91          newline = $1
  92          for (i = 2 ; i <= max ; i++) {
  93            newline = newline "|" $i
  94          }
  95          print newline fail[line]
  96          processed[line] = 1
  97        }
  98        END {
  99          for ( line in fail ) {
 100            if ( ! processed[line] ) {
 101              newline = line
 102              for (i = 7 ; i <= max ; i++) {
 103                newline = newline "|"
 104              }
 105              print newline fail[line]
 106            }
 107          }
 108        }' "$2" "$1" > tmp/tmp.ccs.csv
 109   awk 'FNR==1' tmp/tmp.ccs.csv > "$1"
 110   awk 'FNR>1'  tmp/tmp.ccs.csv | sort -f -t '|' >> "$1"
 111   rm tmp/tmp.ccs.csv
 112 }
 113
 114
 115 substats() {
 116 ss_datafile="$1"
 117
 118 awk -F '|' '{ print $NF }' "$ss_datafile" | sort | uniq \
 119   | while read key ; do
 120     echo -n "$key|"
 121     awk -F '|' -v key="$key" '$NF==key' "$ss_datafile" > tmp/tmp.ss.csv
 122     tally tmp/tmp.ss.csv
 123     rm tmp/tmp.ss.csv
 124   done
 125 }
 126
 127
 128 # $1 = CSV file to sum_up
 129 stats() {
 130 s_datafile="$1"
 131
 132 summary "$s_datafile"
 133
 134 if [ $(cat "$s_datafile" | wc -l) -gt 0 ] ; then
 135   (echo "Format|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
 136   awk -F '|' '$13 != "" { print $0 "|" $15 }' "$s_datafile" > tmp/tmp.s.csv
 137   substats tmp/tmp.s.csv) | column -t -s '|' | pretty_indent
 138   echo ""
 139
 140   (echo "Arch|Files|rpm|srpm|Files (MiB)|rpm (MiB)"
 141   awk -F '|' '{ print $0 "|" $5 }' "$s_datafile" > tmp/tmp.s.csv
 142   substats tmp/tmp.s.csv)| column -t -s '|' | pretty_indent
 143   echo ""
 144
 145   rm tmp/tmp.s.csv
 146 fi
 147 }
 148
 149
 150 # $1 = CSV file to sum_up
 151 list_rpm() {
 152
 153 [ $(cat "$1" | wc -l) -gt 0 ] && \
 154 awk -F '|' '
 155   {
 156     rpm = $3
 157     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 158     total[rpm] += 1
 159   }
 160   END {
 161     n = asorti(total, sorted)
 162     for ( i = 1 ; i <= n ; i++ ) {
 163       line = line sorted[i] "(" total[sorted[i]] "), "
 164     }
 165     sub(", $","",line)
 166     print line
 167   }' "$1" | pretty_indent
 168
 169 }
 170
 171 # Pipe CSV file to sum_up
 172 # It is assumed to be of the form
 173 # rpm | pattern to match | pattern to display
 174 # patterns may include # as separators
 175 enum_rpm() {
 176 awk -F '|' '
 177   BEGIN { print "" }
 178   {
 179     rpm = $1
 180     key = $2
 181     pat = $3
 182     files[key] = files[key] pat "#" rpm "|"
 183   }
 184   END {
 185     for (key in files) {
 186       sub("\\|$", "", files[key])
 187       total = split(files[key], lines,"|")
 188       pb[total] = pb[total] key "|"
 189     }
 190     n = asorti(pb, sorted)
 191     for (i = n; i >= 1; i--) {
 192       total = sorted[i]
 193       sub("\\|$", "", pb[total])
 194       split(pb[total], keys,"|")
 195       nk = asort(keys, sortedkeys)
 196       for ( k = 1 ; k <= nk ; k++ ) {
 197         key = sortedkeys[k]
 198         print gensub("#", ", ", "g", key)
 199         split(files[key], lines, "|")
 200         nl = asort(lines, sortedlines)
 201         for ( l = 1 ; l <= nl ; l++ ) {
 202           print "  " gensub("#", "\t", "G", lines[l])
 203         }
 204         print ""
 205       }
 206     }
 207   }' | pretty_indent
 208 }
 209
 210 # $1 = this test id
 211 # $2 = csv file with the results of this test
 212 # $3 = csv file that consolidates the results of all tests
 213 test_wrapup() {
 214
 215 complete_csv_summary "$3" "$2" "$1"
 216 summary "$2"
 217
 218 }
 219
 220
 221 # $1 = csv file with test results
 222 test_score() {
 223 if [ $(cat "$1" | wc -l) -gt 1 ] ; then
 224   awk -F '|' '
 225     BEGIN {
 226       lh=7
 227       headerline = "P#"
 228     }
 229     FNR==1 {
 230       max = NF
 231     }
 232     FNR>1 {
 233       id = ""
 234       for (i = 1 ; i < (lh-1) ; i++) {
 235         if ($i != "") id = id $i "|"
 236         else          id = id   " |"
 237       }
 238       sub("\\|$","",id)
 239       fail[id] = 1
 240       for (i = lh ; i <= max ; i++) {
 241         score[id"|"i] = score[id"|"i] + $i
 242         totalscore[i] = totalscore[i] + $i
 243       }
 244     }
 245     END {
 246       for (i = lh ; i <= max ; i++) {
 247           if ( totalscore[i] ) { headerline  = headerline "|" "t" i+1-lh }
 248         }
 249       print headerline
 250       n = asorti(fail)
 251       for (l = 1; l <= n; l++) {
 252         newline = l
 253         for (c = lh ; c <= max ; c++) {
 254           if ( totalscore[c] ) {
 255             if ( score[fail[l]"|"c] )
 256               { newline = newline "|" score[fail[l]"|"c] }
 257             else
 258               { newline = newline "|‧" }
 259           }
 260         }
 261         print newline
 262       }
 263       totalline = "Total"
 264       for (i = lh ; i <= max ; i++) {
 265         if ( totalscore[i] ) { totalline = totalline "|"  totalscore[i] }
 266       }
 267       print totalline
 268     }' "$1" | column -t -s '|'
 269
 270   echo ""
 271
 272   awk -F '|' '
 273     BEGIN { lh=7 }
 274     FNR>1 {
 275       id = ""
 276       for (i = 1 ; i < (lh-1) ; i++) {
 277         if ($i != "") id = id $i "|"
 278         else          id = id   " |"
 279       }
 280       sub("\\|$","",id)
 281       fail[id] = 1
 282     }
 283     END {
 284       print "P#|Maintainer|SRPM|RPM|EVR|Arch"
 285       n = asorti(fail)
 286       for (l = 1; l <= n; l++) {
 287       print l "|" fail[l]
 288       }
 289     }' "$1" | column -t -s '|'
 290
 291   echo ""
 292   echo "Test explanation:"
 293   echo ""
 294
 295   awk -F '|' '
 296     BEGIN { lh = 7 }
 297     FNR==1 {
 298       max = NF
 299       for (i = lh ; i <= max ; i++) {
 300         legend[i] = i+1-lh "|" $i }
 301       }
 302     FNR>1 {
 303       for (i = lh ; i <= max ; i++) {
 304         totalscore[i] = totalscore[i] + $i
 305       }
 306     }
 307     END {
 308       for (i = lh ; i <= max ; i++) {
 309         if ( totalscore[i] ) { print legend[i] }
 310       }
 311     }' "$1" | while read line ; do
 312       testnumber=$(echo $line | awk -F '|' '{ print "t"$1 }')
 313       testid=$(echo $line | awk -F '|' '{ print $2 }')
 314       echo -n "$testnumber. "
 315       "$DATADIR/test-info" $testid title
 316       echo ""
 317       ( "$DATADIR/test-info" $testid help ) | pretty_indent
 318     done
 319
 320 else
 321   echo "⇒  None!"
 322 fi
 323
 324 }
 325
 326
 327 collect() {
 328 start_time=$(date "+%s")
 329
 330 # This is very ugly but the safest syntax to use with any repoquery version
 331 FREPOURL=$(echo "$REPOURL" | sed 's=^/=file:///=')
 332
 333 echo "Looking for packages:"
 334 echo "— with font metadata…"
 335 repoquery --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
 336           --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|M" \
 337           --whatprovides "font(*)" --quiet \
 338           | sort | uniq \
 339           | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
 340           > "$FPL"
 341
 342 echo "— that include files with common font extensions…"
 343 repoquery --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
 344           --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|E" \
 345           -f '*.ttf' -f '*.otf' -f '*.ttc' \
 346           -f '*.pfb' -f '*.pfa' \
 347           -f '*.pcf.gz' -f '*.pcf' -f '*.bdf' --quiet \
 348           | sort | uniq \
 349           | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
 350           > "$PWFL"
 351
 352 echo "— that use the core X11 protocol…"
 353 repoquery --repofrompath=$REPOID,$FREPOURL --repoid=$REPOID \
 354           --qf "%{sourcerpm}|%{name}|%{epoch}:%{version}-%{release}|%{arch}|%{packagesize}|X" \
 355           --whatrequires 'libX11.so*' \
 356           | sort | uniq \
 357           | sed 's=^\([^|]\+\?\)\-\([^-|]\+\?\)\-\([^-|]\+\?\)|=\1|=g' \
 358           > "$XPL"
 359
 360 if [ $(cat "$FPL" "$PWFL" "$XPL" | wc -l) -eq 0 ] ; then
 361   echo "Nothing to do!"
 362   exit
 363 fi
 364
 365 cat "$FPL" "$PWFL" "$XPL" \
 366   | awk -F '|' '
 367       { tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] = tag[$1 "|" $2 "|" $3 "|" $4 "|" $5] $6 "," }
 368       END {
 369         n =  asorti(tag,rpm)
 370         for (l = 1; l <= n; l++) {
 371           sub(",$", "", tag[rpm[l]])
 372           print rpm[l] "|" tag[rpm[l]]
 373         }
 374       }' > "tmp/$FL.1"
 375
 376 echo ""
 377 echo "Inspecting packages:"
 378 cd "tmp"
 379 rm -f "$FL.2"
 380
 381 cat "$FL.1" | while read line ; do
 382   rpm=$(echo "$line" | awk -F '|' '{ print $2 "-" $3 "." $4 }')
 383   rpmline=$(echo "$line" | awk -F '|' '{ print $1 "|" $2 "|" $3 "|" $4 "|" $5 }')
 384   rpmd=$(echo "$rpm" | sed "s+$bad_pattern+_+g")
 385   echo "$line" | cut -d "|" -f 6 | grep -q "M" && metadata="M" || metadata=""
 386   echo -n "– $rpm ◔"
 387   mkdir "$rpmd"
 388   cd "$rpmd"
 389   rpm_loc=$(repoquery --repofrompath=$REPOID,$REPOURL --repoid=$REPOID \
 390                       --location "$rpm" 2>/dev/null \
 391               | fix_url)
 392   if $(echo "$rpm_loc" | grep -q -e '^[\./~]') ; then
 393     ln -s "$rpm_loc" .
 394   else
 395     curl -O --silent --show-error --retry 3 --connect-timeout 10 --retry-delay 15 --url "$rpm_loc"
 396   fi
 397   echo -ne "\b◑"
 398   rpm2cpio *.rpm > "$rpm.cpio"
 399   echo -ne "\b◕"
 400   cat "$rpm.cpio" | cpio --quiet -it > "$rpm.lst"
 401   nofont_score=$(cat "$rpm.lst" | sed 's=^\./=/=g' \
 402     | grep -viE -e "^/usr/share/fonts" \
 403                 -e "^/usr/share/fontconfig/conf.avail/" \
 404                 -e "^/etc/fonts/conf.d/" \
 405                 -e "^/usr/share/doc/" \
 406                 -e "^/etc/X11/fontpath.d/" \
 407                 -e '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
 408     | wc -l)
 409   echo "$rpmline|$metadata|$nofont_score" > rpm-info.txt
 410   if $(echo $line | cut -d "|" -f 6 | grep -q "E") ; then
 411     cat "$rpm.lst" \
 412       | grep -iE '\.((ttf)|(ttc)|(otf)|(pfa)|(pfb)|(bdf)|(pcf)|(pcf\.gz))$' \
 413       | sort > "$rpm.fonts.lst"
 414   fi
 415   if $(echo $line | cut -d "|" -f 6 | grep -q "X") ; then
 416     cat "$rpm.lst" \
 417       | grep -i  -e "^./sbin/" \
 418                  -e "^./usr/sbin/" \
 419                  -e "^./usr/kerberos/sbin" \
 420                  -e "^./bin/" \
 421                  -e "^./usr/bin/" \
 422                  -e "^./usr/kerberos/bin/" \
 423                  -e "^./lib.*/" \
 424                  -e "^./usr/lib.*/" \
 425                  -e "^./opt/" \
 426                  -e "^./usr/X11R6/" \
 427                  -e "^./usr/games/" \
 428                  -e "^./usr/local/" \
 429       | grep -vi -e "^./usr/bin/dmxwininfo" \
 430                  -e "^./usr/bin/Xdmx" \
 431                  -e "^./usr/bin/xfontsel" \
 432                  -e "^./usr/bin/xlsfonts" \
 433                  -e "^./usr/bin/Xnest" \
 434                  -e "^./usr/bin/xprop" \
 435                  -e "^./usr/bin/xsetroot" \
 436                  -e "^./usr/bin/xwininfo" \
 437                  -e "^./usr/bin/x11vnc" \
 438                  -e "^./usr/bin/x2vnc" \
 439                  -e "^./usr/lib.*/libXcursor.so" \
 440       | sort > "$rpm.bin.lst"
 441   fi
 442   touch "$rpm.fonts.lst" "$rpm.bin.lst"
 443   cat "$rpm.fonts.lst" "$rpm.bin.lst"  | sort | uniq > "$rpm.files.lst"
 444   mkdir src
 445   cd src
 446   cpio -idm --quiet -E "../$rpm.files.lst" < "../$rpm.cpio"
 447   cd ..
 448   rm "$rpm.cpio"
 449   echo -ne "\b● "
 450   sed 's=^\./==g' "$rpm.fonts.lst" \
 451     | while read file; do
 452     unset target
 453     type=$(file -bzh "src/$file")
 454     case $(echo "$type" | sed 's+ (\(.*\)++g' \
 455                              | sed 's+ `\(.*\)++g' \
 456                              | sed 's+,\(.*\)++g' \
 457                              | sed 's+\( \)*$++g' ) in
 458       "TrueType font data")
 459         echo -n "t"
 460         ;;
 461       "TrueType font collection data")
 462         echo -n "T"
 463         ;;
 464       "OpenType font data")
 465         echo -n "o"
 466         ;;
 467       "X11 Portable Compiled Font data")
 468         echo -n "b"
 469         ;;
 470       "X11 BDF font text")
 471         echo -n "B"
 472         ;;
 473       # PostScript files are a mess
 474       "PostScript Type 1 font text")
 475         echo -n "P"
 476         ;;
 477       "PostScript Type 1 font program data")
 478         echo -n "p"
 479         ;;
 480       "PostScript document text conforming DSC level 3.0")
 481         type="ignored"
 482         echo -n "-"
 483         ;;
 484       "PostScript document text"|"8086 relocatable")
 485         echo -n "!" >&2
 486         ;;
 487       "symbolic link to"|"broken symbolic link to")
 488         target=$(readlink -m "src/$file" | sed "s+^$PWD/src++g")
 489         if $(echo "$target" | grep -q "^/usr/share/fonts") ; then
 490           type="Link"
 491           echo -n "l"
 492         else
 493           type="ignored"
 494           echo -n "-"
 495         fi
 496         ;;
 497       *)
 498         echo -n "?"
 499         ;;
 500     esac
 501     if [ "$type" != "ignored" -a ! -h "src/$file" ] ; then
 502        mkdir -p $(dirname "tmp/$file")
 503        echo "/$file|$type" > "tmp/$file.rfo.fonts.info"
 504        if [ "$type" != "TrueType font data" -a \
 505             "$type" != "OpenType font data" ] ; then
 506          touch "tmp/$file.rfo.unicover"
 507        fi
 508     fi
 509     if [ "$type" = "Link" ] ; then
 510       echo "/$file|$target" >> font-links.txt
 511     fi
 512   done
 513
 514   sed 's=^\./==g' "$rpm.bin.lst" \
 515     | while read file ; do
 516     type=$(file -bzh "src/$file")
 517     if $(echo "$type" | grep -q -e '^ELF ' -e ' ELF ') ; then
 518       echo -n "‧"
 519       mkdir -p $(dirname "tmp/$file")
 520       echo "/$file|$type" > "tmp/$file.rfo.core-fonts.info"
 521     fi
 522   done
 523
 524   echo ""
 525   cd ..
 526 done
 527
 528 echo ""
 529 echo "Analysing files…"
 530
 531 find . -type d | grep "$bad_pattern" \
 532   | while read dir ; do
 533       mkdir -p $(echo "$dir" | sed "s+$bad_pattern+_+g")
 534     done
 535
 536 find . -type f | grep "$bad_pattern" \
 537   | while read file ; do
 538       mv "$file" $(echo "$file"| sed "s+$bad_pattern+_+g")
 539     done
 540
 541 # There must be a cleaner way to do this
 542 parallelism=$(($(cat /proc/cpuinfo | grep processor | wc -l)+1))
 543 make -s -r -j $parallelism -f "$DATADIR/repo-font-audit.mk"
 544
 545 find .  -name "*\.rfo\.fonts" \
 546      -o -name "*\.rfo\.core-fonts" \
 547      -o -name processed-font-links.txt \
 548   | while read datafile ; do
 549       cat "$datafile" >> "$FL.2"
 550     done
 551
 552 mkdir ../data
 553 for rpmlint in */rpmlint.txt ; do
 554   mkdir "../data/$(dirname $rpmlint)"
 555   mv "$rpmlint" "../data/$rpmlint"
 556 done
 557
 558 for ext in fontlint unicover fc-query ; do
 559   find . -name "*\.rfo\.$ext" \
 560      | while read file ; do
 561           dest=$(echo "$file" \
 562             | sed "s+./\([^/]*\)/tmp/\(.*\).rfo.$ext+\1|\2.$ext.txt+" \
 563             | sed 's+/+_+g' | sed 's+|+/+g')
 564           [ -s "$file" ] && mv "$file" "../data/$dest"
 565        done
 566 done
 567
 568 rm -fr "*/"
 569 echo " ♻"
 570 cd ..
 571 }
 572
 573
 574 consolidate() {
 575 echo ""
 576 echo "Consolidating data…"
 577
 578 echo "Maintainer|SRPM|RPM|EVR|Arch|RPM size|RPM metadata|foreign data|\
 579 rpmlint score|filename|family name|face name|fixed family name|\
 580 fixed face name|format (fontconfig)|format (libmagic)|file size|checksum|\
 581 partial scripts|partial blocks|fontlint|symlink target|symlink provider|\
 582 core fonts use" > "$FL"
 583
 584 if  [ -s "tmp/$FL.2" -a -r "tmp/$FL.2" ] ; then
 585
 586   # FIXME: make this distro-agnostic
 587   cut -d "|" -f 1 "tmp/$FL.2" | sort | uniq \
 588     | /usr/bin/fedoradev-pkgowners 2>/dev/null \
 589     | sed 's+ +|+g' \
 590     | awk -F "|" -v file="tmp/$FL.2" '
 591         { owner[$2]=$1 }
 592         END {
 593           while ((getline < file) > 0) {
 594             print owner[$1] "|" $0
 595           }
 596         }' | sort -df -t "|" -k 1 -k 2 -k 3 > "tmp/$FL.3"
 597
 598   awk -F "|" '
 599     ($16 != "Link") { source[$10] = $3 "-" $4
 600                       out[$0] = $0 "|" }
 601     ($16 == "Link") { link[$0] = $22 }
 602     END {
 603       for (line in link) {
 604         out[line] = line "|" source[link[line]]
 605       }
 606       n = asort(out)
 607       for (i = 1; i <= n; i++) print out[i]
 608     }' "tmp/$FL.3" > "tmp/$FL.4"
 609
 610   cat "tmp/$FL.4" \
 611     | sort -df -t "|" -k 1 -k 2 -k 3 -k 4n -k 13 -k 14 -k 10 \
 612     | sed 's=$=|=g' | sed 's=|0|=||=g' | sed 's=|0|=||=g' | sed 's=|\+$==g' \
 613     >> "$FL"
 614
 615 fi
 616
 617 echo "Maintainer|SRPM|RPM|EVR|Arch|File" > "$TSUM"
 618 }
 619
 620
 621 analyse() {
 622
 623 echo ""
 624 echo "Conducting tests:"
 625 echo ""
 626
 627
 628 echo -n "— "
 629 "$DATADIR/test-info" outside-usr-share-fonts title }
 630
 631 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
 632             ($10 !~ /^\/usr\/share\/fonts\//)' "$FL" \
 633   | sort -df -t "|" > tmp/tmp.csv
 634
 635 list_rpm tmp/tmp.csv
 636 test_wrapup outside-usr-share-fonts tmp/tmp.csv "$TSUM"
 637
 638
 639 echo -n "— "
 640 "$DATADIR/test-info" without-rpm-metadata title
 641
 642 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
 643             ($7 != "M")' "$FL" \
 644   | sort -df -t "|" > tmp/tmp.csv
 645
 646 list_rpm tmp/tmp.csv
 647 test_wrapup without-rpm-metadata tmp/tmp.csv "$TSUM"
 648
 649
 650 echo -n "— "
 651 "$DATADIR/test-info" family-mixing title
 652
 653 awk -F '|' '
 654   (NR > 1) && ($11 != "") {
 655     rpm = $3 "-" $4 "." $5
 656     files[rpm] = files[rpm] $0 "#"
 657     if ( ! family[rpm] ) {
 658       family[rpm] = $11
 659     }
 660     else {
 661       if (family[rpm] != $11) {
 662         mixed[rpm] = 1
 663       }
 664     }
 665   }
 666   END {
 667     for (rpm in mixed) {
 668       sub("#$", "", files[rpm])
 669       split(files[rpm], lines,"#")
 670       for (l in lines) {
 671         print lines[l]
 672       }
 673     }
 674   }' "$FL" \
 675   | sort -df -t "|" > tmp/tmp.csv
 676
 677 list_rpm tmp/tmp.csv
 678 test_wrapup family-mixing tmp/tmp.csv "$TSUM"
 679
 680
 681 echo -n "— "
 682 "$DATADIR/test-info" duplicated-file title
 683
 684 awk -F '|' '
 685   (NR > 1) && ($24 != "X") && ($16 != "Link") {
 686     files[$18] = files[$18] $0 "#"
 687   }
 688   END {
 689     for (checksum in files) {
 690       sub("#$", "", files[checksum])
 691       count = split(files[checksum], lines,"#")
 692       if (count > 1) {
 693         for (l in lines) {
 694           print lines[l]
 695         }
 696       }
 697     }
 698   }' "$FL" \
 699   | sort -g -t "|" -k 18 > tmp/tmp.csv
 700
 701 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 702 awk -F '|' '
 703   {
 704     rpm = $3
 705     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 706     sub(".*/", "", $10)
 707     if ( $11 != "" ) { sig = $11 }
 708     if ( $12 != "") {
 709       if ( sig != "" ) { sig = sig ", " }
 710       sig = sig $12
 711     }
 712     if ( sig != "" ) { sig = "#(" sig ")" }
 713     print rpm "|" $18 "|" $10 sig
 714   }' tmp/tmp.csv | enum_rpm
 715
 716 test_wrapup duplicated-file tmp/tmp.csv "$TSUM"
 717
 718 echo -n "— "
 719 "$DATADIR/test-info" duplicated-face-ext title
 720
 721 awk -F '|' '
 722   (NR > 1) && ($11 != "") && ($12 != "") {
 723     face = $11 "|" $12
 724     rpm  = $3  "-" $4 "." $5
 725     files[face] = files[face] $0 "#"
 726     if ( ! loc[face] ) { loc[face] = rpm }
 727     else {
 728       if (loc[face] != rpm) {
 729         duplicated[face] = 1
 730       }
 731     }
 732   }
 733   END {
 734     for (face in duplicated) {
 735       sub("#$", "", files[face])
 736       split(files[face], lines,"#")
 737       for (l in lines) {
 738         print lines[l]
 739       }
 740     }
 741   }' "$FL" \
 742   | sort -df -t "|" -k 11 -k 12 > tmp/tmp.csv
 743
 744 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 745 awk -F '|' '
 746   {
 747     rpm = $3
 748     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 749     sub(".*/", "", $10)
 750     print rpm "|" $11 ", " $12 "|" $10
 751   }' tmp/tmp.csv | enum_rpm
 752
 753 test_wrapup duplicated-face-ext tmp/tmp.csv "$TSUM"
 754
 755
 756 echo -n "— "
 757 "$DATADIR/test-info" fc-query title
 758
 759 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
 760             ($15 == "")' "$FL" > tmp/tmp.csv
 761
 762 list_rpm tmp/tmp.csv
 763 test_wrapup fc-query tmp/tmp.csv "$TSUM"
 764
 765
 766 echo -n "— "
 767 "$DATADIR/test-info" libmagic title
 768
 769 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24!="X") && \
 770             (tolower(" " $16 " ") !~ / font /)' "$FL" > tmp/tmp.csv
 771
 772 list_rpm tmp/tmp.csv
 773 test_wrapup libmagic tmp/tmp.csv "$TSUM"
 774
 775
 776 echo -n "— "
 777 "$DATADIR/test-info" broken-symlink title
 778
 779 awk -F '|' '(NR > 1) && ($16 == "Link") && ($23 == "")' "$FL" > tmp/tmp.csv
 780
 781 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 782 awk -F '|' '
 783   {
 784     rpm = $3
 785     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 786     sub(".*/", "", $10)
 787     print rpm "|" $10 " ↛ " $22
 788   }' tmp/tmp.csv | enum_rpm
 789
 790 test_wrapup broken-symlink tmp/tmp.csv "$TSUM"
 791
 792 echo -n "— "
 793 "$DATADIR/test-info" rpmlint title
 794
 795 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
 796             ($9 != "")' "$FL" > tmp/tmp.csv
 797
 798 list_rpm tmp/tmp.csv
 799 test_wrapup rpmlint tmp/tmp.csv "$TSUM"
 800
 801
 802 echo -n "— "
 803 "$DATADIR/test-info" mixed-with-non-font-data title
 804
 805 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
 806             ($8 != "")' "$FL" > tmp/tmp.csv
 807
 808 list_rpm tmp/tmp.csv
 809 test_wrapup mixed-with-non-font-data tmp/tmp.csv "$TSUM"
 810
 811
 812 echo -n "— "
 813 "$DATADIR/test-info" arch-package title
 814
 815 awk -F '|' '(NR > 1) && ($16 != "Link") && ($24 != "X") && \
 816             ($5 != "noarch")' "$FL" > tmp/tmp.csv
 817
 818 list_rpm tmp/tmp.csv
 819 test_wrapup arch-package tmp/tmp.csv "$TSUM"
 820
 821
 822 echo -n "— "
 823 "$DATADIR/test-info" bad-rpm-naming title
 824
 825 awk -F "|" '(NR > 1) && ($16 != "Link") && ($24!="X") && \
 826             $3 !~ /^[0-9abcdefghijklmnopqrstuvwxyz\.-]*-fonts$/' "$FL" \
 827             > tmp/tmp.csv
 828
 829 list_rpm tmp/tmp.csv
 830 test_wrapup bad-rpm-naming tmp/tmp.csv "$TSUM"
 831
 832
 833 echo -n "— "
 834 "$DATADIR/test-info" bad-naming title
 835
 836 awk -F '|' '(NR > 1) && \
 837             ((tolower($11)!=tolower($13)) || (tolower($12)!=tolower($14)))' \
 838     "$FL" > tmp/tmp.csv
 839
 840 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 841 awk -F '|' '
 842   {
 843     rpm = $3
 844     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 845     sub(".*/", "", $10)
 846     print rpm "|" $11 ", " $12 " → " $13 ", " $14 "|" $10
 847   }' tmp/tmp.csv | enum_rpm
 848
 849 test_wrapup bad-naming tmp/tmp.csv "$TSUM"
 850
 851
 852 echo -n "— "
 853 "$DATADIR/test-info" core-fonts title
 854
 855 awk -F '|' '(NR > 1) && ($24=="X")' "$FL" > tmp/tmp.csv
 856
 857 list_rpm tmp/tmp.csv
 858 test_wrapup core-fonts tmp/tmp.csv "$TSUM"
 859
 860
 861 echo -n "— "
 862 "$DATADIR/test-info" font-linking title
 863
 864 awk -F '|' '(NR > 1) && ($16=="Link")' "$FL" > tmp/tmp.csv
 865
 866 list_rpm tmp/tmp.csv
 867 test_wrapup font-linking tmp/tmp.csv "$TSUM"
 868
 869
 870 echo -n "— "
 871 "$DATADIR/test-info" duplicated-face-int title
 872
 873 awk -F '|' '
 874   (NR > 1) && ($24!="X") && \
 875     ($15 != "PCF") && ($15 != "Type 1") && \
 876     ($11 != "") && ($12 != "") {
 877     sig = $3 "|" $4 "|" $5 "|" $11 "|" $12
 878     files[sig] = files[sig] $0 "#"
 879   }
 880   END {
 881     for (sig in files) {
 882       sub("#$", "", files[sig])
 883       n = split(files[sig],lines,"#")
 884       if (n > 1) {
 885         for (l in lines) { print lines[l] }
 886       }
 887     }
 888   }' "$FL" \
 889   | sort -df -t "|" -k 11 -k 12 -k 3 -k 2 > tmp/tmp.csv
 890
 891 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 892 awk -F '|' '
 893   {
 894     rpm = $3
 895     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 896     sub(".*/", "", $10)
 897     print rpm "|" $11 ", " $12 "|" $10
 898   }' tmp/tmp.csv | enum_rpm
 899
 900 test_wrapup duplicated-face-int tmp/tmp.csv "$TSUM"
 901
 902
 903 echo -n "— "
 904 "$DATADIR/test-info" fontlint title
 905
 906 awk -F '|' '(NR > 1) && ($24!="X") && ($21 > 0)' "$FL" > tmp/tmp.csv
 907
 908 list_rpm tmp/tmp.csv
 909 test_wrapup fontlint tmp/tmp.csv "$TSUM"
 910
 911
 912 echo -n "— "
 913 "$DATADIR/test-info" no-english-metadata title
 914
 915 awk -F '|' '(NR > 1) && ($24!="X") && \
 916             ($15 != "") && (($11 == "") || ($12 == ""))' "$FL" > tmp/tmp.csv
 917
 918 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 919 awk -F '|' '
 920   {
 921     rpm = $3 "-" $4 "." $5
 922     if ( $7 == "M" ) { rpm = "[" rpm "]" }
 923     print $10 "|" rpm
 924   }' tmp/tmp.csv \
 925   | column -t -s '|' | pretty_indent
 926
 927 test_wrapup no-english-metadata tmp/tmp.csv "$TSUM"
 928
 929
 930 echo -n "— "
 931 "$DATADIR/test-info" partial-scripts title
 932
 933 awk -F '|' '(NR > 1) && ($24!="X") && ($19 > 0)' "$FL" > tmp/tmp.csv
 934
 935 list_rpm tmp/tmp.csv
 936 test_wrapup partial-scripts tmp/tmp.csv "$TSUM"
 937
 938
 939 echo -n "— "
 940 "$DATADIR/test-info" partial-blocks title
 941
 942 awk -F '|' '(NR > 1) && ($24!="X") && ($20 > 0)' "$FL" > tmp/tmp.csv
 943
 944 list_rpm tmp/tmp.csv
 945 test_wrapup partial-blocks tmp/tmp.csv "$TSUM"
 946 }
 947
 948
 949 sum_up() {
 950 echo ""
 951 echo "Audit results:"
 952 echo ""
 953
 954 echo "– packages that declare font metadata:"
 955
 956 awk -F '|' '(NR > 1) && ($7 == "M") && ($24!="X")' "$FL" > tmp/tmp.csv
 957 stats tmp/tmp.csv
 958
 959 echo "☛ File size is computed as extracted, while rpm is a compressed \
 960 format." | pretty_indent
 961 echo "☛ Mid-term, files in legacy PCF or Type1 formats need to be converted \
 962 or removed." | pretty_indent
 963 echo ""
 964
 965 echo "– font files in other packages (we should not find any!)"
 966
 967 awk -F '|' '(NR > 1) && ($7 != "M") && ($16 != "Link") && ($24!="X")' "$FL" > tmp/tmp.csv
 968 stats tmp/tmp.csv
 969
 970 [ $(cat tmp/tmp.csv | wc -l) -gt 0 ] && \
 971 echo "☛ Bad packaging may result in arched packages or mixed content." \
 972 | pretty_indent && echo ""
 973
 974 echo "– errors, warnings and suggestions:"
 975 echo ""
 976
 977 test_score "$TSUM"
 978
 979 }
 980
 981
 982 prepare_spam() {
 983 echo ""
 984 echo "Packing mail data…"
 985
 986 mkdir spam
 987
 988 for srpm in $(awk -F '|' 'FNR>1 { print $2 }' "$TSUM" | uniq) ; do
 989    [ ! -d "tmp/$srpm" ] && mkdir "tmp/$srpm"
 990   cp -p report.txt "tmp/$srpm/repo-report.txt"
 991   for file in "$FL" "$TSUM" ; do
 992     awk 'FNR==1' "$file" > "tmp/$srpm/$file"
 993     awk -F '|' -v srpm="$srpm" 'FNR>1 && $2==srpm' "$file" >> "tmp/$srpm/$file"
 994   done
 995   for rpm in $(awk -F '|' 'FNR>1 { print $3 "-" $4 "." $5 }' "tmp/$srpm/$TSUM" | uniq | sed "s+$bad_pattern+_+g") ; do
 996     cp -pr "data/$rpm" "tmp/$srpm"
 997   done
 998   cat > tmp/$srpm/message.txt << EOF
 999 Dear packager,
1000
1001 At $TIMESTAMP, your “$srpm” package failed one or more of the tests
1002 I was performing on the “$ID” repository located at:
1003 $REPOURL
1004
1005 There are three different reasons that may cause this message:
1006 1. your package is including one or more font files, but not packaging
1007    them properly;
1008 2. your package is including one or more font files, and I've found
1009    issues in some of them;
1010 3. your package is not shipping any font file, but the way it accesses
1011    fonts in other packages is not satisfying.
1012
1013 To stop receiving this message, you need to:
1014 1. drop the font files or fix their packaging;
1015 2. relay the fonts issues to the fonts upstream to get them revised;
1016 3. work with the code upstream to improve the way it accesses font
1017    files (usually by making it use fontconfig through a higher-level
1018    text library such as pango, pango-cairo, harfbuzz, or QT)
1019
1020 You can self-check your packages at any time by:
1021 1. installing createrepo and fontpackages-tools:
1022 # yum install createrepo fontpackages-tools
1023 2. putting your packages and any font package they depends on in a
1024    test directory
1025 3. indexing this directory with createrepo:
1026 $ createrepo path-to-test-directory
1027 4. running repo-font-audit:
1028 $ repo-font-audit test absolute-path-to-test-directory
1029
1030 A summary of the issues I detected is appended here. For your
1031 convenience a more comprehensive analysis is also attached to this
1032 message.
1033
1034 Errors, warnings and suggestions:
1035
1036 $(test_score "tmp/$srpm/$TSUM")
1037
1038 Please take the appropriate measures to fix the “$srpm” package.
1039 I will warn you again if it is still necessary next time I am ran.
1040
1041 This report was generated by the repo-font-audit command from:
1042 http://fedoraproject.org/wiki/fontpackages
1043
1044 Please post questions, suggestions, patches or bug reports to:
1045 https://admin.fedoraproject.org/mailman/listinfo/fonts
1046 (subscription required)
1047
1048 Your friendly QA robot,
1049
1050 --
1051 repo-font-audit
1052 EOF
1053   cd tmp
1054   tar cf "$srpm.tar" "$srpm"
1055   cd ..
1056   xz -9  "tmp/$srpm.tar"
1057   mv "tmp/$srpm.tar.xz" spam
1058 done
1059 cat > spam/send-messages.sh << EOF
1060 #!/bin/sh
1061 # Send warnings to problem package owners
1062 # This is a bit distribution-specific, people from other distributions are
1063 # welcome to suggest how to make it more agnostic
1064
1065 #EMAIL="repo-font-audit <your@mail>"
1066 #REPLYTO=another@mail
1067
1068 #export EMAIL REPLYTO
1069
1070 for srpm in \\
1071 $(awk -F '|' 'FNR>1 { print $2 " \\" }' "$TSUM" | uniq)
1072 ; do
1073   tar -xf \$srpm.tar.xz */message.txt -O \\
1074     | mutt -s "[RFA] Your \$srpm $ID package did not pass QA" \\
1075            -a \$srpm.tar.xz -- \$srpm-owner@fedoraproject.org
1076   sleep 5
1077 done
1078
1079 EOF
1080 chmod +x spam/send-messages.sh
1081 }
1082
# Archive the results and print the closing report.
# Three archives are moved to "$ORIGDIR": the full data ($RES), the short
# summary ($SRES) and the mail data ($SPAM, the former spam/ directory).
# The closing report gives the run time, computed from the start_time set
# by collect, the tally of the items processed and the archive locations.
# Reads: SPAM, RES, SRES, TSUM, FL, ORIGDIR, start_time
pack_data() {
echo "Packing result data…"


mv spam "$SPAM"

mkdir -p "$RES/data"
cp "$TSUM" summary.txt report.txt "$FL" "$RES/"
# data/ may be empty: ignore the complaint about the unmatched pattern.
cp -pr data/* "$RES/data" 2>/dev/null

mkdir "$SRES/"
cp "$TSUM" summary.txt report.txt "$SRES/"

for report in "$RES" "$SRES" "$SPAM" ; do
  tar cf "$report.tar" "$report"
  xz -9  "$report.tar"
  mv "$report.tar.xz" "$ORIGDIR/"
done

echo ""
echo "Audit complete!"

# Same data without the header line, for the final tally.
awk 'NR > 1' "$FL" > "tmp/$FL"

end_time=$(date "+%s")
t=$((end_time - start_time))
th=$((t / (60*60)))
tm=$(((t / 60) - (th * 60)))
ts=$((t - ((th * 60 + tm) * 60)))

# Only the non-zero units are shown; a run shorter than one second is
# reported as "0 s" rather than as an empty duration.
elapsed_time=""
if [ "$th" -gt 0 ] ; then
  elapsed_time="$th h"
fi
if [ "$tm" -gt 0 ] ; then
  elapsed_time="${elapsed_time:+$elapsed_time }$tm min"
fi
if [ "$ts" -gt 0 ] ; then
  elapsed_time="${elapsed_time:+$elapsed_time }$ts s"
fi
if [ -z "$elapsed_time" ] ; then
  elapsed_time="0 s"
fi

(
cat << EOF

Run time: $elapsed_time.
Number of items processed:
$(summary "tmp/$FL")

1. Extracted data: $ORIGDIR/$RES.tar.xz
2. Short summary: $ORIGDIR/$SRES.tar.xz
3. Mail data: $ORIGDIR/$SPAM.tar.xz

This report was generated by the repo-font-audit command from:
http://fedoraproject.org/wiki/fontpackages

Please post questions, suggestions, patches or bug reports to:
https://admin.fedoraproject.org/mailman/listinfo/fonts
(subscription required)
EOF
) | pretty_indent

}
1145
1146 # End of function declarations
1147
# Main sequence: check the arguments, set up the names shared by the
# functions above, then run the audit inside a private work directory that
# is removed at the end.
[ "$#" -lt 2 ] && usage

TIMESTAMP=$(date -u +%Y%m%dT%H%M%SZ)
ID=$1
REPOID="$1-rfa-$TIMESTAMP"
REPOURL=$(echo "$2" | fix_url)
ORIGDIR="$PWD"

FPL="font-packages.csv"
PWFL="packages-with-fonts.csv"
XPL="package-using-x11.csv"
CSL="checksums.csv"
FL="consolidated-data.csv"
TSUM="test-summary.csv"
RES="repo-font-audit-$ID-$TIMESTAMP"
SRES="repo-font-audit-$ID-$TIMESTAMP-short"
SPAM="repo-font-audit-$ID-$TIMESTAMP-mail"

# Every step below works with paths relative to the work directory: stop
# right away rather than run the audit from whatever directory we are in.
TMPDIR=$(mktemp -d --tmpdir=/tmp "$RES-XXXXXXXXXX") || {
  echo "$0: cannot create a work directory in /tmp" >&2
  exit 1
}
cd "$TMPDIR" || {
  echo "$0: cannot enter the work directory $TMPDIR" >&2
  exit 1
}

mkdir tmp

collect
consolidate
analyse | tee report.txt
sum_up  | tee summary.txt
prepare_spam
pack_data

cd "$ORIGDIR"
rm -fr "$TMPDIR"
echo "♻"