linux/scripts/checkkconfigsymbols.py
<<
>>
Prefs
   1#!/usr/bin/env python3
   2
   3"""Find Kconfig symbols that are referenced but not defined."""
   4
   5# (c) 2014-2017 Valentin Rothberg <valentinrothberg@gmail.com>
   6# (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de>
   7#
   8# Licensed under the terms of the GNU GPL License version 2
   9
  10
  11import argparse
  12import difflib
  13import os
  14import re
  15import signal
  16import subprocess
  17import sys
  18from multiprocessing import Pool, cpu_count
  19
  20
# regex expressions
# Operators and parentheses that may appear in a Kconfig expression.
OPERATORS = r"&|\(|\)|\||\!"
# A run of word characters that contains at least two upper-case letters or
# digits.
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config"/"menuconfig" line; group 1 captures the defined symbol.
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
# Any sequence of operators, whitespace and symbols.
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
# A "default ..." clause, optionally followed by an "if ..." condition.
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# A line starting with if/select/imply/"depends on"/default, followed by an
# expression.
STMT = r"^\s*(?:if|select|imply|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# A CONFIG_<symbol> reference, optionally prefixed with "D" (as in
# -DCONFIG_FOO); group 1 captures the symbol without the CONFIG_ prefix.
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
# Paths ending in "Kconfig", optionally followed by '.', '+', '-' or word
# characters.
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
# SYMBOL that starts and ends at positions which are not inside a word
# ((?!\B) rejects non-boundary positions).
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
# An indented line consisting only of "help" or "---help---".
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
# Matches strings whose last character is an ASCII letter or digit.
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
# A hexadecimal (0x...) or decimal literal.
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# A double-quoted string (shortest match).
REGEX_QUOTES = re.compile("(\"(.*?)\")")
  41
  42
  43def parse_options():
  44    """The user interface of this module."""
  45    usage = "Run this tool to detect Kconfig symbols that are referenced but " \
  46            "not defined in Kconfig.  If no option is specified, "             \
  47            "checkkconfigsymbols defaults to check your current tree.  "       \
  48            "Please note that specifying commits will 'git reset --hard\' "    \
  49            "your current tree!  You may save uncommitted changes to avoid "   \
  50            "losing data."
  51
  52    parser = argparse.ArgumentParser(description=usage)
  53
  54    parser.add_argument('-c', '--commit', dest='commit', action='store',
  55                        default="",
  56                        help="check if the specified commit (hash) introduces "
  57                             "undefined Kconfig symbols")
  58
  59    parser.add_argument('-d', '--diff', dest='diff', action='store',
  60                        default="",
  61                        help="diff undefined symbols between two commits "
  62                             "(e.g., -d commmit1..commit2)")
  63
  64    parser.add_argument('-f', '--find', dest='find', action='store_true',
  65                        default=False,
  66                        help="find and show commits that may cause symbols to be "
  67                             "missing (required to run with --diff)")
  68
  69    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
  70                        default="",
  71                        help="ignore files matching this Python regex "
  72                             "(e.g., -i '.*defconfig')")
  73
  74    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
  75                        help="print a list of max. 10 string-similar symbols")
  76
  77    parser.add_argument('--force', dest='force', action='store_true',
  78                        default=False,
  79                        help="reset current Git tree even when it's dirty")
  80
  81    parser.add_argument('--no-color', dest='color', action='store_false',
  82                        default=True,
  83                        help="don't print colored output (default when not "
  84                             "outputting to a terminal)")
  85
  86    args = parser.parse_args()
  87
  88    if args.commit and args.diff:
  89        sys.exit("Please specify only one option at once.")
  90
  91    if args.diff and not re.match(r"^[\w\-\.\^]+\.\.[\w\-\.\^]+$", args.diff):
  92        sys.exit("Please specify valid input in the following format: "
  93                 "\'commit1..commit2\'")
  94
  95    if args.commit or args.diff:
  96        if not args.force and tree_is_dirty():
  97            sys.exit("The current Git tree is dirty (see 'git status').  "
  98                     "Running this script may\ndelete important data since it "
  99                     "calls 'git reset --hard' for some performance\nreasons. "
 100                     " Please run this script in a clean Git tree or pass "
 101                     "'--force' if you\nwant to ignore this warning and "
 102                     "continue.")
 103
 104    if args.commit:
 105        args.find = False
 106
 107    if args.ignore:
 108        try:
 109            re.match(args.ignore, "this/is/just/a/test.c")
 110        except:
 111            sys.exit("Please specify a valid Python regex.")
 112
 113    return args
 114
 115
 116def main():
 117    """Main function of this module."""
 118    args = parse_options()
 119
 120    global COLOR
 121    COLOR = args.color and sys.stdout.isatty()
 122
 123    if args.sim and not args.commit and not args.diff:
 124        sims = find_sims(args.sim, args.ignore)
 125        if sims:
 126            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
 127        else:
 128            print("%s: no similar symbols found" % yel("Similar symbols"))
 129        sys.exit(0)
 130
 131    # dictionary of (un)defined symbols
 132    defined = {}
 133    undefined = {}
 134
 135    if args.commit or args.diff:
 136        head = get_head()
 137
 138        # get commit range
 139        commit_a = None
 140        commit_b = None
 141        if args.commit:
 142            commit_a = args.commit + "~"
 143            commit_b = args.commit
 144        elif args.diff:
 145            split = args.diff.split("..")
 146            commit_a = split[0]
 147            commit_b = split[1]
 148            undefined_a = {}
 149            undefined_b = {}
 150
 151        # get undefined items before the commit
 152        reset(commit_a)
 153        undefined_a, _ = check_symbols(args.ignore)
 154
 155        # get undefined items for the commit
 156        reset(commit_b)
 157        undefined_b, defined = check_symbols(args.ignore)
 158
 159        # report cases that are present for the commit but not before
 160        for symbol in sorted(undefined_b):
 161            # symbol has not been undefined before
 162            if symbol not in undefined_a:
 163                files = sorted(undefined_b.get(symbol))
 164                undefined[symbol] = files
 165            # check if there are new files that reference the undefined symbol
 166            else:
 167                files = sorted(undefined_b.get(symbol) -
 168                               undefined_a.get(symbol))
 169                if files:
 170                    undefined[symbol] = files
 171
 172        # reset to head
 173        reset(head)
 174
 175    # default to check the entire tree
 176    else:
 177        undefined, defined = check_symbols(args.ignore)
 178
 179    # now print the output
 180    for symbol in sorted(undefined):
 181        print(red(symbol))
 182
 183        files = sorted(undefined.get(symbol))
 184        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))
 185
 186        sims = find_sims(symbol, args.ignore, defined)
 187        sims_out = yel("Similar symbols")
 188        if sims:
 189            print("%s: %s" % (sims_out, ', '.join(sims)))
 190        else:
 191            print("%s: %s" % (sims_out, "no similar symbols found"))
 192
 193        if args.find:
 194            print("%s:" % yel("Commits changing symbol"))
 195            commits = find_commits(symbol, args.diff)
 196            if commits:
 197                for commit in commits:
 198                    commit = commit.split(" ", 1)
 199                    print("\t- %s (\"%s\")" % (yel(commit[0]), commit[1]))
 200            else:
 201                print("\t- no commit found")
 202        print()  # new line
 203
 204
 205def reset(commit):
 206    """Reset current git tree to %commit."""
 207    execute(["git", "reset", "--hard", commit])
 208
 209
 210def yel(string):
 211    """
 212    Color %string yellow.
 213    """
 214    return "\033[33m%s\033[0m" % string if COLOR else string
 215
 216
 217def red(string):
 218    """
 219    Color %string red.
 220    """
 221    return "\033[31m%s\033[0m" % string if COLOR else string
 222
 223
 224def execute(cmd):
 225    """Execute %cmd and return stdout.  Exit in case of error."""
 226    try:
 227        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
 228        stdout = stdout.decode(errors='replace')
 229    except subprocess.CalledProcessError as fail:
 230        exit(fail)
 231    return stdout
 232
 233
 234def find_commits(symbol, diff):
 235    """Find commits changing %symbol in the given range of %diff."""
 236    commits = execute(["git", "log", "--pretty=oneline",
 237                       "--abbrev-commit", "-G",
 238                       symbol, diff])
 239    return [x for x in commits.split("\n") if x]
 240
 241
 242def tree_is_dirty():
 243    """Return true if the current working tree is dirty (i.e., if any file has
 244    been added, deleted, modified, renamed or copied but not committed)."""
 245    stdout = execute(["git", "status", "--porcelain"])
 246    for line in stdout:
 247        if re.findall(r"[URMADC]{1}", line[:2]):
 248            return True
 249    return False
 250
 251
 252def get_head():
 253    """Return commit hash of current HEAD."""
 254    stdout = execute(["git", "rev-parse", "HEAD"])
 255    return stdout.strip('\n')
 256
 257
 258def partition(lst, size):
 259    """Partition list @lst into eveni-sized lists of size @size."""
 260    return [lst[i::size] for i in range(size)]
 261
 262
 263def init_worker():
 264    """Set signal handler to ignore SIGINT."""
 265    signal.signal(signal.SIGINT, signal.SIG_IGN)
 266
 267
 268def find_sims(symbol, ignore, defined=[]):
 269    """Return a list of max. ten Kconfig symbols that are string-similar to
 270    @symbol."""
 271    if defined:
 272        return difflib.get_close_matches(symbol, set(defined), 10)
 273
 274    pool = Pool(cpu_count(), init_worker)
 275    kfiles = []
 276    for gitfile in get_files():
 277        if REGEX_FILE_KCONFIG.match(gitfile):
 278            kfiles.append(gitfile)
 279
 280    arglist = []
 281    for part in partition(kfiles, cpu_count()):
 282        arglist.append((part, ignore))
 283
 284    for res in pool.map(parse_kconfig_files, arglist):
 285        defined.extend(res[0])
 286
 287    return difflib.get_close_matches(symbol, set(defined), 10)
 288
 289
 290def get_files():
 291    """Return a list of all files in the current git directory."""
 292    # use 'git ls-files' to get the worklist
 293    stdout = execute(["git", "ls-files"])
 294    if len(stdout) > 0 and stdout[-1] == "\n":
 295        stdout = stdout[:-1]
 296
 297    files = []
 298    for gitfile in stdout.rsplit("\n"):
 299        if ".git" in gitfile or "ChangeLog" in gitfile or      \
 300                ".log" in gitfile or os.path.isdir(gitfile) or \
 301                gitfile.startswith("tools/"):
 302            continue
 303        files.append(gitfile)
 304    return files
 305
 306
 307def check_symbols(ignore):
 308    """Find undefined Kconfig symbols and return a dict with the symbol as key
 309    and a list of referencing files as value.  Files matching %ignore are not
 310    checked for undefined symbols."""
 311    pool = Pool(cpu_count(), init_worker)
 312    try:
 313        return check_symbols_helper(pool, ignore)
 314    except KeyboardInterrupt:
 315        pool.terminate()
 316        pool.join()
 317        sys.exit(1)
 318
 319
 320def check_symbols_helper(pool, ignore):
 321    """Helper method for check_symbols().  Used to catch keyboard interrupts in
 322    check_symbols() in order to properly terminate running worker processes."""
 323    source_files = []
 324    kconfig_files = []
 325    defined_symbols = []
 326    referenced_symbols = dict()  # {file: [symbols]}
 327
 328    for gitfile in get_files():
 329        if REGEX_FILE_KCONFIG.match(gitfile):
 330            kconfig_files.append(gitfile)
 331        else:
 332            if ignore and not re.match(ignore, gitfile):
 333                continue
 334            # add source files that do not match the ignore pattern
 335            source_files.append(gitfile)
 336
 337    # parse source files
 338    arglist = partition(source_files, cpu_count())
 339    for res in pool.map(parse_source_files, arglist):
 340        referenced_symbols.update(res)
 341
 342    # parse kconfig files
 343    arglist = []
 344    for part in partition(kconfig_files, cpu_count()):
 345        arglist.append((part, ignore))
 346    for res in pool.map(parse_kconfig_files, arglist):
 347        defined_symbols.extend(res[0])
 348        referenced_symbols.update(res[1])
 349    defined_symbols = set(defined_symbols)
 350
 351    # inverse mapping of referenced_symbols to dict(symbol: [files])
 352    inv_map = dict()
 353    for _file, symbols in referenced_symbols.items():
 354        for symbol in symbols:
 355            inv_map[symbol] = inv_map.get(symbol, set())
 356            inv_map[symbol].add(_file)
 357    referenced_symbols = inv_map
 358
 359    undefined = {}  # {symbol: [files]}
 360    for symbol in sorted(referenced_symbols):
 361        # filter some false positives
 362        if symbol == "FOO" or symbol == "BAR" or \
 363                symbol == "FOO_BAR" or symbol == "XXX":
 364            continue
 365        if symbol not in defined_symbols:
 366            if symbol.endswith("_MODULE"):
 367                # avoid false positives for kernel modules
 368                if symbol[:-len("_MODULE")] in defined_symbols:
 369                    continue
 370            undefined[symbol] = referenced_symbols.get(symbol)
 371    return undefined, defined_symbols
 372
 373
 374def parse_source_files(source_files):
 375    """Parse each source file in @source_files and return dictionary with source
 376    files as keys and lists of references Kconfig symbols as values."""
 377    referenced_symbols = dict()
 378    for sfile in source_files:
 379        referenced_symbols[sfile] = parse_source_file(sfile)
 380    return referenced_symbols
 381
 382
 383def parse_source_file(sfile):
 384    """Parse @sfile and return a list of referenced Kconfig symbols."""
 385    lines = []
 386    references = []
 387
 388    if not os.path.exists(sfile):
 389        return references
 390
 391    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
 392        lines = stream.readlines()
 393
 394    for line in lines:
 395        if "CONFIG_" not in line:
 396            continue
 397        symbols = REGEX_SOURCE_SYMBOL.findall(line)
 398        for symbol in symbols:
 399            if not REGEX_FILTER_SYMBOLS.search(symbol):
 400                continue
 401            references.append(symbol)
 402
 403    return references
 404
 405
 406def get_symbols_in_line(line):
 407    """Return mentioned Kconfig symbols in @line."""
 408    return REGEX_SYMBOL.findall(line)
 409
 410
 411def parse_kconfig_files(args):
 412    """Parse kconfig files and return tuple of defined and references Kconfig
 413    symbols.  Note, @args is a tuple of a list of files and the @ignore
 414    pattern."""
 415    kconfig_files = args[0]
 416    ignore = args[1]
 417    defined_symbols = []
 418    referenced_symbols = dict()
 419
 420    for kfile in kconfig_files:
 421        defined, references = parse_kconfig_file(kfile)
 422        defined_symbols.extend(defined)
 423        if ignore and re.match(ignore, kfile):
 424            # do not collect references for files that match the ignore pattern
 425            continue
 426        referenced_symbols[kfile] = references
 427    return (defined_symbols, referenced_symbols)
 428
 429
 430def parse_kconfig_file(kfile):
 431    """Parse @kfile and update symbol definitions and references."""
 432    lines = []
 433    defined = []
 434    references = []
 435    skip = False
 436
 437    if not os.path.exists(kfile):
 438        return defined, references
 439
 440    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
 441        lines = stream.readlines()
 442
 443    for i in range(len(lines)):
 444        line = lines[i]
 445        line = line.strip('\n')
 446        line = line.split("#")[0]  # ignore comments
 447
 448        if REGEX_KCONFIG_DEF.match(line):
 449            symbol_def = REGEX_KCONFIG_DEF.findall(line)
 450            defined.append(symbol_def[0])
 451            skip = False
 452        elif REGEX_KCONFIG_HELP.match(line):
 453            skip = True
 454        elif skip:
 455            # ignore content of help messages
 456            pass
 457        elif REGEX_KCONFIG_STMT.match(line):
 458            line = REGEX_QUOTES.sub("", line)
 459            symbols = get_symbols_in_line(line)
 460            # multi-line statements
 461            while line.endswith("\\"):
 462                i += 1
 463                line = lines[i]
 464                line = line.strip('\n')
 465                symbols.extend(get_symbols_in_line(line))
 466            for symbol in set(symbols):
 467                if REGEX_NUMERIC.match(symbol):
 468                    # ignore numeric values
 469                    continue
 470                references.append(symbol)
 471
 472    return defined, references
 473
 474
# Run the checker only when this file is executed as a script.
if __name__ == "__main__":
    main()
 477