uboot/scripts/mailmapper
<<
>>
Prefs
   1#!/usr/bin/env python3
   2# SPDX-License-Identifier: GPL-2.0+
   3#
   4# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>
   5
   6'''
   7A tool to create/update the mailmap file
   8
   9The command 'git shortlog' summarizes git log output in a format suitable
  10for inclusion in release announcements. Each commit will be grouped by
  11author and title.
  12
  13One problem is that the authors' name and/or email address is sometimes
  14spelled differently. The .mailmap feature can be used to coalesce together
  15commits by the same person.
  16(See 'man git-shortlog' for further information on this feature.)
  17
  18This tool helps to create/update the mailmap file.
  19
  20It runs 'git shortlog' internally and searches differently spelled author
  21names which share the same email address. The author name with the most
  22commits is assumed to be a canonical real name. If the number of commits
  23from the canonical name is equal to or greater than 'MIN_COMMITS',
  24the entry for the canonical name will be output. ('MIN_COMMITS' is used
  25here because we do not want to create a fat mailmap by adding every author
  26with only a few commits.)
  27
  28If there exists a mailmap file specified by the mailmap.file configuration
  29option or '.mailmap' at the toplevel of the repository, it is used as
  30a base file. (The mailmap.file configuration takes precedence over the
  31'.mailmap' file if both exist.)
  32
  33The base file and the newly added entries are merged together and sorted
  34alphabetically (but the comment block is kept untouched), and then printed
  35to standard output.
  36
  37Usage
  38-----
  39
  40  scripts/mailmapper
  41
  42prints the mailmapping to standard output.
  43
  44  scripts/mailmapper > tmp; mv tmp .mailmap
  45
  46will be useful for updating '.mailmap' file.
  47'''
  48
  49import sys
  50import os
  51import subprocess
  52
  53# The entries only for the canonical names with MIN_COMMITS or more commits.
  54# This limitation is used so as not to create a too big mailmap file.
  55MIN_COMMITS = 50
  56
  57try:
  58    toplevel = subprocess.check_output(['git', 'rev-parse', '--show-toplevel'])
  59except subprocess.CalledProcessError:
  60    sys.exit('Please run in a git repository.')
  61
  62# strip '\n'
  63toplevel = toplevel.rstrip()
  64
  65# Change the current working directory to the toplevel of the respository
  66# for our easier life.
  67os.chdir(toplevel)
  68
  69# First, create 'auther name' vs 'number of commits' database.
  70# We assume the name with the most commits as the canonical real name.
  71shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n'])
  72
  73commits_per_name = {}
  74
  75for line in shortlog.splitlines():
  76    try:
  77        commits, name = line.split(None, 1)
  78    except ValueError:
  79        # ignore lines with an empty author name
  80        pass
  81    commits_per_name[name] = int(commits)
  82
  83# Next, coalesce the auther names with the same email address
  84shortlog = subprocess.check_output(['git', 'shortlog', '-s', '-n', '-e'])
  85
  86mail_vs_name = {}
  87output = {}
  88
  89for line in shortlog.splitlines():
  90    # tmp, mail = line.rsplit(None, 1) is not safe
  91    # because weird email addresses might include whitespaces
  92    try:
  93        line = line.decode("utf-8")
  94        tmp, mail = line.split('<')
  95        mail = '<' + mail.rstrip()
  96        _, name = tmp.rstrip().split(None, 1)
  97    except ValueError:
  98        # author name is empty
  99        name = ''
 100    if mail in mail_vs_name:
 101        # another name for the same email address
 102        prev_name = mail_vs_name[mail]
 103        # Take the name with more commits
 104        try:
 105            major_name = sorted([prev_name, name],
 106                                key=lambda x: commits_per_name[x] if x else 0)[1]
 107        except:
 108            continue
 109        mail_vs_name[mail] = major_name
 110        if commits_per_name[major_name] > MIN_COMMITS:
 111            output[mail] = major_name
 112    else:
 113        mail_vs_name[mail] = name
 114
 115# [1] If there exists a mailmap file at the location pointed to
 116#     by the mailmap.file configuration option, update it.
 117# [2] If the file .mailmap exists at the toplevel of the repository, update it.
 118# [3] Otherwise, create a new mailmap file.
 119mailmap_files = []
 120
 121try:
 122    config_mailmap = subprocess.check_output(['git', 'config', 'mailmap.file'])
 123except subprocess.CalledProcessError:
 124    config_mailmap = ''
 125
 126config_mailmap = config_mailmap.rstrip()
 127if config_mailmap:
 128    mailmap_files.append(config_mailmap)
 129
 130mailmap_files.append('.mailmap')
 131
 132infile = None
 133
 134for map_file in mailmap_files:
 135    try:
 136        infile = open(map_file)
 137    except:
 138        # Failed to open. Try next.
 139        continue
 140    break
 141
 142comment_block = []
 143output_lines = []
 144
 145if infile:
 146    for line in infile:
 147        if line[0] == '#' or line[0] == '\n':
 148            comment_block.append(line)
 149        else:
 150            output_lines.append(line)
 151            break
 152    for line in infile:
 153        output_lines.append(line)
 154    infile.close()
 155
 156for mail, name in output.items():
 157    output_lines.append(name + ' ' + mail + '\n')
 158
 159output_lines.sort()
 160
 161sys.stdout.write(''.join(comment_block + output_lines))
 162