#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0+
#
# Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com>

'''
A tool to create/update the mailmap file

The command 'git shortlog' summarizes git log output in a format suitable
for inclusion in release announcements. Each commit will be grouped by
author and title.

One problem is that the authors' name and/or email address is sometimes
spelled differently. The .mailmap feature can be used to coalesce together
commits by the same person.
(See 'man git-shortlog' for further information on this feature.)

This tool helps to create/update the mailmap file.

It runs 'git shortlog' internally and searches differently spelled author
names which share the same email address. The author name with the most
commits is assumed to be a canonical real name. If the number of commits
from the canonical name is equal to or greater than 'MIN_COMMITS',
the entry for the canonical name will be output. ('MIN_COMMITS' is used
here because we do not want to create a fat mailmap by adding every author
with only a few commits.)

If there exists a mailmap file specified by the mailmap.file configuration
option or '.mailmap' at the toplevel of the repository, it is used as
a base file. (The mailmap.file configuration takes precedence over the
'.mailmap' file if both exist.)

The base file and the newly added entries are merged together and sorted
alphabetically (but the comment block is kept untouched), and then printed
to standard output.

Usage
-----

    scripts/mailmapper

prints the mailmapping to standard output.

    scripts/mailmapper > tmp; mv tmp .mailmap

will be useful for updating '.mailmap' file.
'''

import sys
import os
import subprocess

# Entries are only emitted for canonical names with MIN_COMMITS or more
# commits. This limit keeps the mailmap file from growing too big.
MIN_COMMITS = 50


def git_lines(*args):
    '''Run 'git <args>' and return its standard output as a list of str lines.

    The output is split on line boundaries while it is still bytes and each
    line is then decoded as UTF-8, so both shortlog passes see author names
    of the same type. A line that is not valid UTF-8 is reported on standard
    error and skipped instead of being mis-attributed to another author.

    Raises subprocess.CalledProcessError if git exits with a non-zero status.
    '''
    raw = subprocess.check_output(['git'] + list(args))
    lines = []
    for raw_line in raw.splitlines():
        try:
            lines.append(raw_line.decode('utf-8'))
        except UnicodeDecodeError:
            sys.stderr.write('warning: skipping non-UTF-8 line: %r\n'
                             % raw_line)
    return lines


def count_commits_per_name(shortlog_lines):
    '''Build the 'author name' vs 'number of commits' database.

    shortlog_lines: lines in the 'git shortlog -s -n' format, i.e.
                    '<commits><whitespace><author name>'.

    Returns a dict mapping each author name to its commit count. Lines with
    an empty author name (or without a numeric count) are ignored.
    '''
    commits_per_name = {}
    for line in shortlog_lines:
        try:
            commits, name = line.split(None, 1)
            count = int(commits)
        except ValueError:
            # Empty author name or malformed line: there is nothing to
            # record, so skip it rather than reuse the previous name.
            continue
        commits_per_name[name.rstrip()] = count
    return commits_per_name


def select_canonical_names(shortlog_lines, commits_per_name,
                           min_commits=MIN_COMMITS):
    '''Coalesce the author names that share the same email address.

    shortlog_lines:   lines in the 'git shortlog -s -n -e' format, i.e.
                      '<commits><whitespace><author name> <email>'.
    commits_per_name: dict as returned by count_commits_per_name().
    min_commits:      minimum number of commits the canonical name must
                      have for its entry to be emitted.

    Returns a dict mapping '<email>' (angle brackets included) to the
    canonical name. Only email addresses used with more than one spelling of
    the name, and whose canonical name has at least min_commits commits,
    are included.
    '''
    mail_vs_name = {}
    output = {}

    for line in shortlog_lines:
        # line.rsplit(None, 1) is not safe because weird email addresses
        # might include whitespaces, so cut at the first '<' instead.
        head, bracket, mail = line.partition('<')
        if not bracket:
            # No email address at all: there is no key to group on.
            continue
        mail = '<' + mail.rstrip()

        fields = head.split(None, 1)
        # An entry may consist of the commit count and the address only.
        name = fields[1].rstrip() if len(fields) == 2 else ''

        if mail not in mail_vs_name:
            mail_vs_name[mail] = name
            continue

        # Another name for the same email address: take the name with more
        # commits. Unknown and empty names count as zero; on a tie the name
        # seen last wins.
        prev_name = mail_vs_name[mail]
        major_name = max(name, prev_name,
                         key=lambda x: commits_per_name.get(x, 0))
        mail_vs_name[mail] = major_name
        if major_name and commits_per_name.get(major_name, 0) >= min_commits:
            output[mail] = major_name

    return output


def read_base_mailmap(candidates):
    '''Return the lines of the first file in candidates that can be opened.

    candidates: file names in order of precedence.

    Returns a list of lines (line endings kept), or an empty list when none
    of the files can be opened.
    '''
    for path in candidates:
        try:
            with open(path, encoding='utf-8') as infile:
                return infile.readlines()
        except (OSError, ValueError):
            # Failed to open (or unusable file name). Try next.
            continue
    return []


def split_comment_block(lines):
    '''Split lines into (leading comment block, remaining lines).

    The comment block is the run of lines at the top that start with '#'
    or are blank. Everything from the first other line on, later comments
    included, belongs to the second list.
    '''
    for index, line in enumerate(lines):
        if not line.startswith(('#', '\n')):
            return list(lines[:index]), list(lines[index:])
    return list(lines), []


def merge_mailmap(base_lines, new_entries):
    '''Merge new entries into the base mailmap and return the result as text.

    base_lines:  lines of the existing mailmap file (may be empty).
    new_entries: dict mapping '<email>' to the canonical name.

    The leading comment block of the base file is kept untouched; all other
    lines and the new entries are sorted alphabetically. An entry that is
    already present verbatim in the base file is not added a second time.
    '''
    # The last line of a file may lack its newline; without this it would be
    # glued to whichever line follows it after sorting.
    base_lines = [line if line.endswith('\n') else line + '\n'
                  for line in base_lines]
    comment_block, output_lines = split_comment_block(base_lines)

    known = set(output_lines)
    for mail, name in new_entries.items():
        entry = name + ' ' + mail + '\n'
        if entry not in known:
            known.add(entry)
            output_lines.append(entry)

    output_lines.sort()
    return ''.join(comment_block + output_lines)


def main():
    '''Print the merged mailmap of the current repository to standard output.

    Exits with an error message when not run inside a git repository.
    '''
    try:
        toplevel = subprocess.check_output(['git', 'rev-parse',
                                            '--show-toplevel'])
    except subprocess.CalledProcessError:
        sys.exit('Please run in a git repository.')

    # Change the current working directory to the toplevel of the repository
    # for our easier life. Only the line terminator is stripped; the path is
    # passed on as bytes, exactly as git printed it.
    os.chdir(toplevel.rstrip(b'\r\n'))

    # 'HEAD' is given explicitly: without a revision, 'git shortlog' reads
    # the log from standard input when that is not a terminal.

    # First, create 'author name' vs 'number of commits' database.
    # We assume the name with the most commits as the canonical real name.
    commits_per_name = count_commits_per_name(
        git_lines('shortlog', '-s', '-n', 'HEAD'))

    # Next, coalesce the author names with the same email address.
    output = select_canonical_names(
        git_lines('shortlog', '-s', '-n', '-e', 'HEAD'), commits_per_name)

    # [1] If there exists a mailmap file at the location pointed to
    #     by the mailmap.file configuration option, update it.
    # [2] If the file .mailmap exists at the toplevel of the repository,
    #     update it.
    # [3] Otherwise, create a new mailmap file.
    mailmap_files = []

    try:
        config_mailmap = subprocess.check_output(['git', 'config',
                                                  'mailmap.file'])
    except subprocess.CalledProcessError:
        # The option is not set.
        config_mailmap = b''

    config_mailmap = config_mailmap.rstrip(b'\r\n')
    if config_mailmap:
        mailmap_files.append(os.fsdecode(config_mailmap))

    mailmap_files.append('.mailmap')

    sys.stdout.write(merge_mailmap(read_base_mailmap(mailmap_files), output))


if __name__ == '__main__':
    main()