| #!/usr/bin/env python |
| # |
| # Copyright (C) 2014, Masahiro Yamada <yamada.m@jp.panasonic.com> |
| # |
| # SPDX-License-Identifier: GPL-2.0+ |
| # |
| |
| ''' |
| A tool to create/update the mailmap file |
| |
| The command 'git shortlog' summarizes git log output in a format suitable |
| for inclusion in release announcements. Each commit will be grouped by |
| author and title. |
| |
| One problem is that the authors' name and/or email address is sometimes |
| spelled differently. The .mailmap feature can be used to coalesce together |
| commits by the same person. |
| (See 'man git-shortlog' for further information on this feature.) |
| |
| This tool helps to create/update the mailmap file. |
| |
| It runs 'git shortlog' internally and searches for differently spelled author |
| names which share the same email address. The author name with the most |
| commits is assumed to be the canonical real name. If the number of commits |
| from the canonical name is equal to or greater than 'MIN_COMMITS', |
| the entry for the canonical name will be output. ('MIN_COMMITS' is used |
| here because we do not want to create a fat mailmap by adding every author |
| with only a few commits.) |
| |
| If there exists a mailmap file specified by the mailmap.file configuration |
| option or '.mailmap' at the toplevel of the repository, it is used as |
| a base file. (The mailmap.file configuration takes precedence over the |
| '.mailmap' file if both exist.) |
| |
| The base file and the newly added entries are merged together and sorted |
| alphabetically (but the comment block is kept untouched), and then printed |
| to standard output. |
| |
| Usage |
| ----- |
| |
| scripts/mailmapper |
| |
| prints the mailmapping to standard output. |
| |
| scripts/mailmapper > tmp; mv tmp .mailmap |
| |
| will be useful for updating '.mailmap' file. |
| ''' |
| |
| import sys |
| import os |
| import subprocess |
| |
# Only canonical names with MIN_COMMITS or more commits get a mailmap entry.
# This limit keeps the generated mailmap file from growing too big.
MIN_COMMITS = 50


def git_output(*args):
    '''Run 'git' with the given arguments and return its stdout as text.

    universal_newlines=True makes Python 3 return str instead of bytes, so
    the string parsing below behaves the same on Python 2 and Python 3.
    Raises subprocess.CalledProcessError if git exits with non-zero status.
    '''
    return subprocess.check_output(['git'] + list(args),
                                   universal_newlines=True)


def parse_commit_counts(shortlog):
    '''Build the 'author name' vs 'number of commits' database.

    'shortlog' is the text printed by 'git shortlog -s -n': one
    "<count><whitespace><name>" record per line.  Returns a dict mapping
    each author name to its commit count.
    '''
    commits_per_name = {}
    for line in shortlog.splitlines():
        fields = line.split(None, 1)
        if len(fields) != 2:
            # Blank line, or a record with an empty author name: there is
            # no name to register, so skip it instead of reusing stale data.
            continue
        commits, name = fields
        commits_per_name[name.rstrip()] = int(commits)
    return commits_per_name


def coalesce_names(shortlog, commits_per_name, min_commits=MIN_COMMITS):
    '''Coalesce the author names which share the same email address.

    'shortlog' is the text printed by 'git shortlog -s -n -e'.  For every
    address that appears with more than one name, the name with the most
    commits (looked up in 'commits_per_name') is taken as the canonical
    one.  Returns a dict mapping '<address>' to the canonical name, holding
    only the canonical names with 'min_commits' or more commits.
    '''
    def count(name):
        # Empty or unknown names count as zero commits.
        return commits_per_name.get(name, 0)

    mail_vs_name = {}
    output = {}
    for line in shortlog.splitlines():
        # Splitting on whitespace from the right is not safe because weird
        # email addresses might include whitespaces; cut at the first '<'.
        head, bracket, mail = line.partition('<')
        if not bracket:
            # No email part on this line; nothing to coalesce.
            continue
        mail = '<' + mail.rstrip()
        fields = head.rstrip().split(None, 1)
        # A record without a second field has an empty author name.
        name = fields[1] if len(fields) == 2 else ''

        if mail not in mail_vs_name:
            mail_vs_name[mail] = name
            continue

        # Another name for the same email address: take the name with more
        # commits (the newly seen name wins a tie).
        prev_name = mail_vs_name[mail]
        major_name = name if count(name) >= count(prev_name) else prev_name
        mail_vs_name[mail] = major_name
        if count(major_name) >= min_commits:
            output[mail] = major_name
    return output


def open_base_mailmap(candidates):
    '''Open the first readable file among 'candidates'.

    Returns the open file object, or None if none of them can be opened.
    '''
    for map_file in candidates:
        try:
            return open(map_file)
        except (IOError, OSError):
            # Failed to open. Try next.
            continue
    return None


def split_comment_block(lines):
    '''Split mailmap lines into (leading comment block, remaining lines).

    The comment block is the run of lines at the top that start with '#'
    or are empty.  Everything from the first other line on, including any
    later comment lines, is returned as the second list.  Lines in the
    second list are guaranteed to end with a newline so that sorting them
    later cannot glue two entries together.
    '''
    comment_block = []
    entries = []
    in_header = True
    for line in lines:
        if in_header and line[:1] in ('#', '\n'):
            comment_block.append(line)
            continue
        in_header = False
        if not line.endswith('\n'):
            line += '\n'
        entries.append(line)
    return comment_block, entries


def merge_mailmap(comment_block, entries, new_entries):
    '''Return the text of the updated mailmap.

    'new_entries' maps '<address>' to the canonical name.  The new entries
    are added to 'entries' (skipping ones already present verbatim), the
    result is sorted alphabetically, and the untouched 'comment_block' is
    put back in front.
    '''
    lines = list(entries)
    known = set(lines)
    for mail, name in new_entries.items():
        entry = name + ' ' + mail + '\n'
        if entry not in known:
            known.add(entry)
            lines.append(entry)
    lines.sort()
    return ''.join(list(comment_block) + lines)


def main():
    '''Print the created/updated mailmap to standard output.'''
    try:
        toplevel = git_output('rev-parse', '--show-toplevel')
    except subprocess.CalledProcessError:
        sys.exit('Please run in a git repository.')

    # Change the current working directory to the toplevel of the repository
    # for our easier life.  Only the trailing '\n' is stripped so that a
    # path ending in other whitespace survives.
    os.chdir(toplevel.rstrip('\n'))

    # First, create the 'author name' vs 'number of commits' database.
    # We assume the name with the most commits is the canonical real name.
    # HEAD is given explicitly: without a revision, 'git shortlog' reads
    # the log from standard input whenever stdin is not a terminal.
    commits_per_name = parse_commit_counts(
        git_output('shortlog', '-s', '-n', 'HEAD'))

    # Next, coalesce the author names with the same email address.
    output = coalesce_names(
        git_output('shortlog', '-s', '-n', '-e', 'HEAD'), commits_per_name)

    # [1] If there exists a mailmap file at the location pointed to
    #     by the mailmap.file configuration option, update it.
    # [2] If the file .mailmap exists at the toplevel of the repository,
    #     update it.
    # [3] Otherwise, create a new mailmap file.
    mailmap_files = []
    try:
        config_mailmap = git_output('config', 'mailmap.file').rstrip('\n')
    except subprocess.CalledProcessError:
        # The option is not set.
        config_mailmap = ''
    if config_mailmap:
        mailmap_files.append(config_mailmap)
    mailmap_files.append('.mailmap')

    comment_block = []
    output_lines = []
    infile = open_base_mailmap(mailmap_files)
    if infile is not None:
        # 'with' closes the base file even if reading it fails.
        with infile:
            comment_block, output_lines = split_comment_block(infile)

    sys.stdout.write(merge_mailmap(comment_block, output_lines, output))


if __name__ == '__main__':
    main()