summaryrefslogtreecommitdiff
path: root/util/maint/git-patch-to-hg-patch
blob: 1b9e05d0959d03e196c8ec71fe65473ac98dfc2a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python2
#
# This file originated from the moz-git-tools repo on GitHub
# (https://github.com/mozilla/moz-git-tools), which contains the
# following LICENSE notice:
#
# <quote>
# Except for git-new-workdir, which is covered under GPLv2, the code
# in this repository is placed into the public domain via CC0.
#
# http://creativecommons.org/publicdomain/zero/1.0/legalcode
# </quote>

r"""Git format-patch to hg importable patch.

(Who knew this was so complicated?)

>>> process(StringIO('From 3ce1ccc06 Mon Sep 17 00:00:00 2001\nFrom: fromuser\nSubject: subject\n\nRest of patch.\nMore patch.\n'))
'# HG changeset patch\n# User fromuser\n\nsubject\n\nRest of patch.\nMore patch.\n'

>>> process(StringIO('From: fromuser\nSubject: A very long subject line.  Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'))
'# HG changeset patch\n# User fromuser\n\nA very long subject line.  Lorem ipsum dolor sit amet, consectetur adipiscing elit. Morbi faucibus, arcu sit amet\n\nRest of patch.\nMore patch.\n'

>>> process(StringIO('From: f\nSubject: =?UTF-8?q?Bug=20655877=20-=20Dont=20treat=20SVG=20text=20frames=20?= =?UTF-8?q?as=20being=20positioned.=20r=3D=3F?=\n\nPatch.'))
'# HG changeset patch\n# User f\n\nBug 655877 - Dont treat SVG text frames as being positioned. r=?\n\nPatch.'
"""

# Original author: bholley

import sys
import re
import fileinput
import email, email.parser, email.header, email.utils
import math
from cStringIO import StringIO
from itertools import takewhile

def decode_header(hdr_string):
  r"""Return hdr_string with its encoded parts re-encoded as UTF-8.

  The value is wrapped in an email.header.Header whose maximum line length is
  infinite, then split by email.header.decode_header into (part, encoding)
  pairs.  Parts that carry an encoding are decoded with it and re-encoded as
  UTF-8; parts without one are passed through unchanged.  The resulting parts
  are joined with a single space.

  The example below goes through clean_header, which delegates to this
  function.

  >>> clean_header('[PATCH] =?UTF-8?q?Bug=20655877=20r=3D=3F?=')
  '[PATCH] Bug 655877 r=?'
  """
  header = email.header.Header(hdr_string, maxlinelen=float('inf'))
  pieces = [
    chunk if charset is None else chunk.decode(charset).encode('utf-8')
    for (chunk, charset) in email.header.decode_header(header)
  ]
  return ' '.join(pieces)

def clean_header(hdr_string):
  r"""Transform a header split over many lines into a header split only where
  linebreaks are intended.  This is important because hg cares about the first
  line of the commit message.

  The header is first passed through decode_header, so encoded parts are
  cleaned up as well.  A line that starts with a space is treated as a
  continuation and is appended (leading space included) to the line before
  it; every other line starts a new output line.

  >>> clean_header('Foo\n bar\n baz')
  'Foo bar baz'
  >>> clean_header('Foo\n bar\nSpam\nEggs')
  'Foo bar\nSpam\nEggs'
  """
  lines = []
  for line in decode_header(hdr_string).split('\n'):
    # A continuation needs something to continue.  If the very first line
    # starts with a space it is kept as its own line rather than being
    # discarded.
    if lines and line.startswith(' '):
      lines[-1] += line
    else:
      lines.append(line)
  return '\n'.join(lines)

def process(git_patch_file):
  """Convert one `git format-patch` message into an hg-importable patch.

  git_patch_file is an open file-like object holding the patch e-mail.  It is
  parsed with email.parser.Parser; the From and Subject headers become the
  "# User" line and the commit title, and the message payload is carried over
  as the patch body.

  Returns the converted patch as a single string, or None (after writing a
  warning to stderr) when the From or Subject header is missing or empty.
  """
  parser = email.parser.Parser()
  msg = parser.parse(git_patch_file)
  from_hdr = clean_header(msg['From'])
  commit_title = clean_header(msg['subject'])
  if not commit_title or not from_hdr:
    # Not every file-like object has a name (StringIO does not), so make sure
    # the warning itself cannot fail with AttributeError.
    source_name = getattr(git_patch_file, 'name', '<input>')
    sys.stderr.write("%s does not look like a valid git patch file, skipping\n"
                     % source_name)
    return None

  # Only use the "Name <address>" form when a display name was parsed.  A bare
  # From value would otherwise be rendered as "# User  <value>"; emit just the
  # value instead, falling back to the raw header if nothing could be parsed.
  real_name, address = email.utils.parseaddr(from_hdr)
  if real_name:
    user = '%s <%s>' % (real_name, address)
  else:
    user = address or from_hdr

  # Strip the "[PATCH] " / "[PATCH n/m] " prefix from the title.
  nuke_prefix = r"\[PATCH( \d+/\d+)?\] "
  match = re.match(nuke_prefix, commit_title)
  if match:
    commit_title = commit_title[match.end():]

  patch_body = msg.get_payload()

  # git format-patch wraps the diff (including trailing whitespace):
  #   ---
  #   <diff>
  #   --
  #   2.0.3
  # This doesn't hurt parsing the diff at all, but the version number is
  # nonsense once the git specific items have been stripped, so drop the
  # trailing "--" line and the version line after it.
  patch_body = re.sub(r'--\s?\n[0-9\.]+\n$', '', patch_body)

  return '\n'.join(['# HG changeset patch',
                    '# User %s' % user,
                    '',
                    commit_title,
                    '',
                    patch_body])

if __name__ == "__main__":
  # `--test` as the first argument runs this module's doctests instead of
  # converting anything.
  if len(sys.argv) > 1 and sys.argv[1] == '--test':
    import doctest
    doctest.testmod()
    sys.exit(0)

  # If there were no arguments, do stdin->stdout.
  filelist = sys.argv[1:]
  if not filelist:
    converted = process(sys.stdin)
    # process() returns None (after warning on stderr) when the input is not
    # a git patch.  Writing None would raise TypeError, so exit non-zero
    # without a traceback instead.
    if converted is None:
      sys.exit(1)
    sys.stdout.write(converted)
    sys.exit(0)

  # Otherwise, we take a list of files and convert each one in place.
  for filename in filelist:

    # Read and convert; the handle is closed even if process() raises.
    with open(filename, 'r') as f:
      converted = process(f)

    # Files that were rejected are left untouched.
    if converted:
      # Write the result back to the same file.
      with open(filename, 'w') as f:
        f.write(converted)