blob: 13a48edfdeb6c74b3920579ad0b95b8a1c16262f [file] [log] [blame]
Peter Wenf1c4cb02021-07-06 17:33:101#!/usr/bin/env vpython3
Mohamed Heikal255c1a22018-10-04 20:41:032#
Avi Drissman73a09d12022-09-08 20:33:383# Copyright 2018 The Chromium Authors
Mohamed Heikal255c1a22018-10-04 20:41:034# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7import argparse
8import collections
Peter Wen7e210b22021-06-29 14:37:369import functools
Mohamed Heikal255c1a22018-10-04 20:41:0310import logging
11import re
12import subprocess
13import sys
14
# Patterns applied to individual lines of dexdump output (see ProcessDex).
# Each value of interest appears in the dump wrapped in single quotes.

# Class name inside a quoted 'L...;' descriptor; the captured group excludes
# the surrounding 'L' and ';'.
DEX_CLASS_NAME_RE = re.compile(r'\'L(?P<class_name>[^;]+);\'')

# Any single-quoted value; used on "name" lines to pull out the method name.
DEX_METHOD_NAME_RE = re.compile(r'\'(?P<method_name>[^\']+)\'')

# Quoted method signature in type descriptor form: '(<params>)<return type>'.
DEX_METHOD_TYPE_RE = re.compile(
    r'\'\('
    r'(?P<method_params>[^)]*)'
    r'\)(?P<method_return_type>[^\']+)'
    r'\'')

# "line=<N>" entry found on the position lines of a method.
DEX_METHOD_LINE_NR_RE = re.compile(r'line=(?P<line_number>\d+)')
25
# One method entry of a profile:
#   <tags><class descriptor>-><method name>(<param descriptors>)<return type>
PROFILE_METHOD_RE = re.compile(
    r'(?P<tags>[HSP]+)'              # one or more of the H/S/P tag letters
    r'(?P<class_name>L[^;]+;)'       # owning class, type descriptor format
    r'->(?P<method_name>[^(]+)'      # method name, up to the opening paren
    r'\((?P<method_params>[^)]*)\)'  # parameter descriptors (may be empty)
    r'(?P<method_return_type>.+)')   # everything after the closing paren
32
# Class line of a proguard mapping file: "<original name> -> <obfuscated>:".
PROGUARD_CLASS_MAPPING_RE = re.compile(
    r'(?P<original_name>[^ ]+)'
    r' -> '
    r'(?P<obfuscated_name>[^:]+):')

# Method line of a proguard mapping file (already stripped of indentation):
#   [start:end:]<return type> [<class>.]<name>(<params>)[:lines] -> <obfuscated>
PROGUARD_METHOD_MAPPING_RE = re.compile(
    # Optional "line_start:line_end:" prefix.
    r'((?P<line_start>\d+):(?P<line_end>\d+):)?'
    # Return type of the original method.
    r'(?P<return_type>[^ ]+)'
    # Optional class qualifying the original method name.
    r' (?:(?P<original_method_class>[a-zA-Z_\d.$]+)\.)?'
    # Original method name followed by its parenthesised parameter list.
    r'(?P<original_method_name>[^.\(]+)'
    r'\((?P<params>[^\)]*)\)'
    # Trailing original line numbers; matched but not captured.
    r'(?:[^ ]*)'
    r' -> '
    # Obfuscated method name.
    r'(?P<obfuscated_name>.+)')
48
# A single type descriptor: any number of leading '[' (array dimensions)
# followed by either an 'L...;' class descriptor or a one-letter primitive
# code. Only the class alternative populates the "class_name" group.
TYPE_DESCRIPTOR_RE = re.compile(
    r'(?P<brackets>\[*)'
    r'(?:'
    r'(?P<class_name>L[^;]+;)'
    r'|'
    r'[VZBSCIJFD]'
    r')')
56
# Dot-notation (Java source) type names and their one-letter type descriptor
# codes. The empty string maps to itself so that an empty parameter list
# converts to an empty descriptor list.
DOT_NOTATION_MAP = {
    '': '',
    'boolean': 'Z',
    'byte': 'B',
    'void': 'V',
    'short': 'S',
    'char': 'C',
    'int': 'I',
    'long': 'J',
    'float': 'F',
    'double': 'D',
}
69
Peter Wen7e210b22021-06-29 14:37:3670
@functools.total_ordering
class Method:
  """A method identified by its class, name and (optionally) its signature.

  Attributes:
    name: the method name.
    class_name: name of the owning class.
    param_types: parameter type descriptors as one string, or None when not
      yet known.
    return_type: return type descriptor, or None when not yet known.
  """

  def __init__(self, name, class_name, param_types=None, return_type=None):
    self.name = name
    self.class_name = class_name
    self.param_types = param_types
    self.return_type = return_type

  def __str__(self):
    # Unset (None) signature parts are rendered as empty strings.
    return '{}->{}({}){}'.format(self.class_name, self.name,
                                 self.param_types or '',
                                 self.return_type or '')

  def __repr__(self):
    return 'Method<{}>'.format(str(self))

  @staticmethod
  def serialize(method):
    """Returns the tuple used to compare and order Method objects."""
    return (method.class_name, method.name, method.param_types,
            method.return_type)

  def __eq__(self, other):
    # Let Python fall back to its default handling for foreign types instead
    # of failing with AttributeError inside serialize().
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) == self.serialize(other)

  def __lt__(self, other):
    if not isinstance(other, Method):
      return NotImplemented
    return self.serialize(self) < self.serialize(other)

  def __hash__(self):
    # only hash name and class_name since other fields may not be set yet.
    return hash((self.name, self.class_name))
101
102
class Class:
  """A dex class together with the line numbers each method is mapped to."""

  def __init__(self, name):
    self.name = name
    # List of (method, set of line numbers) pairs in insertion order.
    self._methods = []

  def AddMethod(self, method, line_numbers):
    """Records |method| along with the lines it is mapped to."""
    self._methods.append((method, set(line_numbers)))

  def _WarnIfAmbiguous(self, candidates, hint_lines):
    """Logs a warning when more than one method matched the hint lines."""
    if len(candidates) > 1:
      logging.warning('ambigous methods in dex %s at lines %s in class "%s"',
                      candidates, hint_lines, self.name)

  def FindMethodsAtLine(self, method_name, line_start, line_end=None):
    """Looks up methods of this class by name, using line numbers as hints.

    When several methods share |method_name|, the hint lines are used to pick
    between them: first the methods mapped to at least one of the hint lines,
    and, if there are none, the methods whose line span overlaps the hint
    range.

    Args:
      method_name: name of method being searched for
      line_start: start of hint range for lines in this method
      line_end: end of hint range for lines in this method (optional)

    Returns:
      A list of Method objects that could match the hints given, or None if no
      method is found.
    """
    if line_end is None:
      hint_lines = {line_start}
    else:
      hint_lines = set(range(line_start, line_end + 1))

    same_name = [entry for entry in self._methods
                 if entry[0].name == method_name]
    if not same_name:
      return None
    if len(same_name) == 1:
      # A unique name needs no disambiguation; the hints are not consulted.
      return [same_name[0][0]]

    # First preference: methods mapped to at least one hinted line.
    exact = [candidate for candidate, lines in same_name
             if not hint_lines.isdisjoint(lines)]
    if exact:
      self._WarnIfAmbiguous(exact, hint_lines)
      return exact

    # Otherwise accept methods whose [min, max] line span overlaps the hint
    # range.
    highest_hint = max(hint_lines)
    lowest_hint = min(hint_lines)
    overlapping = [candidate for candidate, lines in same_name
                   if highest_hint >= min(lines) and lowest_hint <= max(lines)]
    if overlapping:
      self._WarnIfAmbiguous(overlapping, hint_lines)
      return overlapping

    logging.warning(
        'No method named "%s" in class "%s" is '
        'mapped to lines %s', method_name, self.name, hint_lines)
    return None
Mohamed Heikal255c1a22018-10-04 20:41:03166
167
class Profile:
  """Accumulates the classes and tagged methods that make up a profile."""

  def __init__(self):
    # {Method: set(char)}
    self._methods = collections.defaultdict(set)
    self._classes = []

  def AddMethod(self, method, tags):
    """Adds every tag in |tags| to the set of tags kept for |method|."""
    new_tags = set(tags)
    # Skip the lookup when there is nothing to add so that no empty entry is
    # created for |method|.
    if new_tags:
      self._methods[method] |= new_tags

  def AddClass(self, cls):
    """Appends a class line to the profile."""
    self._classes.append(cls)

  def WriteToFile(self, path):
    """Writes sorted classes, then sorted methods prefixed by sorted tags."""
    with open(path, 'w') as output_profile:
      class_lines = [cls + '\n' for cls in sorted(self._classes)]
      output_profile.writelines(class_lines)
      for method in sorted(self._methods):
        tag_prefix = ''.join(sorted(self._methods[method]))
        output_profile.write('{}{}\n'.format(tag_prefix, str(method)))
189
190
class ProguardMapping:
  """One direction of a proguard mapping, for both classes and methods."""

  def __init__(self):
    # {Method: set(Method)}
    self._method_mapping = collections.defaultdict(set)
    # {String: String} String is class name in type descriptor format
    self._class_mapping = {}

  def AddMethodMapping(self, from_method, to_method):
    """Adds |to_method| to the set of methods |from_method| maps to."""
    self._method_mapping[from_method].add(to_method)

  def AddClassMapping(self, from_class, to_class):
    """Sets (or overwrites) the class that |from_class| maps to."""
    self._class_mapping[from_class] = to_class

  def GetMethodMapping(self, from_method):
    """Returns the set of methods mapped from |from_method|, or None."""
    # .get() does not insert a default entry the way indexing would.
    return self._method_mapping.get(from_method)

  def GetClassMapping(self, from_class):
    """Returns the mapped class name; unmapped names are returned as is."""
    try:
      return self._class_mapping[from_class]
    except KeyError:
      return from_class

  def MapTypeDescriptor(self, type_descriptor):
    """Maps the first type descriptor found in |type_descriptor|.

    Array brackets are preserved and the class name, if any, goes through
    GetClassMapping. Primitive descriptors are returned unchanged.
    """
    found = TYPE_DESCRIPTOR_RE.search(type_descriptor)
    assert found is not None
    descriptor_class = found.group('class_name')
    if descriptor_class is None:
      # just a native type, return as is
      return found.group()
    return found.group('brackets') + self.GetClassMapping(descriptor_class)

  def MapTypeDescriptorList(self, type_descriptor_list):
    """Maps every type descriptor in a concatenated descriptor list."""

    def _MapOne(found):
      return self.MapTypeDescriptor(found.group())

    return TYPE_DESCRIPTOR_RE.sub(_MapOne, type_descriptor_list)
223
224
class MalformedLineException(Exception):
  """Raised for an input line that cannot be parsed.

  Attributes:
    message: description of the problem.
    line_number: number of the offending line, as supplied by the raiser.
  """

  def __init__(self, message, line_number):
    super().__init__(message)
    self.message = message
    self.line_number = line_number

  def __str__(self):
    location = ' at line {}'.format(self.line_number)
    return self.message + location
233
234
class MalformedProguardMappingException(MalformedLineException):
  """Raised for an unparsable line of a proguard mapping file."""
237
238
class MalformedProfileException(MalformedLineException):
  """Raised for an unparsable line of a profile."""
241
242
def _RunDexDump(dexdump_path, dex_file_path):
  """Runs dexdump on a dex file and returns its output as a list of lines."""
  raw_output = subprocess.check_output([dexdump_path, dex_file_path])
  return raw_output.decode('utf-8').splitlines()
Mohamed Heikal255c1a22018-10-04 20:41:03246
247
def _ReadFile(file_path):
  """Returns the lines of a text file, line endings included."""
  with open(file_path, 'r') as input_file:
    lines = input_file.readlines()
  return lines
251
252
def _ToTypeDescriptor(dot_notation):
  """Converts a single dot notation type to type descriptor format

  eg:
    org.chromium.browser.ChromeActivity -> Lorg/chromium/browser/ChromeActivity;
    boolean -> Z
    int[] -> [I

  Args:
    dot_notation: string with a single type in dot notation format
        (surrounding whitespace is ignored)

  Returns:
    A string with the type in type descriptor format
  """
  type_name = dot_notation.strip()
  # Each trailing "[]" becomes one leading "[" in the descriptor.
  array_depth = 0
  while type_name.endswith('[]'):
    array_depth += 1
    type_name = type_name[:-2]
  brackets = '[' * array_depth

  primitive = DOT_NOTATION_MAP.get(type_name)
  if primitive is not None:
    return brackets + primitive
  return brackets + 'L' + type_name.replace('.', '/') + ';'
275
276
def _DotNotationListToTypeDescriptorList(dot_notation_list_string):
  """Converts a comma separated list of dot notation types to a type
  descriptor list

  eg:
    org.chromium.browser.ChromeActivity,boolean,int[] ->
        Lorg/chromium/browser/ChromeActivity;Z[I

  Args:
    dot_notation_list_string: single string with multiple comma separated types
        in dot notation format

  Returns:
    A string with the param list in type descriptor format
  """
  descriptors = [_ToTypeDescriptor(type_name)
                 for type_name in dot_notation_list_string.split(',')]
  return ''.join(descriptors)
294
295
def ProcessDex(dex_dump):
  """Builds Class objects from the lines of a dexdump listing

  Walks the dexdump output line by line, tracking the class and method
  currently being described, and records for each method the line numbers
  listed in its "positions" section.

  Methods that are not mapped to any line number are ignored and not listed
  inside their respective Class objects.

  Args:
    dex_dump: An array of lines of dexdump output

  Returns:
    A dict that maps from class names in type descriptor format (but without the
    surrounding 'L' and ';') to Class objects.
  """
  classes = {}
  cls = None
  method = None
  in_methods = False
  in_positions = False
  line_numbers = []
  for raw_line in dex_dump:
    text = raw_line.strip()
    if text.startswith('Class descriptor'):
      # A new class begins; its method sections have not started yet.
      in_methods = False
      cls = Class(DEX_CLASS_NAME_RE.search(text).group('class_name'))
      classes[cls.name] = cls
    elif text.startswith(('Direct methods', 'Virtual methods')):
      in_methods = True
    elif in_methods and text.startswith('name'):
      assert cls is not None
      method_name = DEX_METHOD_NAME_RE.search(text).group('method_name')
      method = Method(method_name, 'L' + cls.name + ';')
    elif in_methods and text.startswith('type'):
      assert method is not None
      signature = DEX_METHOD_TYPE_RE.search(text)
      method.param_types = signature.group('method_params')
      method.return_type = signature.group('method_return_type')
    elif text.startswith('positions'):
      assert in_methods
      in_positions = True
      line_numbers = []
    elif in_positions and text.startswith('0x'):
      number = DEX_METHOD_LINE_NR_RE.search(text).group('line_number')
      line_numbers.append(int(number))
    elif in_positions and text.startswith('locals'):
      # "locals" closes the positions section of the current method.
      if line_numbers:
        cls.AddMethod(method, line_numbers)
      in_positions = False
  return classes
353
354
def ProcessProguardMapping(proguard_mapping_lines, dex):
  """Parses a proguard mapping file

  This takes proguard mapping file lines and then uses the obfuscated dex to
  create a mapping of unobfuscated methods to obfuscated ones and vice versa.

  The dex is used because the proguard mapping file only has the name of the
  obfuscated methods but not their signature, thus the dex is read to look up
  which method with a specific name was mapped to the lines mentioned in the
  proguard mapping file.

  Blank lines and lines whose first non-blank character is '#' are skipped.
  Field mappings are ignored. Method lines for a class that is absent from
  |dex| are skipped with a warning.

  Args:
    proguard_mapping_lines: Array of strings, each is a line from the proguard
        mapping file (in order).
    dex: a dict of class name (in type descriptor format but without the
        enclosing 'L' and ';') to a Class object.

  Returns:
    A tuple (mapping, reverse_mapping) of ProguardMapping objects. The first
    maps obfuscated methods to sets of original methods and obfuscated class
    names to original class names; the second maps in the opposite direction.
    Class names are in type descriptor format (with the enclosing 'L' and
    ';').

  Raises:
    MalformedProguardMappingException: if a non-indented line is not a class
        mapping, or if an indented line appears before any class mapping. The
        reported line number is 1-based.
  """
  mapping = ProguardMapping()
  reverse_mapping = ProguardMapping()
  # (original Method, obfuscated name) pairs that carry no line information;
  # they are resolved once all class mappings are known.
  to_be_obfuscated = []
  current_class_orig = None
  current_class_obfs = None
  for index, line in enumerate(proguard_mapping_lines):
    stripped = line.strip()
    # Blank lines and '#' comment lines carry no mapping information.
    if not stripped or stripped.startswith('#'):
      continue
    if not line.startswith(' '):
      match = PROGUARD_CLASS_MAPPING_RE.search(line)
      if match is None:
        raise MalformedProguardMappingException(
            'Malformed class mapping', index + 1)
      current_class_orig = match.group('original_name')
      current_class_obfs = match.group('obfuscated_name')
      mapping.AddClassMapping(_ToTypeDescriptor(current_class_obfs),
                              _ToTypeDescriptor(current_class_orig))
      reverse_mapping.AddClassMapping(_ToTypeDescriptor(current_class_orig),
                                      _ToTypeDescriptor(current_class_obfs))
      continue

    # Raise rather than assert so the check survives "python -O".
    if current_class_orig is None or current_class_obfs is None:
      raise MalformedProguardMappingException(
          'Member mapping found before any class mapping', index + 1)

    match = PROGUARD_METHOD_MAPPING_RE.search(stripped)
    # check if is a method mapping (we ignore field mappings)
    if match is None:
      continue

    # check if this line is an inlining by reading ahead 1 line.
    if index + 1 < len(proguard_mapping_lines):
      next_match = PROGUARD_METHOD_MAPPING_RE.search(
          proguard_mapping_lines[index + 1].strip())
      if (next_match and match.group('line_start') is not None
          and next_match.group('line_start') == match.group('line_start')
          and next_match.group('line_end') == match.group('line_end')):
        continue  # This is an inlining, skip

    original_method = Method(
        match.group('original_method_name'),
        _ToTypeDescriptor(
            match.group('original_method_class') or current_class_orig),
        _DotNotationListToTypeDescriptorList(match.group('params')),
        _ToTypeDescriptor(match.group('return_type')))

    if match.group('line_start') is None:
      to_be_obfuscated.append(
          (original_method, match.group('obfuscated_name')))
      continue

    obfs_class = dex.get(current_class_obfs.replace('.', '/'))
    if obfs_class is None:
      # Without the class in the dex there is nothing to map the method to.
      logging.warning('Class "%s" from the proguard mapping is not in the dex',
                      current_class_obfs)
      continue

    obfs_methods = obfs_class.FindMethodsAtLine(
        match.group('obfuscated_name'),
        int(match.group('line_start')),
        int(match.group('line_end')))
    if obfs_methods is None:
      continue

    for obfs_method in obfs_methods:
      mapping.AddMethodMapping(obfs_method, original_method)
      reverse_mapping.AddMethodMapping(original_method, obfs_method)

  # Methods without line numbers: build the obfuscated signature by mapping
  # the original class and type descriptors through the class mappings.
  for original_method, obfuscated_name in to_be_obfuscated:
    obfuscated_method = Method(
        obfuscated_name,
        reverse_mapping.GetClassMapping(original_method.class_name),
        reverse_mapping.MapTypeDescriptorList(original_method.param_types),
        reverse_mapping.MapTypeDescriptor(original_method.return_type))
    mapping.AddMethodMapping(obfuscated_method, original_method)
    reverse_mapping.AddMethodMapping(original_method, obfuscated_method)
  return mapping, reverse_mapping
445
446
def ProcessProfile(input_profile, proguard_mapping):
  """Parses an android profile and uses the proguard mapping to (de)obfuscate it

  This takes the android profile lines and for each method or class for the
  profile, it uses the mapping to either obfuscate or deobfuscate (based on the
  provided mapping) and returns a Profile object that stores this information.

  Blank lines are skipped. Methods with no entry in the mapping are dropped
  with a warning.

  Args:
    input_profile: array of lines of the input profile
    proguard_mapping: a proguard mapping that would map from the classes and
                      methods in the input profile to the classes and methods
                      that should be in the output profile.

  Returns:
    A Profile object that stores the information (ie list of mapped classes and
    methods + tags)

  Raises:
    MalformedProfileException: if a line is neither a class line nor a method
        line. The reported line number is 1-based.
  """
  profile = Profile()
  for line_number, line in enumerate(input_profile, start=1):
    line = line.strip()
    if not line:
      continue
    # Lines starting with 'L' are class lines; method lines start with tags.
    if line.startswith('L'):
      profile.AddClass(proguard_mapping.GetClassMapping(line))
      continue
    match = PROFILE_METHOD_RE.search(line)
    if not match:
      raise MalformedProfileException("Malformed line", line_number)

    method = Method(
        match.group('method_name'),
        match.group('class_name'),
        match.group('method_params'),
        match.group('method_return_type'))

    mapped_methods = proguard_mapping.GetMethodMapping(method)
    if mapped_methods is None:
      logging.warning('No method matching "%s" has been found in the proguard '
                      'mapping file', method)
      continue

    # One input method may map to several output methods; each gets the tags.
    for mapped_method in mapped_methods:
      profile.AddMethod(mapped_method, match.group('tags'))

  return profile
490
491
def ObfuscateProfile(nonobfuscated_profile, dex_file, proguard_mapping,
                     dexdump_path, output_filename):
  """Obfuscates a profile and writes the result to a file.

  Args:
    nonobfuscated_profile: path to a profile with nonobfuscated symbols.
    dex_file: path to the dex file matching the mapping.
    proguard_mapping: path to the proguard mapping file, from nonobfuscated to
      obfuscated symbols used in the dex file.
    dexdump_path: path to the dexdump utility.
    output_filename: output filename in which to write the obfuscated profile.
  """
  dex_dump_lines = _RunDexDump(dexdump_path, dex_file)
  dexinfo = ProcessDex(dex_dump_lines)
  mapping_lines = _ReadFile(proguard_mapping)
  # Only the original -> obfuscated direction is needed here.
  _, reverse_mapping = ProcessProguardMapping(mapping_lines, dexinfo)
  profile_lines = _ReadFile(nonobfuscated_profile)
  obfuscated_profile = ProcessProfile(profile_lines, reverse_mapping)
  obfuscated_profile.WriteToFile(output_filename)
510
511
def main(args):
  """Converts a profile between its obfuscated and deobfuscated forms.

  Args:
    args: list of command line arguments, without the program name.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--dexdump-path',
      required=True,
      help='Path to dexdump binary.')
  parser.add_argument(
      '--dex-path',
      required=True,
      help='Path to dex file corresponding to the proguard mapping file.')
  parser.add_argument(
      '--proguard-mapping-path',
      required=True,
      help='Path to input proguard mapping file corresponding to the dex file.')
  parser.add_argument(
      '--output-profile-path',
      required=True,
      help='Path to output profile.')
  parser.add_argument(
      '--input-profile-path',
      required=True,
      help='Path to input profile.')
  parser.add_argument(
      '--verbose',
      action='store_true',
      default=False,
      help='Print verbose output.')
  # Exactly one of --obfuscate / --deobfuscate must be given; both store into
  # options.obfuscate.
  obfuscation = parser.add_mutually_exclusive_group(required=True)
  obfuscation.add_argument('--obfuscate', action='store_true',
      help='Indicates to output an obfuscated profile given a deobfuscated '
      'one.')
  obfuscation.add_argument('--deobfuscate', dest='obfuscate',
      action='store_false', help='Indicates to output a deobfuscated profile '
      'given an obfuscated one.')
  options = parser.parse_args(args)

  log_level = logging.WARNING if options.verbose else logging.ERROR
  logging.basicConfig(format='%(levelname)s: %(message)s', level=log_level)

  dex = ProcessDex(_RunDexDump(options.dexdump_path, options.dex_path))
  proguard_mapping, reverse_proguard_mapping = ProcessProguardMapping(
      _ReadFile(options.proguard_mapping_path), dex)
  # Obfuscating goes from original to obfuscated names, which is the reverse
  # mapping; deobfuscating uses the forward (obfuscated -> original) mapping.
  if options.obfuscate:
    selected_mapping = reverse_proguard_mapping
  else:
    selected_mapping = proguard_mapping
  profile = ProcessProfile(
      _ReadFile(options.input_profile_path), selected_mapping)
  profile.WriteToFile(options.output_profile_path)
566
567
if __name__ == '__main__':
  # Script entry point: forward the command line without the program name.
  main(sys.argv[1:])