blob: f46d54c81a642b5344d439a57ff379c6cbd649f6 [file] [log] [blame]
Tien Mai597a0d1f2020-02-18 16:01:251#!/usr/bin/env python
Avi Drissman7b017a992022-09-07 15:50:382# Copyright 2020 The Chromium Authors
Tien Mai597a0d1f2020-02-18 16:01:253# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
Tien Mai597a0d1f2020-02-18 16:01:255"""Transform CBCM Takeout API Data (Python3)."""
6
Tien Mai8bcfbfc2020-06-30 16:18:167from __future__ import print_function
8from __future__ import unicode_literals
9
Tien Mai597a0d1f2020-02-18 16:01:2510import argparse
11import csv
12import json
13import sys
Tien Maif9a36f3a2020-06-19 21:28:2514import time
Tien Mai597a0d1f2020-02-18 16:01:2515
16import google_auth_httplib2
17
18from httplib2 import Http
19from google.oauth2.service_account import Credentials
Tien Mai8bcfbfc2020-06-30 16:18:1620from builtins import bytes
21from builtins import str
22from io import open
Tien Mai597a0d1f2020-02-18 16:01:2523
24
def _IterDeviceExtensions(data):
  """Yields a (device, extension) pair for every reported extension.

  Devices without 'browsers', browsers without 'profiles' and profiles without
  'extensions' are skipped.
  """
  for device in data['browsers']:
    if 'browsers' not in device:
      continue
    for browser in device['browsers']:
      if 'profiles' not in browser:
        continue
      for profile in browser['profiles']:
        if 'extensions' not in profile:
          continue
        for extension in profile['extensions']:
          yield device, extension


def ComputeExtensionsList(extensions_list, data):
  """Records, per extension, which machines have it installed.

  Walks the |data| retrieved from the Takeout API and updates
  |extensions_list| in place. Entries are keyed by the extension id, followed
  by ' @ <version>' when the extension reports a version. Each entry holds the
  'name' and 'permissions' of the first occurrence seen, plus three sets of
  machine names: 'installed', 'disabled' and 'forced' (installType 'ADMIN').

  Args:
    extensions_list: the extension list dictionary to fill.
    data: the data fetched from the Takeout API.
  """
  for device, extension in _IterDeviceExtensions(data):
    entry_key = extension['extensionId']
    if 'version' in extension:
      entry_key = entry_key + ' @ ' + extension['version']

    if entry_key in extensions_list:
      record = extensions_list[entry_key]
    else:
      record = {
          'name': extension.get('name', ''),
          'permissions': extension.get('permissions', ''),
          'installed': set(),
          'disabled': set(),
          'forced': set()
      }

    machine = device['machineName']
    record['installed'].add(machine)
    if extension.get('installType', '') == 'ADMIN':
      record['forced'].add(machine)
    if extension.get('disabled', False):
      record['disabled'].add(machine)

    # Store the record only after it has been fully updated.
    extensions_list[entry_key] = record
68
69
def DictToList(data, key_name='id'):
  """Iterates over a dict of dicts, inlining each key into its value.

  Every value of |data| must itself be a dict. Each value is modified in place:
  the key it was stored under is written into it under |key_name|, and the
  value is then yielded.

  Args:
    data: a dict where every value is a dict
    key_name: the name given to the key that is inlined into the dict's values

  Yields:
    The values from |data|, with each value's key inlined into the value.
  """
  assert isinstance(data, dict), '|data| must be a dict'
  for original_key in data:
    entry = data[original_key]
    assert isinstance(entry, dict), '|value| must contain dict items'
    entry[key_name] = original_key
    yield entry
88
89
def Flatten(data, all_columns):
  """Flattens lists and sets inside |data|, one level deep.

  Each dict in |data| becomes one row suitable for a CSV file. A list or set
  value stored under key K is replaced by:
    * 'num_K': the number of elements, and
    * 'K': the sorted elements joined with ', '.
  When the joined text does not fit in one cell it is split at separators and
  the remainder spills into 'K_1', 'K_2', ... columns.

  Args:
    data: the data to be flattened.
    all_columns: set of all columns that are found in the result (this will be
      filled by the function).

  Yields:
    A list of dict objects whose lists or sets have been flattened.
  """
  SEPARATOR = ', '

  # Excel's hard limit is 32767 characters per cell, but files that get too
  # close to it open with odd results, so stay a little below the limit.
  MAX_CELL_LENGTH = 32700

  for item in data:
    row = {}
    for prop, value in item.items():
      if isinstance(value, (list, set)):
        row['num_' + prop] = len(value)

        remaining = SEPARATOR.join(sorted(value))
        column = prop
        overflow_count = 0
        while len(remaining) > MAX_CELL_LENGTH:
          # Last separator that lies entirely within the first
          # MAX_CELL_LENGTH characters.
          cut = remaining.rfind(SEPARATOR, 0, MAX_CELL_LENGTH)
          if cut == -1:
            # No separator to split on; the oversized text stays in one cell.
            break
          row[column] = remaining[:cut]
          remaining = remaining[cut + len(SEPARATOR):]
          overflow_count += 1
          column = prop + '_' + str(overflow_count)

        # Whatever is left is the last cell for this container.
        row[column] = remaining

        assert isinstance(row[prop],
                          (int, bool, str)), ('unexpected type for item: %s' %
                                              type(row[prop]).__name__)
      else:
        # Non-container properties are copied as they are.
        row[prop] = value

    all_columns.update(row)
    yield row
Tien Mai597a0d1f2020-02-18 16:01:25154
155
def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
  """Saves an extensions list to a CSV file.

  The extensions are flattened into rows, sorted by |sort_column| and written
  as UTF-8 with a header row. Columns are ordered as follows:
    1. The preferred columns that occur in the data, each immediately followed
       by its overflow columns ('installed', 'installed_1', 'installed_2', ...)
       in numeric order.
    2. Preferred columns that do not occur in the data.
    3. Any remaining columns found in the data.

  Args:
    extensions_list: an extensions list as returned by ComputeExtensionsList
    csv_filename: the name of the CSV file to save
    sort_column: the name of the column by which to sort the data

  Raises:
    KeyError: if a row has no |sort_column| entry.
  """
  all_columns = set()
  flattened_list = list(Flatten(DictToList(extensions_list), all_columns))

  desired_column_order = [
      'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
      'num_forced', 'permissions', 'installed', 'disabled', 'forced'
  ]

  def _NaturalColumnKey(column):
    """Sort key that orders 'x_2' before 'x_10'."""
    base, _, suffix = column.rpartition('_')
    if base and suffix.isdigit():
      try:
        return (base, int(suffix))
      except ValueError:
        pass
    return (column, 0)

  # Order the columns as desired. |placed| guards against listing a column
  # twice, which would duplicate it in the CSV header.
  ordered_fieldnames = []
  placed = set()
  for wanted in desired_column_order:
    matching_columns = sorted(
        (c for c in all_columns if c not in placed and c.startswith(wanted)),
        key=_NaturalColumnKey)
    ordered_fieldnames.extend(matching_columns)
    placed.update(matching_columns)

  # Preferred columns that never occurred in the data still get a header.
  for wanted in desired_column_order:
    if wanted not in placed:
      ordered_fieldnames.append(wanted)
      placed.add(wanted)

  # Columns other than those in |desired_column_order| go last. Without them
  # csv.DictWriter would reject any row that contains such a column.
  ordered_fieldnames.extend(sorted(all_columns - placed, key=_NaturalColumnKey))

  with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
    writer.writeheader()
    writer.writerows(sorted(flattened_list, key=lambda ext: ext[sort_column]))
189
190
def main(args):
  """Downloads browser data from the Takeout API and writes the extension CSV.

  Requests the browser list page by page, feeds every page to
  ComputeExtensionsList and finally writes the aggregated result with
  ExtensionListAsCsv. The CSV is written from a `finally` clause, so whatever
  was collected before an error is still saved.

  Args:
    args: the parsed command-line arguments. Reads admin_email,
      service_account_key_path, extension_list_csv and
      max_browsers_to_process.

  Raises:
    RuntimeError: if the server keeps answering without a 'browsers' entry.
  """
  if not args.admin_email:
    print('admin_email must be specified.')
    sys.exit(1)

  if not args.service_account_key_path:
    print('service_account_key_path must be specified.')
    sys.exit(1)

  max_request_attempts = 5
  retry_delay_seconds = 3

  # Load the json format key that you downloaded from the Google API
  # Console when you created your service account. For p12 keys, use the
  # from_p12_keyfile method of ServiceAccountCredentials and specify the
  # service account email address, p12 keyfile, and scopes.
  service_credentials = Credentials.from_service_account_file(
      args.service_account_key_path,
      scopes=[
          # OAuth scopes are matched literally, so this must be the exact
          # googleapis.com URL.
          'https://www.googleapis.com/auth/admin.directory.device.chromebrowsers.readonly'
      ],
      subject=args.admin_email)

  # Created before the `try` so the `finally` clause can always use it, even
  # when setting up the HTTP client fails.
  extensions_list = {}
  try:
    http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
    # Requests carry the service account's credentials, so they must go
    # straight to the Google API host and never through an intermediary.
    base_request_url = 'https://admin.googleapis.com/admin/directory/v1.1beta1/customer/my_customer/devices/chromebrowsers'
    request_parameters = ''
    browsers_processed = 0
    while True:
      print('Making request to server ...')

      for _ in range(max_request_attempts):
        response = http.request(base_request_url + '?' + request_parameters,
                                'GET')[1]

        if isinstance(response, bytes):
          response = response.decode('utf-8')
        data = json.loads(response)
        if 'browsers' in data:
          break
        print('Response error, retrying...')
        time.sleep(retry_delay_seconds)
      else:
        # Every attempt failed: report what the server said rather than
        # failing later on the missing 'browsers' key.
        raise RuntimeError(
            'No browser data returned after %d attempts; last response: %s' %
            (max_request_attempts, response[:500]))

      browsers_in_data = len(data['browsers'])
      print('Request returned %s results, analyzing ...' % (browsers_in_data))
      ComputeExtensionsList(extensions_list, data)
      browsers_processed += browsers_in_data

      if 'nextPageToken' not in data or not data['nextPageToken']:
        break

      print('%s browsers processed.' % (browsers_processed))

      if (args.max_browsers_to_process is not None and
          args.max_browsers_to_process <= browsers_processed):
        print('Stopping at %s browsers processed.' % (browsers_processed))
        break

      request_parameters = ('pageToken={}').format(data['nextPageToken'])
  finally:
    print('Analyze results ...')
    ExtensionListAsCsv(extensions_list, args.extension_list_csv)
    print("Results written to '%s'" % (args.extension_list_csv))
255
256
if __name__ == '__main__':
  # Script entry point: parse the command line, validate it, then run main().
  parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')
  parser.add_argument('-k', '--service_account_key_path',
                      metavar='FILENAME',
                      required=True,
                      help='The service account key file used to make API requests.')
  parser.add_argument('-a', '--admin_email',
                      required=True,
                      help='The admin user used to make the API requests.')
  parser.add_argument('-x', '--extension_list_csv',
                      metavar='FILENAME',
                      default='./extension_list.csv',
                      help='Generate an extension list to the specified CSV file')
  parser.add_argument('-m', '--max_browsers_to_process',
                      type=int,
                      help='Maximum number of browsers to process. (Must be > 0).')
  args = parser.parse_args()

  # The limit is optional, but when it is given it has to be positive.
  if args.max_browsers_to_process is not None:
    if args.max_browsers_to_process <= 0:
      print('max_browsers_to_process must be > 0.')
      parser.print_help()
      sys.exit(1)

  main(args)
290 main(args)