Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 1 | #!/usr/bin/env python |
Avi Drissman | 7b017a99 | 2022-09-07 15:50:38 | [diff] [blame] | 2 | # Copyright 2020 The Chromium Authors |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 5 | """Transform CBCM Takeout API Data (Python3).""" |
| 6 | |
Tien Mai | 8bcfbfc | 2020-06-30 16:18:16 | [diff] [blame] | 7 | from __future__ import print_function |
| 8 | from __future__ import unicode_literals |
| 9 | |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 10 | import argparse |
| 11 | import csv |
| 12 | import json |
| 13 | import sys |
Tien Mai | f9a36f3a | 2020-06-19 21:28:25 | [diff] [blame] | 14 | import time |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 15 | |
| 16 | import google_auth_httplib2 |
| 17 | |
| 18 | from httplib2 import Http |
| 19 | from google.oauth2.service_account import Credentials |
Tien Mai | 8bcfbfc | 2020-06-30 16:18:16 | [diff] [blame] | 20 | from builtins import bytes |
| 21 | from builtins import str |
| 22 | from io import open |
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 23 | |
| 24 | |
def _ChildrenOf(parent, key):
  """Returns |parent[key]| when |key| is present, otherwise an empty tuple."""
  if key in parent:
    return parent[key]
  return ()


def _RecordExtension(extensions_list, device, extension):
  """Adds one extension sighting on |device| to |extensions_list|.

  Args:
    extensions_list: the extension list dictionary to update.
    device: the device entry the extension was reported under.
    extension: the extension entry itself.
  """
  # Entries are keyed by extension id, qualified by version when one is given.
  key = extension['extensionId']
  if 'version' in extension:
    key = key + ' @ ' + extension['version']

  if key in extensions_list:
    entry = extensions_list[key]
  else:
    # The name and permissions are taken from the first sighting only.
    entry = {
        'name': extension.get('name', ''),
        'permissions': extension.get('permissions', ''),
        'installed': set(),
        'disabled': set(),
        'forced': set()
    }

  host = device['machineName']
  entry['installed'].add(host)
  if extension.get('installType', '') == 'ADMIN':
    entry['forced'].add(host)
  if extension.get('disabled', False):
    entry['disabled'].add(host)

  extensions_list[key] = entry


def ComputeExtensionsList(extensions_list, data):
  """Collects, per extension, the machines on which it was found.

  Walks |data| (devices -> browsers -> profiles -> extensions) and, for every
  extension entry, records the device's machine name in the 'installed' set of
  that extension. The machine is also added to 'forced' when the entry's
  'installType' is 'ADMIN' and to 'disabled' when its 'disabled' flag is
  truthy. Levels that lack the next nested key are skipped.

  Args:
    extensions_list: the extension list dictionary to fill (updated in place).
    data: the data fetched from the Takeout API.
  """
  for device in data['browsers']:
    for browser in _ChildrenOf(device, 'browsers'):
      for profile in _ChildrenOf(browser, 'profiles'):
        for extension in _ChildrenOf(profile, 'extensions'):
          _RecordExtension(extensions_list, device, extension)
| 68 | |
| 69 | |
def DictToList(data, key_name='id'):
  """Streams the values of a dict of dicts, tagging each with its own key.

  Every value of |data| must itself be a dict. Each value is modified in place:
  the key it was stored under is written into it under |key_name|, and the same
  (now tagged) object is then yielded.

  Because this is a generator, the type checks only run once iteration starts.

  Args:
    data: a dict where every value is a dict
    key_name: the name given to the key that is inlined into the dict's values

  Yields:
    The values from |data|, with each value's key inlined into the value.
  """
  assert isinstance(data, dict), '|data| must be a dict'
  for original_key in data:
    entry = data[original_key]
    assert isinstance(entry, dict), '|value| must contain dict items'
    entry[key_name] = original_key
    yield entry
| 88 | |
| 89 | |
# Max length of a cell in Excel is technically 32767 characters but if we get
# too close to this limit Excel seems to create weird results when we open
# the CSV file. To protect against this, give a little more buffer to the max
# characters.
_MAX_CELL_LENGTH = 32700


def Flatten(data, all_columns, max_cell_length=_MAX_CELL_LENGTH):
  """Flattens lists inside |data|, one level deep.

  Each item of |data| is turned into a single flat row that can be written to
  a CSV file. Values that are not a list or set are copied as-is. A list or set
  stored under |prop| is replaced by:
    * 'num_<prop>': the number of elements, and
    * '<prop>': the sorted elements joined with ', '.
  When the joined text is longer than |max_cell_length| it is split between
  elements, and the remainder goes into overflow columns '<prop>_1',
  '<prop>_2', ... A cell only exceeds |max_cell_length| when one single element
  is longer than the limit; such an element gets a cell of its own.

  Args:
    data: the data to be flattened (an iterable of dicts). The elements of any
      list/set value must be strings.
    all_columns: set of all columns that are found in the result (this will be
      filled by the function).
    max_cell_length: the maximum number of characters to put into one cell.

  Yields:
    One dict per item of |data|, with its lists or sets flattened.
  """
  SEPARATOR = ', '

  for item in data:
    added_item = {}
    for prop, value in item.items():
      # Non-container properties can be added directly.
      if not isinstance(value, (list, set)):
        added_item[prop] = value
        continue

      # Otherwise join the container together into a single cell.
      added_item['num_' + prop] = len(value)

      # For long lists, the cell contents may go over the limit, so split the
      # text into chunks that each fit into one cell.
      flat_list = SEPARATOR.join(sorted(value))
      overflow_prop_index = 0
      while True:
        current_column = prop
        if overflow_prop_index:
          current_column = prop + '_' + str(overflow_prop_index)

        if len(flat_list) <= max_cell_length:
          # Everything that is left fits: this is the last cell for this list.
          added_item[current_column] = flat_list
          break

        # Cut at the last separator that lies completely inside the limit.
        split_at = flat_list.rfind(SEPARATOR, 0, max_cell_length)
        if split_at == -1:
          # The first element alone is too long. Give it its own cell instead
          # of dragging all the remaining elements into the same cell.
          split_at = flat_list.find(SEPARATOR)
        if split_at == -1:
          # A single oversized element is left, no more splitting is possible.
          added_item[current_column] = flat_list
          break

        added_item[current_column] = flat_list[:split_at]
        flat_list = flat_list[split_at + len(SEPARATOR):]
        overflow_prop_index += 1

    all_columns.update(added_item)
    yield added_item
Tien Mai | 597a0d1f | 2020-02-18 16:01:25 | [diff] [blame] | 154 | |
| 155 | |
def _OverflowIndex(column, base):
  """Tells whether |column| is |base| itself or one of its overflow columns.

  Args:
    column: the column name to classify.
    base: the base column name to compare against.

  Returns:
    0 when |column| equals |base|, N when |column| is '<base>_N' for a decimal
    N, and None when |column| is unrelated to |base|.
  """
  if column == base:
    return 0
  prefix = base + '_'
  if column.startswith(prefix):
    suffix = column[len(prefix):]
    if suffix.isdecimal():
      return int(suffix)
  return None


def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
  """Saves an extensions list to a CSV file.

  The file is written as UTF-8 with a header row, and the rows are sorted by
  |sort_column|. Columns appear in this order:
    1. the preferred columns that occur in the data, each followed directly by
       its overflow columns ('<column>_1', '<column>_2', ...) in numeric order;
    2. the preferred columns that do not occur in the data (empty cells);
    3. any other column found in the data, sorted by name.

  Args:
    extensions_list: an extensions list as returned by ComputeExtensionsList
    csv_filename: the name of the CSV file to save
    sort_column: the name of the column by which to sort the data
  """
  all_columns = set()
  flattened_list = list(Flatten(DictToList(extensions_list), all_columns))

  desired_column_order = [
      'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
      'num_forced', 'permissions', 'installed', 'disabled', 'forced'
  ]

  ordered_fieldnames = []
  for base in desired_column_order:
    matches = []
    for column in all_columns:
      index = _OverflowIndex(column, base)
      if index is not None:
        matches.append((index, column))
    # Sort on the numeric overflow index so that e.g. 'installed_10' comes
    # after 'installed_2' rather than before it.
    ordered_fieldnames.extend(column for _, column in sorted(matches))

  # Keep the preferred columns in the header even when no row provides them.
  ordered_fieldnames.extend(
      [x for x in desired_column_order if x not in ordered_fieldnames])

  # Every remaining column must be listed as well: csv.DictWriter rejects rows
  # that contain a key missing from |fieldnames|.
  ordered_fieldnames.extend(sorted(all_columns.difference(ordered_fieldnames)))

  with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
    writer.writeheader()
    for row in sorted(flattened_list, key=lambda ext: ext[sort_column]):
      writer.writerow(row)
| 189 | |
| 190 | |
def main(args):
  """Fetches browser data from the Takeout API and writes the extension CSV.

  Requests the enrolled browsers page by page, feeds every page into
  ComputeExtensionsList and finally writes the accumulated extension list to
  |args.extension_list_csv|. The CSV is written even when fetching fails part
  way through, so that the data gathered so far is not lost.

  Args:
    args: the parsed command line arguments. Reads admin_email,
      service_account_key_path, extension_list_csv and
      max_browsers_to_process.

  Raises:
    RuntimeError: if a request still returns no browser data after all retry
      attempts.
  """
  if not args.admin_email:
    print('admin_email must be specified.')
    sys.exit(1)

  if not args.service_account_key_path:
    print('service_account_key_path must be specified.')
    sys.exit(1)

  MAX_ATTEMPTS = 5
  RETRY_DELAY_SECONDS = 3

  # Load the json format key that you downloaded from the Google API
  # Console when you created your service account. For p12 keys, use the
  # from_p12_keyfile method of ServiceAccountCredentials and specify the
  # service account email address, p12 keyfile, and scopes.
  #
  # The scope and the endpoint below must point at the Google hosts directly;
  # any other host would receive the authorized requests instead of Google.
  service_credentials = Credentials.from_service_account_file(
      args.service_account_key_path,
      scopes=[
          'https://www.googleapis.com/auth/admin.directory.device.chromebrowsers.readonly'
      ],
      subject=args.admin_email)

  # Created before the try block so that the finally clause can always use it,
  # even when setting up the HTTP client fails.
  extensions_list = {}
  try:
    http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
    base_request_url = 'https://admin.googleapis.com/admin/directory/v1.1beta1/customer/my_customer/devices/chromebrowsers'
    request_parameters = ''
    browsers_processed = 0
    while True:
      print('Making request to server ...')

      data = {}
      for attempt in range(MAX_ATTEMPTS):
        response = http.request(base_request_url + '?' + request_parameters,
                                'GET')[1]

        if isinstance(response, bytes):
          response = response.decode('utf-8')
        try:
          data = json.loads(response)
        except ValueError:
          # A body that is not JSON is handled like any other failed attempt.
          data = {}

        if isinstance(data, dict) and 'browsers' in data:
          break

        print('Response error, retrying...')
        # No point in waiting after the last attempt.
        if attempt + 1 < MAX_ATTEMPTS:
          time.sleep(RETRY_DELAY_SECONDS)
      else:
        # Every attempt failed: report that instead of a bare KeyError below.
        raise RuntimeError(
            'No browser data in server response after %d attempts.' %
            MAX_ATTEMPTS)

      browsers_in_data = len(data['browsers'])
      print('Request returned %s results, analyzing ...' % (browsers_in_data))
      ComputeExtensionsList(extensions_list, data)
      browsers_processed += browsers_in_data

      if 'nextPageToken' not in data or not data['nextPageToken']:
        break

      print('%s browsers processed.' % (browsers_processed))

      if (args.max_browsers_to_process is not None and
          args.max_browsers_to_process <= browsers_processed):
        print('Stopping at %s browsers processed.' % (browsers_processed))
        break

      request_parameters = ('pageToken={}').format(data['nextPageToken'])
  finally:
    print('Analyze results ...')
    ExtensionListAsCsv(extensions_list, args.extension_list_csv)
    print("Results written to '%s'" % (args.extension_list_csv))
| 255 | |
| 256 | |
if __name__ == '__main__':
  parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')

  # Command line options as (flags, keyword options) pairs. They are
  # registered in this order, which is also the order shown in the help text.
  argument_specs = (
      (('-k', '--service_account_key_path'), {
          'metavar': 'FILENAME',
          'required': True,
          'help': 'The service account key file used to make API requests.',
      }),
      (('-a', '--admin_email'), {
          'required': True,
          'help': 'The admin user used to make the API requests.',
      }),
      (('-x', '--extension_list_csv'), {
          'metavar': 'FILENAME',
          'default': './extension_list.csv',
          'help': 'Generate an extension list to the specified CSV file',
      }),
      (('-m', '--max_browsers_to_process'), {
          'type': int,
          'help': 'Maximum number of browsers to process. (Must be > 0).',
      }),
  )
  for flags, options in argument_specs:
    parser.add_argument(*flags, **options)

  args = parser.parse_args()

  # The limit is optional, but when it is given it has to be positive.
  browser_limit = args.max_browsers_to_process
  if browser_limit is not None and browser_limit <= 0:
    print('max_browsers_to_process must be > 0.')
    parser.print_help()
    sys.exit(1)

  main(args)