Blame - docs/enterprise/extension_query.py - chromium/src.git

blob: f46d54c81a642b5344d439a57ff379c6cbd649f6 [file] [log] [blame]

Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	1	#!/usr/bin/env python
Avi Drissman	7b017a99	2022-09-07 15:50:38	[diff] [blame]	2	# Copyright 2020 The Chromium Authors
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	5	"""Transform CBCM Takeout API Data (Python3)."""
				6
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	7	from __future__ import print_function
				8	from __future__ import unicode_literals
				9
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	10	import argparse
				11	import csv
				12	import json
				13	import sys
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	14	import time
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	15
				16	import google_auth_httplib2
				17
				18	from httplib2 import Http
				19	from google.oauth2.service_account import Credentials
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	20	from builtins import bytes
				21	from builtins import str
				22	from io import open
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	23
				24
				25	def ComputeExtensionsList(extensions_list, data):
				26	"""Computes list of machines that have an extension.
				27
				28	This sample function processes the \|data\| retrieved from the Takeout API and
				29	calculates the list of machines that have installed each extension listed in
				30	the data.
				31
				32	Args:
				33	extensions_list: the extension list dictionary to fill.
				34	data: the data fetched from the Takeout API.
				35	"""
				36	for device in data['browsers']:
				37	if 'browsers' not in device:
				38	continue
				39	for browser in device['browsers']:
				40	if 'profiles' not in browser:
				41	continue
				42	for profile in browser['profiles']:
				43	if 'extensions' not in profile:
				44	continue
				45	for extension in profile['extensions']:
				46	key = extension['extensionId']
				47	if 'version' in extension:
				48	key = key + ' @ ' + extension['version']
				49	if key not in extensions_list:
				50	current_extension = {
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	51	'name': extension.get('name', ''),
				52	'permissions': extension.get('permissions', ''),
				53	'installed': set(),
				54	'disabled': set(),
				55	'forced': set()
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	56	}
				57	else:
				58	current_extension = extensions_list[key]
				59
				60	machine_name = device['machineName']
				61	current_extension['installed'].add(machine_name)
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	62	if extension.get('installType', '') == 'ADMIN':
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	63	current_extension['forced'].add(machine_name)
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	64	if extension.get('disabled', False):
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	65	current_extension['disabled'].add(machine_name)
				66
				67	extensions_list[key] = current_extension
				68
				69
				70	def DictToList(data, key_name='id'):
				71	"""Converts a dict into a list.
				72
				73	The value of each member of \|data\| must also be a dict. The original key for
				74	the value will be inlined into the value, under the \|key_name\| key.
				75
				76	Args:
				77	data: a dict where every value is a dict
				78	key_name: the name given to the key that is inlined into the dict's values
				79
				80	Yields:
				81	The values from \|data\|, with each value's key inlined into the value.
				82	"""
				83	assert isinstance(data, dict), '\|data\| must be a dict'
				84	for key, value in data.items():
				85	assert isinstance(value, dict), '\|value\| must contain dict items'
				86	value[key_name] = key
				87	yield value
				88
				89
def Flatten(data, all_columns):
  """Flattens lists and sets inside |data|, one level deep.

  Each item of |data| becomes a single row suitable for a CSV file. A list or
  set property is replaced by a 'num_<prop>' count and a cell holding its
  sorted members joined with ', '. When the joined text exceeds the cell
  limit, it is split at separators into extra '<prop>_1', '<prop>_2', ...
  cells. Other properties are copied through unchanged.

  Args:
    data: the data to be flattened.
    all_columns: set of all columns that are found in the result (this will be
      filled by the function).

  Yields:
    One dict per item of |data|, with its lists and sets flattened.
  """
  separator = ', '

  # Max length of a cell in Excel is technically 32767 characters but if we get
  # too close to this limit Excel seems to create weird results when we open
  # the CSV file. To protect against this, give a little more buffer to the max
  # characters.
  max_cell_length = 32700

  for record in data:
    row = {}
    for prop, value in record.items():
      if isinstance(value, (list, set)):
        # Containers are joined into one cell, preceded by a count column.
        row['num_' + prop] = len(value)
        remaining = separator.join(sorted(value))

        # Peel off chunks while the text is too long and there is still a
        # separator inside the limit to cut at.
        chunk_index = 0
        while len(remaining) > max_cell_length:
          cut = remaining.rfind(separator, 0, max_cell_length)
          if cut == -1:
            break
          column = prop + '_' + str(chunk_index) if chunk_index else prop
          row[column] = remaining[:cut]
          remaining = remaining[cut + len(separator):]
          chunk_index += 1

        # Whatever is left (possibly a single oversized member) goes in the
        # last cell for this list.
        column = prop + '_' + str(chunk_index) if chunk_index else prop
        row[column] = remaining

        assert isinstance(row[prop],
                          (int, bool, str)), ('unexpected type for item: %s' %
                                              type(row[prop]).__name__)
      else:
        # Non-container properties can be added directly.
        row[prop] = value

    all_columns.update(row.keys())
    yield row
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	154
				155
				156	def ExtensionListAsCsv(extensions_list, csv_filename, sort_column='name'):
				157	"""Saves an extensions list to a CSV file.
				158
				159	Args:
				160	extensions_list: an extensions list as returned by ComputeExtensionsList
				161	csv_filename: the name of the CSV file to save
				162	sort_column: the name of the column by which to sort the data
				163	"""
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	164	all_columns = set()
				165	flattened_list = list(Flatten(DictToList(extensions_list), all_columns))
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	166
				167	desired_column_order = [
				168	'id', 'name', 'num_permissions', 'num_installed', 'num_disabled',
				169	'num_forced', 'permissions', 'installed', 'disabled', 'forced'
				170	]
				171
				172	# Order the columns as desired. Columns other than those in
				173	# \|desired_column_order\| will be in an unspecified order after these columns.
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	174	ordered_fieldnames = []
				175	for c in desired_column_order:
				176	matching_columns = []
				177	for f in all_columns:
				178	if f == c or f.startswith(c):
				179	matching_columns.append(f)
				180	ordered_fieldnames.extend(sorted(matching_columns))
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	181
				182	ordered_fieldnames.extend(
				183	[x for x in desired_column_order if x not in ordered_fieldnames])
Tien Mai	8bcfbfc	2020-06-30 16:18:16	[diff] [blame]	184	with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	185	writer = csv.DictWriter(csv_file, fieldnames=ordered_fieldnames)
				186	writer.writeheader()
				187	for row in sorted(flattened_list, key=lambda ext: ext[sort_column]):
				188	writer.writerow(row)
				189
				190
				191	def main(args):
				192	if not args.admin_email:
				193	print('admin_email must be specified.')
				194	sys.exit(1)
				195
				196	if not args.service_account_key_path:
				197	print('service_account_key_path must be specified.')
				198	sys.exit(1)
				199
				200	# Load the json format key that you downloaded from the Google API
				201	# Console when you created your service account. For p12 keys, use the
				202	# from_p12_keyfile method of ServiceAccountCredentials and specify the
				203	# service account email address, p12 keyfile, and scopes.
				204	service_credentials = Credentials.from_service_account_file(
				205	args.service_account_key_path,
				206	scopes=[
Tien Mai	41367aa	2020-03-12 14:24:24	[diff] [blame]	207	'https://www.googleapis.com/auth/admin.directory.device.chromebrowsers.readonly'
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	208	],
				209	subject=args.admin_email)
				210
				211	try:
				212	http = google_auth_httplib2.AuthorizedHttp(service_credentials, http=Http())
				213	extensions_list = {}
				214	base_request_url = 'https://admin.googleapis.com/admin/directory/v1.1beta1/customer/my_customer/devices/chromebrowsers'
				215	request_parameters = ''
				216	browsers_processed = 0
				217	while True:
				218	print('Making request to server ...')
Tien Mai	f9a36f3a	2020-06-19 21:28:25	[diff] [blame]	219
				220	retrycount = 0
				221	while retrycount < 5:
				222	response = http.request(base_request_url + '?' + request_parameters,
				223	'GET')[1]
				224
				225	if isinstance(response, bytes):
				226	response = response.decode('utf-8')
				227	data = json.loads(response)
				228	if 'browsers' not in data:
				229	print('Response error, retrying...')
				230	time.sleep(3)
				231	retrycount += 1
				232	else:
				233	break
Tien Mai	597a0d1f	2020-02-18 16:01:25	[diff] [blame]	234
				235	browsers_in_data = len(data['browsers'])
				236	print('Request returned %s results, analyzing ...' % (browsers_in_data))
				237	ComputeExtensionsList(extensions_list, data)
				238	browsers_processed += browsers_in_data
				239
				240	if 'nextPageToken' not in data or not data['nextPageToken']:
				241	break
				242
				243	print('%s browsers processed.' % (browsers_processed))
				244
				245	if (args.max_browsers_to_process is not None and
				246	args.max_browsers_to_process <= browsers_processed):
				247	print('Stopping at %s browsers processed.' % (browsers_processed))
				248	break
				249
				250	request_parameters = ('pageToken={}').format(data['nextPageToken'])
				251	finally:
				252	print('Analyze results ...')
				253	ExtensionListAsCsv(extensions_list, args.extension_list_csv)
				254	print("Results written to '%s'" % (args.extension_list_csv))
				255
				256
				257	if __name__ == '__main__':
				258	parser = argparse.ArgumentParser(description='CBCM Extension Analyzer')
				259	parser.add_argument(
				260	'-k',
				261	'--service_account_key_path',
				262	metavar='FILENAME',
				263	required=True,
				264	help='The service account key file used to make API requests.')
				265	parser.add_argument(
				266	'-a',
				267	'--admin_email',
				268	required=True,
				269	help='The admin user used to make the API requests.')
				270	parser.add_argument(
				271	'-x',
				272	'--extension_list_csv',
				273	metavar='FILENAME',
				274	default='./extension_list.csv',
				275	help='Generate an extension list to the specified CSV '
				276	'file')
				277	parser.add_argument(
				278	'-m',
				279	'--max_browsers_to_process',
				280	type=int,
				281	help='Maximum number of browsers to process. (Must be > 0).')
				282	args = parser.parse_args()
				283
				284	if (args.max_browsers_to_process is not None and
				285	args.max_browsers_to_process <= 0):
				286	print('max_browsers_to_process must be > 0.')
				287	parser.print_help()
				288	sys.exit(1)
				289
				290	main(args)