blob: 21c6360dc9e501dee0cf8534082009f018146145 [file] [log] [blame]
Peter Wenb1f3b1d2021-02-02 21:30:201#!/usr/bin/env python3
Avi Drissman73a09d12022-09-08 20:33:382# Copyright 2021 The Chromium Authors
Peter Wenb1f3b1d2021-02-02 21:30:203# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
"""Creates a server to offload non-critical-path GN targets."""
6
Peter Wencd460ff52021-02-23 22:40:057from __future__ import annotations
8
Peter Wenb1f3b1d2021-02-02 21:30:209import argparse
Mohamed Heikalf746b57f2024-11-13 21:20:1710import collections
11import contextlib
12import datetime
Peter Wenb1f3b1d2021-02-02 21:30:2013import json
Peter Wenb1f3b1d2021-02-02 21:30:2014import os
Mohamed Heikalf746b57f2024-11-13 21:20:1715import pathlib
Mohamed Heikalf746b57f2024-11-13 21:20:1716import re
Mohamed Heikalabf646e2024-12-12 16:06:0517import signal
Andrew Grieved863d0f2024-12-13 20:13:0118import shlex
Peter Wen6e7e52b2021-02-13 02:39:2819import shutil
Peter Wenb1f3b1d2021-02-02 21:30:2020import socket
21import subprocess
22import sys
Peter Wenf409c0c2021-02-09 19:33:0223import threading
Mohamed Heikalb752b772024-11-25 23:05:4424import traceback
Mohamed Heikalf746b57f2024-11-13 21:20:1725import time
26from typing import Callable, Dict, List, Optional, Tuple, IO
Peter Wenb1f3b1d2021-02-02 21:30:2027
28sys.path.append(os.path.join(os.path.dirname(__file__), 'gyp'))
29from util import server_utils
30
_SOCKET_TIMEOUT = 60 # seconds

# Open logfile handles keyed by build_id. Filled in by create_logfile() and
# read by log_to_file().
_LOGFILES = {}
# Base name of the logfiles kept in an output directory. A numeric suffix is
# appended, with '.0' being the newest file.
_LOGFILE_NAME = 'buildserver.log'
# Number of existing logfile indices that are shifted up by one when a new
# logfile is started.
_MAX_LOGFILES = 6

# First line written to every newly created logfile. BUILD_ID_RE extracts the
# build_id from such a line again.
FIRST_LOG_LINE = '#### Start of log for build_id = {build_id} ####\n'
BUILD_ID_RE = re.compile(r'^#### .*build_id = (?P<build_id>.+) ####')
39
40
def log(msg: str, quiet: bool = False):
  """Prints |msg| to stdout unless |quiet| is set.

  Args:
    msg: Text to print.
    quiet: When true, nothing is printed.
  """
  if not quiet:
    # Lead with a newline so the message always starts on a line of its own.
    print('\n' + msg)
Mohamed Heikalb752b772024-11-25 23:05:4446
47
def set_status(msg: str, *, quiet: bool = False, build_id: Optional[str] = None):
  """Shows |msg| as a single-line terminal status and logs it for its build.

  The line is prefixed with the server-wide TaskStats.prefix() (not the
  per-build one) and overwrites the previous status line.

  Args:
    msg: Status text.
    quiet: When true, nothing is written to the terminal.
    build_id: When set, the prefixed message is also written to that build's
      logfile through log_to_file().
  """
  prefix = f'[{TaskStats.prefix()}] '
  # If the message is specific to a build then also output to its logfile.
  if build_id:
    log_to_file(f'{prefix}{msg}', build_id=build_id)

  # No need to also output to the terminal if quiet.
  if quiet:
    return
  # Shrink the message (leaving a 2-char prefix and use the rest of the room
  # for the suffix) according to terminal size so it is always one line.
  width = shutil.get_terminal_size().columns
  max_msg_width = width - len(prefix)
  if len(msg) > max_msg_width:
    # Account for the ellipsis and the 2-char header. Clamp at zero: on a very
    # narrow terminal the remaining room is zero or negative, and a negative
    # slice index of 0 (msg[-0:]) would return the whole message rather than
    # none of it.
    length_to_show = max(max_msg_width - 5, 0)
    # len(msg) > max_msg_width >= length_to_show, so the start index is > 0.
    tail = msg[len(msg) - length_to_show:]
    msg = f'{msg[:2]}...{tail}'
  # \r to return the carriage to the beginning of line.
  # \033[K to replace the normal \n to erase until the end of the line.
  # Avoid the default line ending so the next \r overwrites the same line just
  # like ninja's output.
  print(f'\r{prefix}{msg}\033[K', end='', flush=True)
Peter Wencd460ff52021-02-23 22:40:0569
70
def log_to_file(message: str, build_id: str):
  """Writes |message| as one line to the open logfile of |build_id|.

  The handle is looked up in _LOGFILES, so a build_id without a registered
  logfile raises KeyError. The file is flushed after every message.
  """
  print(message, file=_LOGFILES[build_id], flush=True)
74
75
def _exception_hook(exctype: type, exc: Exception, tb):
  """Reports an uncaught exception to builders and shuts down all tasks.

  Takes the (type, value, traceback) triple of an exception hook; presumably
  installed as sys.excepthook by code outside this view.
  """
  formatted_traceback = ''.join(traceback.format_exception(exctype, exc, tb))
  # Output uncaught exceptions to all live terminals.
  BuildManager.broadcast(formatted_traceback)
  # Cancel all pending tasks cleanly (i.e. delete stamp files if necessary).
  TaskManager.deactivate()
  # Finally let python's default hook report the exception as usual.
  sys.__excepthook__(exctype, exc, tb)
82
83
def create_logfile(build_id, outdir):
  """Returns the open logfile for |build_id|, creating it when necessary.

  Args:
    build_id: Identifier of the build the logfile belongs to.
    outdir: Directory that holds the buildserver logfiles.

  Returns:
    A text file object, also cached in _LOGFILES under |build_id|.
  """
  cached = _LOGFILES.get(build_id)
  if cached:
    return cached

  out_path = pathlib.Path(outdir)
  newest_path = out_path / f'{_LOGFILE_NAME}.0'

  if newest_path.exists():
    with newest_path.open('rt') as f:
      header = f.readline()
    match = BUILD_ID_RE.search(header)
    # If the newest logfile on disk is referencing the same build we are
    # currently processing, we probably crashed previously and we should
    # pick up where we left off in the same logfile.
    if match and match.group('build_id') == build_id:
      resumed = newest_path.open('at')
      _LOGFILES[build_id] = resumed
      return resumed

  # Do the logfile name shift, highest index first so nothing is overwritten
  # before it has been moved.
  present = {n for n in os.listdir(out_path) if n.startswith(_LOGFILE_NAME)}
  for idx in range(_MAX_LOGFILES - 1, -1, -1):
    old_name = f'{_LOGFILE_NAME}.{idx}'
    if old_name not in present:
      continue
    new_name = f'{_LOGFILE_NAME}.{idx+1}'
    shutil.move(os.path.join(out_path, old_name),
                os.path.join(out_path, new_name))

  # Create a new 0th logfile.
  fresh = newest_path.open('wt')
  _LOGFILES[build_id] = fresh
  fresh.write(FIRST_LOG_LINE.format(build_id=build_id))
  fresh.flush()
  return fresh
118
119
class TaskStats:
  """Class-level counters describing all tasks, shared across threads.

  Every accessor takes _lock, so the methods may be called from any thread.
  """
  _num_processes = 0
  _completed_tasks = 0
  _total_tasks = 0
  _total_task_count_per_build = collections.defaultdict(int)
  _completed_task_count_per_build = collections.defaultdict(int)
  _running_processes_count_per_build = collections.defaultdict(int)
  _outdir_per_build = {}
  _lock = threading.RLock()

  @classmethod
  def no_running_processes(cls):
    """Returns True when no task process is currently counted as running."""
    with cls._lock:
      return not cls._num_processes

  @classmethod
  def add_task(cls, build_id: str, outdir: str):
    """Counts one more task for |build_id| and remembers the build's outdir."""
    with cls._lock:
      cls._total_tasks += 1
      cls._total_task_count_per_build[build_id] += 1
      cls._outdir_per_build[build_id] = outdir

  @classmethod
  def add_process(cls, build_id: str):
    """Counts one more running process, overall and for |build_id|."""
    with cls._lock:
      cls._num_processes += 1
      cls._running_processes_count_per_build[build_id] += 1

  @classmethod
  def remove_process(cls, build_id: str):
    """Counts one fewer running process, overall and for |build_id|."""
    with cls._lock:
      cls._num_processes -= 1
      cls._running_processes_count_per_build[build_id] -= 1

  @classmethod
  def complete_task(cls, build_id: str):
    """Counts one more completed task, overall and for |build_id|."""
    with cls._lock:
      cls._completed_tasks += 1
      cls._completed_task_count_per_build[build_id] += 1

  @classmethod
  def num_pending_tasks(cls, build_id: str = None):
    """Returns added-minus-completed tasks for |build_id|, or for all builds."""
    with cls._lock:
      if not build_id:
        return cls._total_tasks - cls._completed_tasks
      total = cls._total_task_count_per_build[build_id]
      done = cls._completed_task_count_per_build[build_id]
      return total - done

  @classmethod
  def num_completed_tasks(cls, build_id: str = None):
    """Returns the completed task count for |build_id|, or for all builds."""
    with cls._lock:
      if not build_id:
        return cls._completed_tasks
      return cls._completed_task_count_per_build[build_id]

  @classmethod
  def query_build(cls, query_build_id: str = None):
    """Returns a JSON-serializable status report.

    Args:
      query_build_id: Restricts the report to this build. When falsy, every
        live build and every build that has had a task is reported, sorted by
        build id.

    Returns:
      A dict with the server 'pid' and a 'builds' list of per-build dicts.
    """
    with cls._lock:
      live_builds = BuildManager.get_live_builds()
      if query_build_id:
        wanted_ids = [query_build_id]
      else:
        known_ids = set(live_builds) | set(cls._total_task_count_per_build)
        wanted_ids = sorted(known_ids)
      build_reports = [
          {
              'build_id': wanted_id,
              'is_active': wanted_id in live_builds,
              'completed_tasks': cls.num_completed_tasks(wanted_id),
              'pending_tasks': cls.num_pending_tasks(wanted_id),
              'active_tasks': [
                  t.cmd for t in TaskManager.get_current_tasks(wanted_id)
              ],
              # None if no tasks.
              'outdir': cls._outdir_per_build.get(wanted_id),
          } for wanted_id in wanted_ids
      ]
      return {
          'pid': os.getpid(),
          'builds': build_reports,
      }

  @classmethod
  def prefix(cls, build_id: str = None):
    """Returns a ninja-like progress prefix: '<N> processes, <done>/<total>'.

    Ninja's own prefix looks like: [205 processes, 6/734 @ 6.5/s : 0.922s ].
    Time taken and task completion rate are left out since the build server
    always runs in the background and uses idle priority for its tasks.
    """
    with cls._lock:
      if build_id:
        running = cls._running_processes_count_per_build[build_id]
        done = cls._completed_task_count_per_build[build_id]
        total = cls._total_task_count_per_build[build_id]
      else:
        running = cls._num_processes
        done = cls._completed_tasks
        total = cls._total_tasks
      noun = 'process' if running == 1 else 'processes'
      return f'{running} {noun}, {done}/{total}'
Peter Wenb1f3b1d2021-02-02 21:30:20219
Peter Wenf409c0c2021-02-09 19:33:02220
def check_pid_alive(pid: int):
  """Returns whether a process with id |pid| currently exists.

  Args:
    pid: Process id to probe.

  Returns:
    True if the process exists (even when owned by another user), else False.
  """
  # Zero and negative values address process groups in kill(2) rather than a
  # single process, so they can never name one live builder.
  if pid <= 0:
    return False
  try:
    # Signal 0 only performs the existence and permission checks; no signal
    # is delivered.
    os.kill(pid, 0)
  except PermissionError:
    # The process exists, we are just not allowed to signal it.
    return True
  except OSError:
    return False
  return True
227
228
class BuildManager:
  """Tracks registered builder processes and the ttys of their builds.

  All state is class-level and guarded by _lock.
  """
  # build_id -> pid of the builder process that registered it.
  _live_builders: dict[str, int] = {}
  # build_id -> most recently registered tty handle for that build.
  _build_ttys: dict[str, IO[str]] = {}
  _lock = threading.RLock()

  @classmethod
  def register_builder(cls, build_id, builder_pid):
    """Records |builder_pid| (converted with int()) as builder of |build_id|."""
    with cls._lock:
      cls._live_builders[build_id] = int(builder_pid)

  @classmethod
  def register_tty(cls, build_id, tty):
    """Records |tty| as the output handle for |build_id|, replacing any other."""
    with cls._lock:
      cls._build_ttys[build_id] = tty

  @classmethod
  def get_live_builds(cls):
    """Returns the build ids whose builder process is still alive.

    Builders found dead are dropped from the registry as a side effect.
    """
    with cls._lock:
      # Iterate over a snapshot since entries are deleted while looping.
      for build_id, builder_pid in list(cls._live_builders.items()):
        if not check_pid_alive(builder_pid):
          del cls._live_builders[build_id]
      return list(cls._live_builders.keys())

  @classmethod
  def broadcast(cls, msg: str):
    """Writes |msg| plus a newline once to every distinct registered tty.

    Handles that can no longer be used are skipped, so one dead terminal
    neither raises nor prevents the remaining terminals from being notified.
    """
    seen = set()
    with cls._lock:
      for tty in cls._build_ttys.values():
        try:
          # Do not output to the same tty multiple times. Use st_ino and
          # st_dev to compare open file descriptors.
          st = os.stat(tty.fileno())
          key = (st.st_ino, st.st_dev)
          if key in seen:
            continue
          seen.add(key)
          tty.write(msg + '\n')
          tty.flush()
        except (OSError, ValueError):
          # OSError covers BrokenPipeError as well as a terminal that has gone
          # away; ValueError is raised for a handle that is already closed.
          continue

  @classmethod
  def has_live_builds(cls):
    """Returns True if at least one registered builder is still alive."""
    return bool(cls.get_live_builds())
273
274
class TaskManager:
  """Class to encapsulate a threadsafe queue and handle deactivating it."""
  # New tasks enter on the left and are started from the right (FIFO).
  _queue: collections.deque['Task'] = collections.deque()
  # Tasks that were popped off the queue in order to be started.
  _current_tasks: set['Task'] = set()
  _deactivated = False
  _lock = threading.RLock()

  @classmethod
  def add_task(cls, task: 'Task', options):
    """Queues |task| and starts queued tasks if the machine load allows it.

    Args:
      task: The task to queue.
      options: Object with a `quiet` attribute controlling terminal output.
    """
    assert not cls._deactivated
    TaskStats.add_task(task.build_id, task.cwd)
    with cls._lock:
      cls._queue.appendleft(task)
    set_status(f'QUEUED {task.name}',
               quiet=options.quiet,
               build_id=task.build_id)
    cls._maybe_start_tasks()

  @classmethod
  def task_done(cls, task: 'Task'):
    """Records |task| as completed and stops tracking it as current."""
    TaskStats.complete_task(build_id=task.build_id)
    with cls._lock:
      # A task that is terminated while still queued completes without ever
      # having been added to _current_tasks, so its absence is not an error.
      cls._current_tasks.discard(task)

  @classmethod
  def get_current_tasks(cls, build_id):
    """Returns a list of the current tasks that belong to |build_id|."""
    with cls._lock:
      return [t for t in cls._current_tasks if t.build_id == build_id]

  @classmethod
  def deactivate(cls):
    """Stops starting new tasks and terminates all queued and current ones."""
    cls._deactivated = True
    tasks_to_terminate: list[Task] = []
    with cls._lock:
      while cls._queue:
        tasks_to_terminate.append(cls._queue.pop())
      # Cancel possibly running tasks.
      tasks_to_terminate.extend(cls._current_tasks)
    # Terminate outside lock since task threads need the lock to finish
    # terminating.
    for task in tasks_to_terminate:
      task.terminate()

  @classmethod
  def cancel_build(cls, build_id):
    """Terminates every queued and current task that belongs to |build_id|."""
    with cls._lock:
      # Cancel pending tasks. Collect first, since the queue must not be
      # modified while it is being iterated.
      terminated_pending_tasks = [
          t for t in cls._queue if t.build_id == build_id
      ]
      for task in terminated_pending_tasks:
        cls._queue.remove(task)
      # Cancel running tasks.
      terminated_current_tasks = [
          t for t in cls._current_tasks if t.build_id == build_id
      ]
    # Terminate tasks outside lock since task threads need the lock to finish
    # terminating.
    for task in terminated_pending_tasks:
      task.terminate()
    for task in terminated_current_tasks:
      task.terminate()

  @staticmethod
  def _num_running_processes():
    """Returns the `procs_running` value reported by /proc/stat."""
    with open('/proc/stat') as f:
      for line in f:
        if line.startswith('procs_running'):
          return int(line.rstrip().split()[1])
    # Raised explicitly (instead of `assert False`) so the failure is not
    # silently turned into a None return when python runs with -O.
    raise AssertionError('Could not read /proc/stat')

  @classmethod
  def _maybe_start_tasks(cls):
    """Starts up to two queued tasks when the machine is lightly loaded."""
    if cls._deactivated:
      return
    # Include load avg so that a small dip in the number of currently running
    # processes will not cause new tasks to be started while the overall load
    # is heavy.
    cur_load = max(cls._num_running_processes(), os.getloadavg()[0])
    # os.cpu_count() may return None when the count cannot be determined.
    cpu_count = os.cpu_count() or 1
    num_started = 0
    # Always start a task if we don't have any running, so that all tasks are
    # eventually finished. Try starting up tasks when the overall load is
    # light. Limit to at most 2 new tasks to prevent ramping up too fast.
    # There is a chance where multiple threads call _maybe_start_tasks and
    # each gets to spawn up to 2 new tasks, but since the only downside is
    # some build tasks get worked on earlier rather than later, it is not
    # worth mitigating.
    while num_started < 2 and (TaskStats.no_running_processes()
                               or num_started + cur_load < cpu_count):
      with cls._lock:
        if not cls._queue:
          return
        next_task = cls._queue.pop()
        cls._current_tasks.add(next_task)
      # start() returns 0 for a task that was terminated while queued.
      num_started += next_task.start(cls._maybe_start_tasks)
Peter Wencd460ff52021-02-23 22:40:05374
375
376# TODO(wnwen): Break this into Request (encapsulating what ninja sends) and Task
377# when a Request starts to be run. This would eliminate ambiguity
378# about when and whether _proc/_thread are initialized.
class Task:
  """Class to represent one task and operations on it."""

  def __init__(self, name: str, cwd: str, cmd: List[str],
               tty: Optional[IO[str]], stamp_file: str, build_id: str,
               options):
    """Creates a task that has not been started yet.

    Args:
      name: Name of the task, used in status and failure messages.
      cwd: Directory the command runs in; stamp_file is relative to it.
      cmd: Command line to execute.
      tty: Optional open handle that failure output is also written to.
      stamp_file: Stamp file deleted when the task fails or is cancelled.
      build_id: Identifier of the build this task belongs to.
      options: Object with a `quiet` attribute controlling terminal output.
    """
    self.name = name
    self.cwd = cwd
    self.cmd = cmd
    self.stamp_file = stamp_file
    self.tty = tty
    self.build_id = build_id
    self.options = options
    self._terminated = False
    self._replaced = False
    self._lock = threading.RLock()
    self._proc: Optional[subprocess.Popen] = None
    self._thread: Optional[threading.Thread] = None
    self._delete_stamp_thread: Optional[threading.Thread] = None
    self._return_code: Optional[int] = None

  @property
  def key(self):
    """The (cwd, name) pair used to detect a newer request for this task."""
    return (self.cwd, self.name)

  def __hash__(self):
    return hash((self.key, self.build_id))

  def __eq__(self, other):
    # Defer to the other operand for foreign types instead of failing on a
    # missing attribute.
    if not isinstance(other, Task):
      return NotImplemented
    return self.key == other.key and self.build_id == other.build_id

  def start(self, on_complete_callback: Callable[[], None]) -> int:
    """Starts the task if it has not already been terminated.

    Returns the number of processes that have been started. This is called at
    most once when the task is popped off the task queue."""

    # The environment variable forces the script to actually run in order to
    # avoid infinite recursion.
    env = os.environ.copy()
    env[server_utils.BUILD_SERVER_ENV_VARIABLE] = '1'

    with self._lock:
      if self._terminated:
        return 0

      # Use os.nice(19) to ensure the lowest priority (idle) for these
      # analysis tasks since we want to avoid slowing down the actual build.
      # TODO(wnwen): Use ionice to reduce resource consumption.
      TaskStats.add_process(self.build_id)
      set_status(f'STARTING {self.name}',
                 quiet=self.options.quiet,
                 build_id=self.build_id)
      # This use of preexec_fn is sufficiently simple, just one os.nice call.
      # pylint: disable=subprocess-popen-preexec-fn
      self._proc = subprocess.Popen(
          self.cmd,
          stdout=subprocess.PIPE,
          stderr=subprocess.STDOUT,
          cwd=self.cwd,
          env=env,
          text=True,
          preexec_fn=lambda: os.nice(19),
      )
      self._thread = threading.Thread(
          target=self._complete_when_process_finishes,
          args=(on_complete_callback, ))
      self._thread.start()
      return 1

  def terminate(self, replaced=False):
    """Can be called multiple times to cancel and ignore the task's output.

    Args:
      replaced: True when the task is cancelled because a newer request for
        the same task arrived; its stamp file is then left in place.
    """
    with self._lock:
      if self._terminated:
        return
      self._terminated = True
      self._replaced = replaced

    # It is safe to access _proc and _thread outside of _lock since they are
    # only changed by self.start holding _lock when self._terminated is false.
    # Since we have just set self._terminated to true inside of _lock, we know
    # that neither _proc nor _thread will be changed from this point onwards.
    if self._proc:
      self._proc.terminate()
      self._proc.wait()
    # Ensure that self._complete is called either by the thread or by us.
    if self._thread:
      self._thread.join()
    else:
      self._complete()

  def _complete_when_process_finishes(self,
                                      on_complete_callback: Callable[[], None]):
    """Thread body: waits for the process, then completes the task."""
    assert self._proc
    # We know Popen.communicate will return a str and not a byte since it is
    # constructed with text=True.
    stdout: str = self._proc.communicate()[0]
    self._return_code = self._proc.returncode
    TaskStats.remove_process(build_id=self.build_id)
    self._complete(stdout)
    on_complete_callback()

  def _complete(self, stdout: str = ''):
    """Update the user and ninja after the task has run or been terminated.

    This method should only be run once per task. Avoid modifying the task so
    that this method does not need locking."""

    delete_stamp = False
    status_string = 'FINISHED'
    if self._terminated:
      status_string = 'TERMINATED'
      # When tasks are replaced, avoid deleting the stamp file, context:
      # https://issuetracker.google.com/301961827.
      if not self._replaced:
        delete_stamp = True
    elif stdout or self._return_code != 0:
      # Any output or a non-zero exit code counts as a failure.
      status_string = 'FAILED'
      delete_stamp = True
      preamble = [
          f'FAILED: {self.name}',
          f'Return code: {self._return_code}',
          'CMD: ' + shlex.join(self.cmd),
          'STDOUT:',
      ]

      message = '\n'.join(preamble + [stdout])
      log_to_file(message, build_id=self.build_id)
      log(message, quiet=self.options.quiet)
      if self.tty:
        # Add emoji to show that output is from the build server.
        preamble = [f'⏩ {line}' for line in preamble]
        remote_message = '\n'.join(preamble + [stdout])
        try:
          # Add a new line at start of message to clearly delineate from
          # previous output/text already on the remote tty we are printing to.
          self.tty.write(f'\n{remote_message}')
          self.tty.flush()
        except (OSError, ValueError):
          # The remote terminal is gone or its handle is closed. The failure
          # has already been logged above, and the stamp handling and task
          # bookkeeping below must still run.
          pass
    if delete_stamp:
      # Force siso to consider failed targets as dirty.
      try:
        os.unlink(os.path.join(self.cwd, self.stamp_file))
      except FileNotFoundError:
        pass
    # Otherwise nothing to do: we do not care about the action writing a too
    # new mtime. Siso only cares about the mtime that is recorded in its
    # database at the time the original action finished.
    TaskManager.task_done(self)
    set_status(f'{status_string} {self.name}',
               quiet=self.options.quiet,
               build_id=self.build_id)
Peter Wenb1f3b1d2021-02-02 21:30:20530
531
def _handle_add_task(data, current_tasks: Dict[Tuple[str, str], 'Task'],
                     options):
  """Handle messages of type ADD_TASK.

  Args:
    data: Decoded request. Reads the keys 'build_id', 'cwd', 'name', 'cmd',
      'stamp_file' and the optional 'tty'.
    current_tasks: Most recent task per task key; updated in place.
    options: Object with a `quiet` attribute controlling terminal output.
  """
  build_id = data['build_id']
  task_outdir = data['cwd']
  tty_name = data.get('tty')

  tty = None
  if tty_name:
    try:
      tty = open(tty_name, 'wt')
    except OSError as e:
      # The requesting terminal may already be gone. Run the task without
      # remote output instead of taking the whole server down.
      log(f'Could not open tty {tty_name}: {e}', quiet=options.quiet)
    else:
      BuildManager.register_tty(build_id, tty)

  # Make sure a logfile for the build_id exists.
  create_logfile(build_id, task_outdir)

  new_task = Task(name=data['name'],
                  cwd=task_outdir,
                  cmd=data['cmd'],
                  tty=tty,
                  build_id=build_id,
                  stamp_file=data['stamp_file'],
                  options=options)
  # A newer request for the same task supersedes the previous one.
  existing_task = current_tasks.get(new_task.key)
  if existing_task:
    existing_task.terminate(replaced=True)
  current_tasks[new_task.key] = new_task

  TaskManager.add_task(new_task, options)
Mohamed Heikalf746b57f2024-11-13 21:20:17559
560
def _handle_query_build(data, connection: socket.socket):
  """Handle messages of type QUERY_BUILD.

  Replies on |connection| with the JSON-encoded result of
  TaskStats.query_build() for data['build_id'], then closes the connection.
  """
  response = TaskStats.query_build(data['build_id'])
  # We should not die because the client died.
  with contextlib.suppress(BrokenPipeError):
    with connection:
      encoded = json.dumps(response).encode('utf8')
      server_utils.SendMessage(connection, encoded)
571
572
def _handle_heartbeat(connection: socket.socket):
  """Handle messages of type POLL_HEARTBEAT.

  Replies on |connection| with a JSON {'status': 'OK'} message, then closes
  the connection.
  """
  # We should not die because the client died.
  with contextlib.suppress(BrokenPipeError):
    with connection:
      reply = json.dumps({'status': 'OK'}).encode('utf8')
      server_utils.SendMessage(connection, reply)
584
585
def _handle_register_builder(data):
  """Handle messages of type REGISTER_BUILDER.

  Registers data['builder_pid'] as the builder of data['build_id'].
  """
  BuildManager.register_builder(data['build_id'], data['builder_pid'])
591
592
def _handle_cancel_build(data):
  """Handle messages of type CANCEL_BUILD.

  Cancels all tasks that belong to data['build_id'].
  """
  TaskManager.cancel_build(data['build_id'])
597
598
def _listen_for_request_data(sock: socket.socket):
  """Helper to encapsulate getting a new message.

  Accepts connections on |sock| forever and yields a (decoded JSON message,
  connection) pair for every non-empty message. The consumer owns, and has to
  close, each yielded connection. Connections that deliver no message, or
  whose message cannot be received or decoded, are closed here.
  """
  while True:
    conn = sock.accept()[0]
    try:
      message_bytes = server_utils.ReceiveMessage(conn)
      if not message_bytes:
        # Nothing to dispatch, so no handler would ever close this connection.
        conn.close()
        continue
      data = json.loads(message_bytes)
    except Exception:
      # Do not leak the connection; the error itself is left to the caller.
      conn.close()
      raise
    yield data, conn
605 yield json.loads(message_bytes), conn
606
607
def _register_cleanup_signal_handlers(options):
  """Installs SIGINT/SIGTERM handlers that shut the task manager down first.

  After the cleanup the previously installed handler of the signal is invoked
  when it is callable. Otherwise SIGINT raises KeyboardInterrupt and SIGTERM
  exits with status 1.

  Args:
    options: Object with a `quiet` attribute controlling terminal output.
  """
  previous_handlers = {
      signal.SIGINT: signal.getsignal(signal.SIGINT),
      signal.SIGTERM: signal.getsignal(signal.SIGTERM),
  }

  def _cleanup(signum, frame):
    log('STOPPING SERVER...', quiet=options.quiet)
    # Gracefully shut down the task manager, terminating all queued tasks.
    TaskManager.deactivate()
    log('STOPPED', quiet=options.quiet)
    previous = previous_handlers.get(signum)
    # Sometimes the previous handler is not a callable.
    if callable(previous):
      previous(signum, frame)
    elif signum == signal.SIGINT:
      raise KeyboardInterrupt()
    elif signum == signal.SIGTERM:
      sys.exit(1)

  for handled_signal in previous_handlers:
    signal.signal(handled_signal, _cleanup)
631
632
def _process_requests(sock: socket.socket, options):
  """Main loop for build server receiving request messages.

  Runs until interrupted, or until the socket times out while there are no
  pending tasks and no live builds and options.exit_on_idle is set.

  Args:
    sock: Listening socket that requests arrive on.
    options: Object with `quiet` and `exit_on_idle` attributes.
  """
  # Since dicts in python can contain anything, explicitly type tasks to help
  # make static type checking more useful.
  tasks: Dict[Tuple[str, str], Task] = {}
  log(
      'READY... Remember to set android_static_analysis="build_server" in '
      'args.gn files',
      quiet=options.quiet)
  _register_cleanup_signal_handlers(options)
  # pylint: disable=too-many-nested-blocks
  while True:
    try:
      for data, connection in _listen_for_request_data(sock):
        # Requests without an explicit type are treated as ADD_TASK.
        message_type = data.get('message_type', server_utils.ADD_TASK)
        if message_type == server_utils.POLL_HEARTBEAT:
          _handle_heartbeat(connection)
        elif message_type == server_utils.ADD_TASK:
          connection.close()
          _handle_add_task(data, tasks, options)
        elif message_type == server_utils.QUERY_BUILD:
          _handle_query_build(data, connection)
        elif message_type == server_utils.REGISTER_BUILDER:
          connection.close()
          _handle_register_builder(data)
        elif message_type == server_utils.CANCEL_BUILD:
          connection.close()
          _handle_cancel_build(data)
        else:
          # Unrecognised request: nothing handles it, so close the connection
          # here instead of leaking it.
          connection.close()
    except TimeoutError:
      # If we have not received a new task in a while and do not have any
      # pending tasks or running builds, then exit. Otherwise keep waiting.
      if (TaskStats.num_pending_tasks() == 0
          and not BuildManager.has_live_builds() and options.exit_on_idle):
        break
    except KeyboardInterrupt:
      break
Mohamed Heikalf746b57f2024-11-13 21:20:17669
670
def query_build_info(build_id):
  """Communicates with the main server to query build info.

  Args:
    build_id: Build to ask about; sent to the server as given.

  Returns:
    The server's JSON response, decoded.
  """
  request = json.dumps({
      'message_type': server_utils.QUERY_BUILD,
      'build_id': build_id,
  }).encode('utf8')
  with socket.socket(socket.AF_UNIX) as sock:
    sock.connect(server_utils.SOCKET_ADDRESS)
    sock.settimeout(3)
    server_utils.SendMessage(sock, request)
    response_bytes = server_utils.ReceiveMessage(sock)
  return json.loads(response_bytes)
684
685
def _wait_for_build(build_id):
  """Communicates with the main server waiting for a build to complete.

  Polls once per second and prints a progress line until the server reports
  no pending tasks for |build_id| or refuses the connection.

  Returns:
    0 in both cases.
  """
  started_at = datetime.datetime.now()
  while True:
    try:
      response = query_build_info(build_id)
    except ConnectionRefusedError:
      print('No server running. It likely finished all tasks.')
      print('You can check $OUTDIR/buildserver.log.0 to be sure.')
      return 0

    remaining = response['builds'][0]['pending_tasks']
    if remaining == 0:
      print(f'\nAll tasks completed for build_id: {build_id}.')
      return 0

    elapsed = datetime.datetime.now() - started_at
    # \r and \033[K rewrite the same terminal line on every poll.
    print(f'\rWaiting for {remaining} tasks [{elapsed!s}]\033[K',
          end='',
          flush=True)
    time.sleep(1)
709
710
def _check_if_running():
  """Communicates with the main server to make sure it is running.

  Returns:
    0 if connecting to the server socket succeeds. Otherwise prints
    instructions for starting the server and returns 1.
  """
  with socket.socket(socket.AF_UNIX) as sock:
    try:
      sock.connect(server_utils.SOCKET_ADDRESS)
    except OSError:
      print('Build server is not running and '
            'android_static_analysis="build_server" is set.\nPlease run '
            'this command in a separate terminal:\n\n'
            '$ build/android/fast_local_dev_server.py\n')
      return 1
  return 0
724
725
def _send_message_and_close(message_dict):
  """Sends |message_dict| to the build server as UTF-8 JSON and disconnects.

  No response is read. Connection and send errors propagate to the caller.

  Args:
    message_dict: JSON-serializable dict to deliver.
  """
  # A socket object is its own context manager and closes itself on exit.
  with socket.socket(socket.AF_UNIX) as sock:
    sock.connect(server_utils.SOCKET_ADDRESS)
    sock.settimeout(3)
    payload = json.dumps(message_dict).encode('utf8')
    server_utils.SendMessage(sock, payload)
731
732
def _send_cancel_build(build_id):
  """Sends a CANCEL_BUILD message for |build_id| to the build server.

  Args:
    build_id: Identifier of the build to cancel.

  Returns:
    0 once the message has been sent. Socket errors propagate.
  """
  request = {
      'message_type': server_utils.CANCEL_BUILD,
      'build_id': build_id,
  }
  _send_message_and_close(request)
  return 0
739
740
def _register_builder(build_id, builder_pid):
  """Sends a REGISTER_BUILDER message for |build_id| to the build server.

  The send is attempted up to three times, pausing briefly after each socket
  error.

  Args:
    build_id: Identifier of the build being registered.
    builder_pid: Pid of the builder process, forwarded to the server as-is.

  Returns:
    0 if the message was sent, 1 if every attempt raised a socket error.
  """
  # The message does not change between attempts, so build it once.
  message = {
      'message_type': server_utils.REGISTER_BUILDER,
      'build_id': build_id,
      'builder_pid': builder_pid,
  }
  for _attempt in range(3):
    try:
      _send_message_and_close(message)
      return 0
    except socket.error:
      time.sleep(0.05)
  print(f'Failed to register builder for build_id={build_id}.')
  return 1
754
755
def _print_build_status_all():
  """Prints a one-line summary for every build reported by the server.

  After the per-build lines, up to four of the commands currently executing
  across all builds are listed in sorted order, each truncated to 200
  characters.

  Returns:
    0 always, including when no server is running.
  """
  try:
    response = query_build_info(None)
  except ConnectionRefusedError:
    print('No server running. Consult $OUTDIR/buildserver.log.0')
    return 0

  builds = response['builds']
  server_pid = response['pid']
  print(f'Build server (PID={server_pid}) has {len(builds)} registered builds')

  running_cmds = []
  for info in builds:
    build_id = info['build_id']
    pending = info['pending_tasks']
    completed = info['completed_tasks']
    executing = info['active_tasks']
    out_dir = info['outdir']
    still_active = info['is_active']
    total = pending + completed
    running_cmds.extend(executing)

    if still_active or total != 0:
      parts = ['Siso still running' if still_active else 'Siso finished']
      if out_dir:
        parts.append(f' in {out_dir}')
      parts.append(f'. Completed [{completed}/{total}].')
      if completed < total:
        parts.append(f' {len(executing)} tasks currently executing')
      status = ''.join(parts)
    else:
      # Inactive build that never had a task.
      status = 'Finished without any jobs'
    print(f'{build_id}: {status}')

  if not running_cmds:
    return 0

  running_total = len(running_cmds)
  shown = min(4, running_total)
  print(f'Currently executing (showing {shown} of {running_total}):')
  for cmd in sorted(running_cmds)[:shown]:
    line = shlex.join(cmd)
    # Keep very long command lines from flooding the terminal.
    print(line[:200] + '...' if len(line) > 200 else line)
  return 0
798
799
def _print_build_status(build_id):
  """Prints a short progress summary for |build_id|.

  Nothing is printed unless at least one task has completed. If tasks are
  still pending, a shell-quoted command that waits for them is printed too.

  Args:
    build_id: Identifier of the build to report on.

  Returns:
    0 normally (including when no server is running), or 1 if the server's
    response contains no entry for |build_id|.
  """
  try:
    builds = query_build_info(build_id)['builds']
  except ConnectionRefusedError:
    print('No server running. Consult $OUTDIR/buildserver.log.0')
    return 0
  if not builds:
    # Report an empty response explicitly rather than failing with an
    # IndexError on builds[0].
    print(f'No build found with build_id: {build_id}.')
    return 1
  build_info = builds[0]
  pending_tasks = build_info['pending_tasks']
  completed_tasks = build_info['completed_tasks']
  total_tasks = pending_tasks + completed_tasks

  # Stay silent unless at least one task has completed.
  if completed_tasks:
    if pending_tasks:
      print('Build server is still running in the background. ' +
            f'[{completed_tasks}/{total_tasks}] Tasks Done.')
      print('Run this to wait for the pending tasks:')
      server_path = os.path.relpath(str(server_utils.SERVER_SCRIPT))
      # The command is meant to be pasted into a shell, so quote each
      # argument instead of joining with bare spaces.
      print(shlex.join([server_path, '--wait-for-build', build_id]))
    else:
      print('Build Server is done with all background tasks. ' +
            f'Completed [{completed_tasks}/{total_tasks}].')
  return 0
822
823
def _wait_for_task_requests(args):
  """Binds the server socket and processes requests on it.

  Args:
    args: Parsed command-line arguments, forwarded to _process_requests().

  Returns:
    1 if the socket address is already in use (presumably because another
    server instance owns it), otherwise 0 once _process_requests() returns.

  Raises:
    OSError: If bind() fails for any reason other than EADDRINUSE.
  """
  # Imported here because only this function needs it; the symbolic constant
  # replaces the Linux-specific literal 98.
  import errno  # pylint: disable=import-outside-toplevel

  with socket.socket(socket.AF_UNIX) as sock:
    sock.settimeout(_SOCKET_TIMEOUT)
    try:
      sock.bind(server_utils.SOCKET_ADDRESS)
    except socket.error as e:
      if e.errno == errno.EADDRINUSE:
        return 1
      raise
    sock.listen()
    _process_requests(sock, args)
  return 0
Peter Wenb1f3b1d2021-02-02 21:30:20837
838
def main():
  """Parses the command line and runs the requested action.

  The first client option that is set wins, checked in the order listed in
  the dispatch table below. With none set, the process runs as the server.

  Returns:
    The exit code produced by the selected action.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  add_option = parser.add_argument
  add_option('--fail-if-not-running',
             action='store_true',
             help='Used by GN to fail fast if the build server is not running.')
  add_option('--exit-on-idle',
             action='store_true',
             help='Server started on demand. Exit when all tasks run out.')
  add_option('--quiet',
             action='store_true',
             help='Do not output status updates.')
  add_option('--wait-for-build',
             metavar='BUILD_ID',
             help='Wait for build server to finish with all tasks '
             'for BUILD_ID and output any pending messages.')
  add_option('--print-status',
             metavar='BUILD_ID',
             help='Print the current state of a build.')
  add_option('--print-status-all',
             action='store_true',
             help='Print the current state of all active builds.')
  add_option('--register-build-id',
             metavar='BUILD_ID',
             help='Inform the build server that a new build has started.')
  add_option('--builder-pid',
             help='Builder process\'s pid for build BUILD_ID.')
  add_option('--cancel-build',
             metavar='BUILD_ID',
             help='Cancel all pending and running tasks for BUILD_ID.')
  args = parser.parse_args()

  # (option value, action) pairs in priority order.
  client_actions = (
      (args.fail_if_not_running, _check_if_running),
      (args.wait_for_build, lambda: _wait_for_build(args.wait_for_build)),
      (args.print_status, lambda: _print_build_status(args.print_status)),
      (args.print_status_all, _print_build_status_all),
      (args.register_build_id,
       lambda: _register_builder(args.register_build_id, args.builder_pid)),
      (args.cancel_build, lambda: _send_cancel_build(args.cancel_build)),
  )
  for requested, action in client_actions:
    if requested:
      return action()

  # No client option was given: act as the server.
  return _wait_for_task_requests(args)
887
if __name__ == '__main__':
  # Install this module's hook for uncaught exceptions before doing any work,
  # then use main()'s return value as the process exit status.
  sys.excepthook = _exception_hook
  exit_status = main()
  sys.exit(exit_status)