blob: b9f6caaa4c51d55cb2db1b718c3f0a2426f46134 [file] [log] [blame]
Peter Wenb1f3b1d2021-02-02 21:30:201#!/usr/bin/env python3
Avi Drissman73a09d12022-09-08 20:33:382# Copyright 2021 The Chromium Authors
Peter Wenb1f3b1d2021-02-02 21:30:203# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""Creates an server to offload non-critical-path GN targets."""
6
Peter Wencd460ff52021-02-23 22:40:057from __future__ import annotations
8
Peter Wenb1f3b1d2021-02-02 21:30:209import argparse
Mohamed Heikalf746b57f2024-11-13 21:20:1710import collections
11import contextlib
Andrew Grieve0d6e8a752025-02-05 21:20:5012import dataclasses
Mohamed Heikalf746b57f2024-11-13 21:20:1713import datetime
Peter Wenb1f3b1d2021-02-02 21:30:2014import os
Mohamed Heikalf746b57f2024-11-13 21:20:1715import pathlib
Mohamed Heikalf746b57f2024-11-13 21:20:1716import re
Mohamed Heikalabf646e2024-12-12 16:06:0517import signal
Andrew Grieved863d0f2024-12-13 20:13:0118import shlex
Peter Wen6e7e52b2021-02-13 02:39:2819import shutil
Peter Wenb1f3b1d2021-02-02 21:30:2020import socket
21import subprocess
22import sys
Peter Wenf409c0c2021-02-09 19:33:0223import threading
Mohamed Heikalb752b772024-11-25 23:05:4424import traceback
Mohamed Heikalf746b57f2024-11-13 21:20:1725import time
26from typing import Callable, Dict, List, Optional, Tuple, IO
Peter Wenb1f3b1d2021-02-02 21:30:2027
28sys.path.append(os.path.join(os.path.dirname(__file__), 'gyp'))
29from util import server_utils
30
# Socket timeout value. NOTE(review): the code that applies it is not visible
# in this part of the file.
_SOCKET_TIMEOUT = 60  # seconds

# Base name of the logfiles written into a build's output directory. A numeric
# suffix is appended ('.0', '.1', ...); '.0' is the newest one.
_LOGFILE_NAME = 'buildserver.log'
# Number of logfile indices that get shifted when a new logfile is created.
_MAX_LOGFILES = 6

# Header written at the top of every freshly created logfile.
FIRST_LOG_LINE = """\
#### Start of log for build: {build_id}
#### CWD: {outdir}
"""
# Recovers the build id from the first header line of an existing logfile.
BUILD_ID_RE = re.compile(r'^#### Start of log for build: (?P<build_id>.+)')
Mohamed Heikalf746b57f2024-11-13 21:20:1741
42
def server_log(msg: str):
  """Prints |msg| on the server's own stdout unless quiet mode is enabled."""
  if not OptionsManager.is_quiet():
    # The leading newline makes sure the message starts on a line of its own.
    print('\n' + msg)
Mohamed Heikalb752b772024-11-25 23:05:4448
49
def print_status(prefix: str, msg: str):
  """Prints a one-line status that overwrites the previous status line.

  The message is shortened to fit the terminal width: the first two
  characters are kept, followed by an ellipsis and as much of the tail of the
  message as fits.

  Args:
    prefix: Text printed verbatim in front of the message.
    msg: Status message; shortened when it does not fit on one line.
  """
  # No need to also output to the terminal if quiet.
  if OptionsManager.is_quiet():
    return
  # Shrink the message (leaving a 2-char prefix and use the rest of the room
  # for the suffix) according to terminal size so it is always one line.
  width = shutil.get_terminal_size().columns
  max_msg_width = width - len(prefix)
  if len(msg) > max_msg_width:
    # Account for ellipsis and header. Clamp at zero: on very narrow terminals
    # the subtraction goes non-positive, and a zero/negative bound in the
    # slice below would select the whole message (or an arbitrary part of it)
    # instead of truncating.
    length_to_show = max(max_msg_width - 5, 0)
    tail = msg[-length_to_show:] if length_to_show else ''
    msg = f'{msg[:2]}...{tail}'
  # \r to return the carriage to the beginning of line.
  # \033[K to replace the normal \n to erase until the end of the line.
  # Avoid the default line ending so the next \r overwrites the same line just
  # like ninja's output.
  print(f'\r{prefix}{msg}\033[K', end='', flush=True)
Peter Wencd460ff52021-02-23 22:40:0566
67
def _exception_hook(exctype: type, exc: Exception, tb):
  """Reports an exception to all known terminals and shuts task running down.

  The signature matches sys.excepthook. KeyboardInterrupt is handed to the
  default hook untouched; anything else is broadcast, after which all tasks
  are deactivated and remote terminal titles are cleared.
  """
  if issubclass(exctype, KeyboardInterrupt):
    # Let KeyboardInterrupt through.
    sys.__excepthook__(exctype, exc, tb)
    return
  formatted = ''.join(traceback.format_exception(exctype, exc, tb))
  # Every line of the trace gets its own marker so it stands out remotely.
  marked = ''.join(f'\n⛔{line}' for line in formatted.splitlines())
  # Output uncaught exceptions to all live terminals.
  # Extra newline since siso's output often erases the current line.
  BuildManager.broadcast(marked + '\n')
  # Cancel all pending tasks cleanly (i.e. delete stamp files if necessary).
  TaskManager.deactivate()
  # Reset all remote terminal titles.
  BuildManager.update_remote_titles('')
82
83
84# Stores global options so as to not keep passing along and storing options
85# everywhere.
class OptionsManager:
  """Holder for global options shared by the whole server.

  Both values start out as None; set_options() has to be called before either
  accessor is used, since the accessors assert that a value was stored.
  """
  _quiet = None
  _should_remote_print = None

  @classmethod
  def set_options(cls, *, quiet, should_remote_print):
    """Stores the option values for later lookup."""
    cls._should_remote_print = should_remote_print
    cls._quiet = quiet

  @classmethod
  def is_quiet(cls):
    """Returns the stored `quiet` option."""
    value = cls._quiet
    assert value is not None
    return value

  @classmethod
  def should_remote_print(cls):
    """Returns the stored `should_remote_print` option."""
    value = cls._should_remote_print
    assert value is not None
    return value
Mohamed Heikalf73b717a2025-02-12 15:53:07104
Mohamed Heikalb752b772024-11-25 23:05:44105
class LogfileManager:
  """Creates, resumes and caches the logfile used by each build."""
  _logfiles: dict[str, IO[str]] = {}
  _lock = threading.RLock()

  @classmethod
  def create_logfile(cls, build_id, outdir):
    """Returns the logfile handle for |build_id|, creating it when needed.

    A handle already cached for the build is returned as-is. Otherwise, when
    the newest logfile in |outdir| was started by the same build it is
    reopened for appending; if not, existing logfiles are shifted up by one
    index and a fresh '.0' logfile with a header is started.
    """
    with cls._lock:
      cached = cls._logfiles.get(build_id, None)
      if cached:
        return cached

      outdir = pathlib.Path(outdir)
      newest_path = outdir / f'{_LOGFILE_NAME}.0'

      if newest_path.exists():
        with newest_path.open('rt') as existing:
          header = existing.readline()
          header_match = BUILD_ID_RE.search(header)
          # If the newest logfile on disk is referencing the same build we are
          # currently processing, we probably crashed previously and we should
          # pick up where we left off in the same logfile.
          if header_match and header_match.group('build_id') == build_id:
            resumed = newest_path.open('at')
            cls._logfiles[build_id] = resumed
            return resumed

      # Do the logfile name shift, highest index first so nothing that still
      # has to be moved gets overwritten.
      present = {
          entry
          for entry in os.listdir(outdir) if entry.startswith(_LOGFILE_NAME)
      }
      for idx in range(_MAX_LOGFILES - 1, -1, -1):
        old_name = f'{_LOGFILE_NAME}.{idx}'
        if old_name not in present:
          continue
        new_name = f'{_LOGFILE_NAME}.{idx+1}'
        shutil.move(os.path.join(outdir, old_name),
                    os.path.join(outdir, new_name))

      # Create a new 0th logfile.
      fresh = newest_path.open('wt')
      fresh.write(FIRST_LOG_LINE.format(build_id=build_id, outdir=outdir))
      fresh.flush()
      cls._logfiles[build_id] = fresh
      return fresh
146
Mohamed Heikalf746b57f2024-11-13 21:20:17147
class TaskStats:
  """Thread-safe aggregate counters for all tasks handled by the server."""
  _num_processes = 0
  _completed_tasks = 0
  _total_tasks = 0
  _lock = threading.RLock()

  @classmethod
  def no_running_processes(cls):
    """Returns True when the running-process counter is zero."""
    with cls._lock:
      idle = cls._num_processes == 0
    return idle

  @classmethod
  def add_task(cls):
    """Counts one more task towards the total."""
    with cls._lock:
      cls._total_tasks = cls._total_tasks + 1

  @classmethod
  def add_process(cls):
    """Counts one more running process."""
    with cls._lock:
      cls._num_processes = cls._num_processes + 1

  @classmethod
  def remove_process(cls):
    """Counts one running process less."""
    with cls._lock:
      cls._num_processes = cls._num_processes - 1

  @classmethod
  def complete_task(cls):
    """Counts one more task as completed."""
    with cls._lock:
      cls._completed_tasks = cls._completed_tasks + 1

  @classmethod
  def num_pending_tasks(cls):
    """Returns how many counted tasks have not completed yet."""
    with cls._lock:
      remaining = cls._total_tasks - cls._completed_tasks
    return remaining

  @classmethod
  def num_completed_tasks(cls):
    """Returns the number of completed tasks."""
    with cls._lock:
      done = cls._completed_tasks
    return done

  @classmethod
  def total_tasks(cls):
    """Returns the number of tasks counted so far."""
    with cls._lock:
      total = cls._total_tasks
    return total

  @classmethod
  def get_title_message(cls):
    """Returns the progress text used as terminal title."""
    with cls._lock:
      done, total = cls._completed_tasks, cls._total_tasks
      return f'Analysis Steps: {done}/{total}'

  @classmethod
  def query_build(cls, query_build_id: str = None):
    """Returns a dict with this process' pid and per-build info dicts.

    With a build id only that build is reported (nothing when it is unknown);
    without one every known build is reported.
    """
    if not query_build_id:
      selected = BuildManager.get_all_builds()
    else:
      found = BuildManager.get_build(query_build_id)
      selected = [found] if found else []
    build_infos = [build.query_build_info() for build in selected]
    return {
        'pid': os.getpid(),
        'builds': build_infos,
    }

  @classmethod
  def prefix(cls, build_id: str = None):
    """Returns the '<N> process(es), <done>/<total>' status prefix.

    Counters come from the build named by |build_id| when given, otherwise
    from the server-wide totals.
    """
    # Ninja's prefix is: [205 processes, 6/734 @ 6.5/s : 0.922s ]
    # Time taken and task completion rate are not important for the build server
    # since it is always running in the background and uses idle priority for
    # its tasks.
    with cls._lock:
      if build_id:
        build = BuildManager.get_build(build_id)
        counts = (build.process_count(), build.completed_task_count(),
                  build.total_task_count())
      else:
        counts = (cls._num_processes, cls._completed_tasks, cls._total_tasks)
      running, done, total = counts
      noun = 'process' if running == 1 else 'processes'
      return f'{running} {noun}, ' + f'{done}/{total}'
Peter Wenb1f3b1d2021-02-02 21:30:20235
Peter Wenf409c0c2021-02-09 19:33:02236
def check_pid_alive(pid: int):
  """Returns whether a process with the given pid currently exists.

  Signal 0 performs the existence/permission checks without delivering a
  signal.

  Args:
    pid: Process id to probe.

  Returns:
    True if the process exists, False otherwise.
  """
  try:
    os.kill(pid, 0)
  except PermissionError:
    # The process exists; we are merely not allowed to signal it.
    return True
  except OSError:
    return False
  return True
243
244
@dataclasses.dataclass
class Build:
  """State the server keeps for one build.

  Holds the build's identity (`id`, `pid`, `cwd`), the environment handed to
  its task subprocesses (`env`), the stream used for remote output
  (`stdout`), its logfile and its own task/process counters. Counter updates
  are mirrored into the server-wide TaskStats.
  """
  id: str
  pid: int
  env: dict
  stdout: IO[str]
  cwd: Optional[str] = None
  _logfile: Optional[IO[str]] = None
  _is_ninja_alive: bool = True
  _tasks: List[Task] = dataclasses.field(default_factory=list)
  # The two counters below carry no annotation on purpose-or-not: as a result
  # they are plain class attributes rather than dataclass fields, and become
  # instance attributes on the first `+=`/`-=`.
  _completed_task_count = 0
  _active_process_count = 0
  _lock: threading.RLock = dataclasses.field(default_factory=threading.RLock,
                                             repr=False,
                                             init=False)

  def __hash__(self):
    # Explicit hash so instances stay hashable next to the generated __eq__.
    return hash((self.id, self.pid, self.cwd))

  def add_task(self, task: Task):
    """Records |task| for this build and hands it to TaskManager's queue."""
    self._status_update(f'QUEUED {task.name}')
    with self._lock:
      self._tasks.append(task)
    TaskStats.add_task()
    TaskManager.add_task(task)

  def add_process(self, task: Task):
    """Counts a process started for |task| (per build and server-wide)."""
    self._status_update(f'STARTING {task.name}')
    with self._lock:
      self._active_process_count += 1
    TaskStats.add_process()

  def task_done(self, task: Task, status_string: str):
    """Marks |task| as completed and refreshes terminal titles.

    |status_string| is the status word placed in front of the task name in the
    status line. When the build is no longer active afterwards, its logfile is
    closed and the titles are reset.
    """
    self._status_update(f'{status_string} {task.name}')
    TaskStats.complete_task()
    TaskManager.task_done(task)
    with self._lock:
      self._completed_task_count += 1

    # We synchronize all terminal title info rather than having it per build
    # since if two builds are happening in the same terminal concurrently, both
    # builds will be overriding each other's titles continuously. Usually we
    # only have the one build anyways so it should be equivalent in most cases.
    BuildManager.update_remote_titles()
    with self._lock:
      if not self.is_active():
        self._logfile.close()
        # Reset in case its the last build.
        BuildManager.update_remote_titles('')

  def process_complete(self):
    """Counts a finished process (per build and server-wide)."""
    with self._lock:
      self._active_process_count -= 1
    TaskStats.remove_process()

  def ensure_logfile(self):
    """Obtains the logfile from LogfileManager if not done yet.

    Requires `cwd` to be set, since the logfile lives in that directory.
    """
    with self._lock:
      if not self._logfile:
        assert self.cwd is not None
        self._logfile = LogfileManager.create_logfile(self.id, self.cwd)

  def log(self, message: str):
    """Appends |message| to the build's logfile; no-op once it is closed."""
    with self._lock:
      self.ensure_logfile()
      if self._logfile.closed:
        # BuildManager#broadcast can call log after the build is done and the
        # log is closed. Might make sense to separate out that flow so we can
        # raise an exception here otherwise.
        return
      print(message, file=self._logfile, flush=True)

  def _status_update(self, status_message):
    """Writes a prefixed status line to the logfile and the local terminal."""
    prefix = f'[{TaskStats.prefix(self.id)}] '
    self.log(f'{prefix}{status_message}')
    print_status(prefix, status_message)

  def total_task_count(self):
    """Returns the number of tasks ever added to this build."""
    with self._lock:
      return len(self._tasks)

  def completed_task_count(self):
    """Returns the number of this build's tasks that are done."""
    with self._lock:
      return self._completed_task_count

  def pending_task_count(self):
    """Returns the number of this build's tasks that are not done yet."""
    with self._lock:
      return self.total_task_count() - self.completed_task_count()

  def process_count(self):
    """Returns the number of processes currently counted for this build."""
    with self._lock:
      return self._active_process_count

  def is_active(self):
    """Returns True while tasks are pending or the process `pid` is alive."""
    if self.pending_task_count() > 0:
      return True
    # Ninja is not coming back to life so only check on it if last we checked it
    # was still alive.
    if self._is_ninja_alive:
      self._is_ninja_alive = check_pid_alive(self.pid)
    return self._is_ninja_alive

  def query_build_info(self):
    """Returns a dict describing this build's current state."""
    current_tasks = TaskManager.get_current_tasks(self.id)
    return {
        'build_id': self.id,
        'is_active': self.is_active(),
        'completed_tasks': self.completed_task_count(),
        'pending_tasks': self.pending_task_count(),
        'active_tasks': [t.cmd for t in current_tasks],
        'outdir': self.cwd,
    }
Andrew Grieve0d6e8a752025-02-05 21:20:50356
357
class BuildManager:
  """Registry of known builds and of the terminals they were started from."""
  _builds_by_id: dict[str, Build] = dict()
  # Keyed by (st_ino, st_dev); the bool records whether the stream is a tty.
  _cached_ttys: dict[(int, int), tuple[IO[str], bool]] = dict()
  _lock = threading.RLock()

  @staticmethod
  def _write_to_tty(tty, text):
    """Best-effort write and flush of |text| to |tty|.

    A terminal that went away should never take the server down. Such a
    stream is not guaranteed to fail with BrokenPipeError specifically (a
    plain OSError is possible as well), so the whole OSError family is
    treated as "receiver is gone".

    Returns:
      True when the text was written and flushed, False otherwise.
    """
    try:
      tty.write(text)
      tty.flush()
    except OSError:
      return False
    return True

  @classmethod
  def register_builder(cls, env, pid, cwd):
    """Creates and registers the Build described by |env|, |pid| and |cwd|.

    The build id and the path of the terminal to report to are read from
    |env|, which is also modified so that task scripts do not delegate back
    to the build server.
    """
    build_id = env['AUTONINJA_BUILD_ID']
    stdout = cls.open_tty(env['AUTONINJA_STDOUT_NAME'])
    # Tells the script not to re-delegate to build server.
    env[server_utils.BUILD_SERVER_ENV_VARIABLE] = '1'

    with cls._lock:
      build = Build(id=build_id, pid=pid, cwd=cwd, env=env, stdout=stdout)
      cls.maybe_init_cwd(build, cwd)
      cls._builds_by_id[build_id] = build
    cls.update_remote_titles()

  @classmethod
  def maybe_init_cwd(cls, build: Build, cwd: str):
    """Sets |build|'s cwd (once) and makes sure its logfile exists.

    Does nothing when |cwd| is None. When the build already has a cwd, |cwd|
    must refer to the same directory.
    """
    if cwd is not None:
      with cls._lock:
        if build.cwd is None:
          build.cwd = cwd
        else:
          assert pathlib.Path(cwd).samefile(
              build.cwd), f'{repr(cwd)} != {repr(build.cwd)}'
        build.ensure_logfile()

  @classmethod
  def get_build(cls, build_id):
    """Returns the Build registered under |build_id|, or None."""
    with cls._lock:
      return cls._builds_by_id.get(build_id, None)

  @classmethod
  def open_tty(cls, tty_path):
    """Returns an append-mode stream for |tty_path|, shared per device."""
    # Do not open the same tty multiple times. Use st_ino and st_dev to compare
    # file descriptors.
    tty = open(tty_path, 'at')
    st = os.stat(tty.fileno())
    tty_key = (st.st_ino, st.st_dev)
    with cls._lock:
      # Dedupes ttys
      if tty_key not in cls._cached_ttys:
        # TTYs are kept open for the lifetime of the server so that broadcast
        # messages (e.g. uncaught exceptions) can be sent to them even if they
        # are not currently building anything.
        cls._cached_ttys[tty_key] = (tty, tty.isatty())
      else:
        tty.close()
      return cls._cached_ttys[tty_key][0]

  @classmethod
  def get_active_builds(cls) -> List[Build]:
    """Returns the registered builds that are still active."""
    return [build for build in cls.get_all_builds() if build.is_active()]

  @classmethod
  def get_all_builds(cls) -> List[Build]:
    """Returns a snapshot list of every registered build."""
    with cls._lock:
      return list(cls._builds_by_id.values())

  @classmethod
  def broadcast(cls, msg: str):
    """Sends |msg| to all cached terminals, all build logs and stderr."""
    # Snapshot under the lock, do the (possibly slow) I/O outside of it.
    with cls._lock:
      ttys = list(cls._cached_ttys.values())
      builds = list(cls._builds_by_id.values())
    if OptionsManager.should_remote_print():
      for tty, _unused in ttys:
        cls._write_to_tty(tty, msg + '\n')
    for build in builds:
      build.log(msg)
    # Write to the current terminal if we have not written to it yet.
    st = os.stat(sys.stderr.fileno())
    stderr_key = (st.st_ino, st.st_dev)
    if stderr_key not in cls._cached_ttys:
      print(msg, file=sys.stderr)

  @classmethod
  def update_remote_titles(cls, new_title=None):
    """Sets the title of every cached stream that is a tty.

    Args:
      new_title: Title to set. When None, the title is the current progress
        message, or empty when nothing is active or pending.
    """
    if new_title is None:
      if not cls.has_active_builds() and TaskStats.num_pending_tasks() == 0:
        # Setting an empty title causes most terminals to go back to the
        # default title (and at least prevents the tab title from being
        # "Analysis Steps: N/N" forevermore.
        new_title = ''
      else:
        new_title = TaskStats.get_title_message()

    with cls._lock:
      ttys = list(cls._cached_ttys.values())
    for tty, isatty in ttys:
      if isatty:
        cls._write_to_tty(tty, f'\033]2;{new_title}\007')

  @classmethod
  def has_active_builds(cls):
    """Returns True if at least one registered build is active."""
    return bool(cls.get_active_builds())
Mohamed Heikalb752b772024-11-25 23:05:44468
469
class TaskManager:
  """Thread-safe task queue that can be deactivated as a whole."""
  _queue: collections.deque[Task] = collections.deque()
  _current_tasks: set[Task] = set()
  _deactivated = False
  _lock = threading.RLock()

  @classmethod
  def add_task(cls, task: Task):
    """Queues |task| and tries to start queued tasks right away."""
    assert not cls._deactivated
    with cls._lock:
      cls._queue.appendleft(task)
    cls._maybe_start_tasks()

  @classmethod
  def task_done(cls, task: Task):
    """Forgets |task| as a currently running task (if it was one)."""
    with cls._lock:
      cls._current_tasks.discard(task)

  @classmethod
  def get_current_tasks(cls, build_id):
    """Returns the started tasks that belong to the build |build_id|."""
    with cls._lock:
      return [
          task for task in cls._current_tasks if task.build.id == build_id
      ]

  @classmethod
  def deactivate(cls):
    """Stops starting tasks and terminates all queued and running ones."""
    cls._deactivated = True
    with cls._lock:
      # Drain the queue in the order pop() would yield its entries.
      doomed: list[Task] = list(reversed(cls._queue))
      cls._queue.clear()
      # Cancel possibly running tasks.
      doomed.extend(cls._current_tasks)
    # Terminate outside lock since task threads need the lock to finish
    # terminating.
    for task in doomed:
      task.terminate()

  @classmethod
  def cancel_build(cls, build_id):
    """Terminates all queued and running tasks of the build |build_id|."""
    with cls._lock:
      # Cancel pending tasks.
      pending: list[Task] = [
          task for task in cls._queue if task.build.id == build_id
      ]
      for task in pending:
        cls._queue.remove(task)
      # Cancel running tasks.
      running: list[Task] = [
          task for task in cls._current_tasks if task.build.id == build_id
      ]
    # Terminate tasks outside lock since task threads need the lock to finish
    # terminating.
    for task in pending + running:
      task.terminate()

  @staticmethod
  # pylint: disable=inconsistent-return-statements
  def _num_running_processes():
    """Returns the `procs_running` value reported by /proc/stat."""
    with open('/proc/stat') as stat_file:
      for line in stat_file:
        if not line.startswith('procs_running'):
          continue
        fields = line.split()
        return int(fields[1])
    assert False, 'Could not read /proc/stat'

  @classmethod
  def _maybe_start_tasks(cls):
    """Starts up to two queued tasks if the machine load permits."""
    if cls._deactivated:
      return
    # Include load avg so that a small dip in the number of currently running
    # processes will not cause new tasks to be started while the overall load is
    # heavy.
    cur_load = max(cls._num_running_processes(), os.getloadavg()[0])
    num_started = 0
    # Always start a task if we don't have any running, so that all tasks are
    # eventually finished. Try starting up tasks when the overall load is light.
    # Limit to at most 2 new tasks to prevent ramping up too fast. There is a
    # chance where multiple threads call _maybe_start_tasks and each gets to
    # spawn up to 2 new tasks, but since the only downside is some build tasks
    # get worked on earlier rather than later, it is not worth mitigating.
    while num_started < 2 and (TaskStats.no_running_processes()
                               or num_started + cur_load < os.cpu_count()):
      with cls._lock:
        if not cls._queue:
          return
        task = cls._queue.pop()
        cls._current_tasks.add(task)
      num_started += task.start(cls._maybe_start_tasks)
Peter Wencd460ff52021-02-23 22:40:05564
565
566# TODO(wnwen): Break this into Request (encapsulating what ninja sends) and Task
567# when a Request starts to be run. This would eliminate ambiguity
568# about when and whether _proc/_thread are initialized.
class Task:
  """Class to represent one task and operations on it.

  A task wraps one command that is run as a low-priority subprocess in its
  build's cwd and environment. It can be terminated before or while running.
  """

  def __init__(self, name: str, build: Build, cmd: List[str],
               stamp_file: Optional[str]):
    """Creates a not-yet-started task.

    Args:
      name: Name used in status and failure messages.
      build: The build this task belongs to.
      cmd: Command line to execute.
      stamp_file: Path (relative to the build's cwd) removed when the task
        fails or is cancelled; may be None/empty for no stamp handling.
    """
    self.name = name
    self.build = build
    self.cmd = cmd
    self.stamp_file = stamp_file
    self._terminated = False
    self._replaced = False
    self._lock = threading.RLock()
    self._proc: Optional[subprocess.Popen] = None
    self._thread: Optional[threading.Thread] = None
    self._delete_stamp_thread: Optional[threading.Thread] = None
    self._return_code: Optional[int] = None

  @property
  def key(self):
    """Identity of the task across builds: (build cwd, task name)."""
    return (self.build.cwd, self.name)

  def __hash__(self):
    return hash((self.key, self.build.id))

  def __eq__(self, other):
    # Defer to Python's default handling for foreign types instead of failing
    # with AttributeError on the missing `key`/`build` attributes.
    if not isinstance(other, Task):
      return NotImplemented
    return self.key == other.key and self.build is other.build

  def start(self, on_complete_callback: Callable[[], None]) -> int:
    """Starts the task if it has not already been terminated.

    Returns the number of processes that have been started. This is called at
    most once when the task is popped off the task queue."""
    with self._lock:
      if self._terminated:
        return 0

      # Use os.nice(19) to ensure the lowest priority (idle) for these analysis
      # tasks since we want to avoid slowing down the actual build.
      # TODO(wnwen): Use ionice to reduce resource consumption.
      self.build.add_process(self)
      # This use of preexec_fn is sufficiently simple, just one os.nice call.
      # pylint: disable=subprocess-popen-preexec-fn
      self._proc = subprocess.Popen(
          self.cmd,
          stdout=subprocess.PIPE,
          stderr=subprocess.STDOUT,
          cwd=self.build.cwd,
          env=self.build.env,
          text=True,
          preexec_fn=lambda: os.nice(19),
      )
      self._thread = threading.Thread(
          target=self._complete_when_process_finishes,
          args=(on_complete_callback, ))
      self._thread.start()
      return 1

  def terminate(self, replaced=False):
    """Can be called multiple times to cancel and ignore the task's output.

    Args:
      replaced: True when the task is cancelled because a newer task with the
        same key takes its place; the stamp file is then left alone.
    """
    with self._lock:
      if self._terminated:
        return
      self._terminated = True
      self._replaced = replaced

    # It is safe to access _proc and _thread outside of _lock since they are
    # only changed by self.start holding _lock when self._terminated is false.
    # Since we have just set self._terminated to true inside of _lock, we know
    # that neither _proc nor _thread will be changed from this point onwards.
    if self._proc:
      self._proc.terminate()
      self._proc.wait()
    # Ensure that self._complete is called either by the thread or by us.
    if self._thread:
      self._thread.join()
    else:
      self._complete()

  def _complete_when_process_finishes(self,
                                      on_complete_callback: Callable[[], None]):
    """Thread body: waits for the subprocess, then finalizes the task."""
    assert self._proc
    # We know Popen.communicate will return a str and not a byte since it is
    # constructed with text=True.
    stdout: str = self._proc.communicate()[0]
    self._return_code = self._proc.returncode
    self.build.process_complete()
    self._complete(stdout)
    on_complete_callback()

  def _complete(self, stdout: str = ''):
    """Update the user and ninja after the task has run or been terminated.

    This method should only be run once per task. Avoid modifying the task so
    that this method does not need locking."""

    delete_stamp = False
    status_string = 'FINISHED'
    if self._terminated:
      status_string = 'TERMINATED'
      # When tasks are replaced, avoid deleting the stamp file, context:
      # https://issuetracker.google.com/301961827.
      if not self._replaced:
        delete_stamp = True
    elif stdout or self._return_code != 0:
      # Any output at all counts as a failure, not just a non-zero exit code.
      status_string = 'FAILED'
      delete_stamp = True
      preamble = [
          f'FAILED: {self.name}',
          f'Return code: {self._return_code}',
          'CMD: ' + shlex.join(self.cmd),
          'STDOUT:',
      ]

      message = '\n'.join(preamble + [stdout])
      self.build.log(message)
      server_log(message)

      if OptionsManager.should_remote_print():
        # Add emoji to show that output is from the build server.
        preamble = [f'⏩ {line}' for line in preamble]
        remote_message = '\n'.join(preamble + [stdout])
        # Add a new line at start of message to clearly delineate from previous
        # output/text already on the remote tty we are printing to.
        self.build.stdout.write(f'\n{remote_message}')
        self.build.stdout.flush()
    if delete_stamp and self.stamp_file:
      # Force siso to consider failed targets as dirty.
      try:
        os.unlink(os.path.join(self.build.cwd, self.stamp_file))
      except FileNotFoundError:
        pass
    self.build.task_done(self, status_string)
Peter Wenb1f3b1d2021-02-02 21:30:20701
702
def _handle_add_task(data, current_tasks: Dict[Tuple[str, str], Task]):
  """Handle messages of type ADD_TASK.

  Builds a Task from the request, terminates (as replaced) any task already
  stored under the same key, records the new task and hands it to its build.

  Args:
    data: Decoded request; reads 'build_id', 'cmd', 'stamp_file' and the
      optional 'cwd' and 'name' entries.
    current_tasks: Mapping from task key to the latest task with that key.
      Updated in place.
  """
  build = BuildManager.get_build(data['build_id'])
  BuildManager.maybe_init_cwd(build, data.get('cwd'))

  cmd = data['cmd']
  # Tasks without an explicit name are identified by their command line.
  task = Task(name=data.get('name') or shlex.join(cmd),
              cmd=cmd,
              build=build,
              stamp_file=data['stamp_file'])

  superseded = current_tasks.get(task.key)
  if superseded:
    superseded.terminate(replaced=True)
  current_tasks[task.key] = task

  build.add_task(task)
Mohamed Heikalf746b57f2024-11-13 21:20:17721
722
def _handle_query_build(data, connection: socket.socket):
  """Handle messages of type QUERY_BUILD.

  Sends the result of TaskStats.query_build() for the requested build id back
  over the connection, which is closed afterwards.

  Args:
    data: Decoded request; reads the 'build_id' entry.
    connection: Socket on which the response is sent.
  """
  response = TaskStats.query_build(data['build_id'])
  # We should not die because the client died.
  with contextlib.suppress(BrokenPipeError), connection:
    server_utils.SendMessage(connection, response)
733
734
def _handle_heartbeat(connection: socket.socket):
  """Handle messages of type POLL_HEARTBEAT.

  Replies with an 'OK' status and this process's pid, then closes the
  connection.

  Args:
    connection: Socket on which the response is sent.
  """
  reply = {
      'status': 'OK',
      'pid': os.getpid(),
  }
  # We should not die because the client died.
  with contextlib.suppress(BrokenPipeError), connection:
    server_utils.SendMessage(connection, reply)
746
747
def _handle_register_builder(data):
  """Handle messages of type REGISTER_BUILDER.

  Args:
    data: Decoded request; reads the 'env', 'builder_pid' and 'cwd' entries.
      'builder_pid' is converted to an int before being passed on.
  """
  BuildManager.register_builder(data['env'], int(data['builder_pid']),
                                data['cwd'])
Mohamed Heikalb752b772024-11-25 23:05:44755
756
def _handle_cancel_build(data):
  """Handle messages of type CANCEL_BUILD.

  Args:
    data: Decoded request; reads the 'build_id' entry.
  """
  cancelled_build_id = data['build_id']
  TaskManager.cancel_build(cancelled_build_id)
  # Reset the remote titles to an empty string once the build is cancelled.
  BuildManager.update_remote_titles('')
Mohamed Heikalb752b772024-11-25 23:05:44762
763
def _handle_stop_server():
  """Handle messages of type STOP_SERVER.

  Deactivates the task manager, logging before and after, and then exits the
  process with status 0.

  Raises:
    SystemExit: Always.
  """
  server_log('STOPPING SERVER...')
  TaskManager.deactivate()
  server_log('STOPPED')
  raise SystemExit(0)
770
771
def _listen_for_request_data(sock: socket.socket):
  """Helper to encapsulate getting a new message.

  Accepts connections on |sock| forever and yields one
  (message, connection) pair per connection that delivered a message. The
  consumer owns every yielded connection and is responsible for closing it.

  Connections that deliver no message are never yielded, so they are closed
  here instead of being left open until the next accept() returns.

  Args:
    sock: Listening socket to accept connections on.

  Yields:
    Tuples of (decoded message, accepted connection).
  """
  while True:
    conn, _ = sock.accept()
    try:
      message = server_utils.ReceiveMessage(conn)
    except BaseException:
      # Nobody else holds a reference to this connection yet; release it
      # before letting the error propagate.
      conn.close()
      raise
    if message:
      yield message, conn
    else:
      conn.close()
Mohamed Heikalb752b772024-11-25 23:05:44779
780
def _register_cleanup_signal_handlers():
  """Installs SIGINT/SIGTERM handlers that shut the task manager down first.

  The handlers that were installed beforehand are remembered and invoked after
  the cleanup. When a previous handler is not callable, SIGINT raises
  KeyboardInterrupt and SIGTERM exits with status 1.
  """
  previous_handlers = {
      signal.SIGINT: signal.getsignal(signal.SIGINT),
      signal.SIGTERM: signal.getsignal(signal.SIGTERM),
  }

  def _cleanup(signum, frame):
    server_log('STOPPING SERVER...')
    # Gracefully shut down the task manager, terminating all queued tasks.
    TaskManager.deactivate()
    server_log('STOPPED')
    if signum not in previous_handlers:
      return
    previous = previous_handlers[signum]
    # Sometimes the previous handler is not a callable.
    if callable(previous):
      previous(signum, frame)
    elif signum == signal.SIGINT:
      raise KeyboardInterrupt()
    else:
      sys.exit(1)

  for handled_signal in previous_handlers:
    signal.signal(handled_signal, _cleanup)
804
805
def _process_requests(sock: socket.socket, exit_on_idle: bool):
  """Main loop for build server receiving request messages.

  Dispatches each incoming message to its handler until the server is
  interrupted or, when |exit_on_idle| is set, until a socket timeout occurs
  while there are no pending tasks and no active builds.

  Args:
    sock: Listening socket to receive requests on.
    exit_on_idle: Whether to leave the loop once the server has gone idle.
  """
  # Dicts in python can hold anything, so spell out the type of the task table
  # to make static type checking more useful.
  tasks: Dict[Tuple[str, str], Task] = {}
  server_log(f'Server started. PID={os.getpid()}')
  _register_cleanup_signal_handlers()
  while True:
    try:
      for data, connection in _listen_for_request_data(sock):
        # Messages without an explicit type are treated as ADD_TASK.
        message_type = data.get('message_type', server_utils.ADD_TASK)

        # These two requests are answered on the connection, so their
        # handlers take care of closing it.
        if message_type == server_utils.POLL_HEARTBEAT:
          _handle_heartbeat(connection)
          continue
        if message_type == server_utils.QUERY_BUILD:
          _handle_query_build(data, connection)
          continue

        # Everything else (including unknown message types) gets no reply:
        # close the connection before doing any work.
        connection.close()
        if message_type == server_utils.ADD_TASK:
          _handle_add_task(data, tasks)
        elif message_type == server_utils.REGISTER_BUILDER:
          _handle_register_builder(data)
        elif message_type == server_utils.CANCEL_BUILD:
          _handle_cancel_build(data)
        elif message_type == server_utils.STOP_SERVER:
          _handle_stop_server()
    except TimeoutError:
      # If we have not received a new task in a while and do not have any
      # pending tasks or running builds, then exit. Otherwise keep waiting.
      is_idle = (TaskStats.num_pending_tasks() == 0
                 and not BuildManager.has_active_builds())
      if is_idle and exit_on_idle:
        break
    except KeyboardInterrupt:
      break
  BuildManager.update_remote_titles('')
Mohamed Heikalf746b57f2024-11-13 21:20:17845
846
def query_build_info(build_id=None):
  """Communicates with the main server to query build info.

  Args:
    build_id: Value sent as the 'build_id' of the QUERY_BUILD request.
      Defaults to None.

  Returns:
    The response received from the server.
  """
  request = {
      'message_type': server_utils.QUERY_BUILD,
      'build_id': build_id,
  }
  return _send_message_with_response(request)
Mohamed Heikalf746b57f2024-11-13 21:20:17853
854
def _wait_for_build(build_id):
  """Communicates with the main server waiting for a build to complete.

  Polls the server once per second and prints a progress line until the build
  has no pending tasks left.

  Args:
    build_id: Id of the build to wait for.

  Returns:
    0 once all tasks completed or when no server is running, 1 when the server
    reports no build for |build_id|.
  """
  start_time = datetime.datetime.now()
  while True:
    try:
      builds = query_build_info(build_id)['builds']
    except ConnectionRefusedError:
      print('No server running. It likely finished all tasks.')
      print('You can check $OUTDIR/buildserver.log.0 to be sure.')
      return 0

    # The server may answer with no builds at all; report that instead of
    # crashing with an IndexError.
    if not builds:
      print(f'No build found with id ({build_id})')
      return 1

    pending_tasks = builds[0]['pending_tasks']

    if pending_tasks == 0:
      print(f'\nAll tasks completed for build_id: {build_id}.')
      return 0

    current_time = datetime.datetime.now()
    duration = current_time - start_time
    # \r and \033[K rewrite the same terminal line on every poll.
    print(f'\rWaiting for {pending_tasks} tasks [{str(duration)}]\033[K',
          end='',
          flush=True)
    time.sleep(1)
878
879
def _wait_for_idle():
  """Communicates with the main server waiting for all builds to complete.

  Polls the server twice per second and prints a progress line until no
  counted build has pending tasks.

  Returns:
    0, both when the server became idle and when no server is running.
  """
  started_at = datetime.datetime.now()
  while True:
    try:
      builds = query_build_info()['builds']
    except ConnectionRefusedError:
      print('No server running. It likely finished all tasks.')
      print('You can check $OUTDIR/buildserver.log.0 to be sure.')
      return 0

    remaining = 0
    finished = 0
    for info in builds:
      pending = info['pending_tasks']
      completed = info['completed_tasks']
      # Ignore completed builds.
      if not (info['is_active'] or pending):
        continue
      remaining += pending
      finished += completed

    if remaining == 0:
      print('\nServer Idle, All tasks complete.')
      return 0

    elapsed = datetime.datetime.now() - started_at
    # \r and \033[K rewrite the same terminal line on every poll.
    print(
        f'\rWaiting for {remaining} remaining tasks. '
        f'({finished}/{remaining + finished} tasks complete) '
        f'[{elapsed!s}]\033[K',
        end='',
        flush=True)
    time.sleep(0.5)
916
917
def _send_message_and_close(message_dict):
  """Sends one message to the server without waiting for a reply.

  Args:
    message_dict: The message to send.
  """
  # Sockets are context managers, so the connection is closed on exit.
  with socket.socket(socket.AF_UNIX) as client:
    client.connect(server_utils.SOCKET_ADDRESS)
    client.settimeout(1)
    server_utils.SendMessage(client, message_dict)
923
924
def _send_message_with_response(message_dict):
  """Sends one message to the server and returns what it answers.

  Args:
    message_dict: The message to send.

  Returns:
    The value produced by server_utils.ReceiveMessage() for the reply.
  """
  # Sockets are context managers, so the connection is closed on exit.
  with socket.socket(socket.AF_UNIX) as client:
    client.connect(server_utils.SOCKET_ADDRESS)
    client.settimeout(1)
    server_utils.SendMessage(client, message_dict)
    reply = server_utils.ReceiveMessage(client)
  return reply
Mohamed Heikalb752b772024-11-25 23:05:44931
932
def _send_cancel_build(build_id):
  """Sends a CANCEL_BUILD message for |build_id| to the server.

  Args:
    build_id: Id of the build to cancel.

  Returns:
    Always 0.
  """
  request = {
      'message_type': server_utils.CANCEL_BUILD,
      'build_id': build_id,
  }
  _send_message_and_close(request)
  return 0
939
940
def _send_stop_server():
  """Sends a STOP_SERVER message to the server.

  Returns:
    0 when the message was sent, 1 when the connection was refused (no server
    is listening).

  Raises:
    OSError: For any socket failure other than a refused connection.
  """
  try:
    _send_message_and_close({
        'message_type': server_utils.STOP_SERVER,
    })
  except ConnectionRefusedError:
    # Catch the dedicated exception rather than comparing errno against a
    # hard-coded, platform-specific number.
    sys.stderr.write('No running build server found.\n')
    return 1
  return 0
952
953
def _register_build(builder_pid, output_directory):
  """Sends a REGISTER_BUILDER message to the server, trying up to 3 times.

  Args:
    builder_pid: Pid of the builder process, sent as 'builder_pid'.
    output_directory: Build directory, sent as 'cwd'. Made absolute unless it
      is None.

  Returns:
    0 when the message was sent, 1 when every attempt failed with an OSError.
  """
  if output_directory is not None:
    output_directory = str(pathlib.Path(output_directory).absolute())

  attempts_left = 3
  while attempts_left:
    attempts_left -= 1
    try:
      # Ensure environment variables that the server expects to be there are
      # present.
      server_utils.AssertEnvironmentVariables()

      _send_message_and_close({
          'message_type': server_utils.REGISTER_BUILDER,
          'env': dict(os.environ),
          'builder_pid': builder_pid,
          'cwd': output_directory,
      })
    except OSError:
      # Pause briefly before the next attempt.
      time.sleep(0.05)
    else:
      return 0
  print('Failed to register build. No server running?')
  return 1
974
975
def poll_server(retries=3):
  """Sends a POLL_HEARTBEAT message to the server and returns its pid.

  Args:
    retries: Maximum number of attempts.

  Returns:
    The 'pid' entry of the first non-empty response, or None when no attempt
    produced one.
  """
  for _ in range(retries):
    try:
      reply = _send_message_with_response(
          {'message_type': server_utils.POLL_HEARTBEAT})
    except OSError:
      # Pause briefly before the next attempt.
      time.sleep(0.05)
      continue
    if reply:
      return reply['pid']
  return None
989
990
def _print_build_status_all():
  """Prints a status line per registered build plus some running commands.

  At most 4 of the currently executing commands are listed, each truncated to
  200 characters.

  Returns:
    Always 0, including when no server is running.
  """
  try:
    query_data = query_build_info(None)
  except ConnectionRefusedError:
    print('No server running. Consult $OUTDIR/buildserver.log.0')
    return 0

  builds = query_data['builds']
  pid = query_data['pid']
  running_cmds = []
  print(f'Build server (PID={pid}) has {len(builds)} registered builds')
  for info in builds:
    build_id = info['build_id']
    pending = info['pending_tasks']
    completed = info['completed_tasks']
    active_tasks = info['active_tasks']
    out_dir = info['outdir']
    is_active = info['is_active']
    total = pending + completed
    running_cmds.extend(active_tasks)

    if total == 0 and not is_active:
      status = 'Finished without any jobs'
    else:
      status = ('Main build is still running'
                if is_active else 'Main build completed')
      if out_dir:
        status += f' in {out_dir}'
      status += f'. Tasks completed: {completed}/{total}'
      if completed < total:
        status += f' {len(active_tasks)} task(s) currently executing'
    print(f'{build_id}: {status}')

  if running_cmds:
    total_running = len(running_cmds)
    shown = min(4, total_running)
    print(f'Currently executing (showing {shown} of {total_running}):')
    for cmd in sorted(running_cmds)[:shown]:
      line = shlex.join(cmd)
      # Keep very long command lines readable.
      print(line if len(line) <= 200 else line[:200] + '...')
  return 0
1033
1034
def _print_build_status(build_id):
  """Prints how many tasks are still pending for one build.

  Args:
    build_id: Id of the build to query.

  Returns:
    1 when the server knows no build with that id, otherwise 0 (including when
    no server is running).
  """
  server_path = os.path.relpath(str(server_utils.SERVER_SCRIPT))
  try:
    builds = query_build_info(build_id)['builds']
  except ConnectionRefusedError:
    print('⚠️ No server running. Consult $OUTDIR/buildserver.log.0')
    return 0

  if not builds:
    print(f'⚠️ No build found with id ({build_id})')
    print('⚠️ To see the status of all builds:',
          shlex.join([server_path, '--print-status-all']))
    return 1

  pending_tasks = builds[0]['pending_tasks']
  # Print nothing unless there are still pending tasks.
  if not pending_tasks:
    return 0

  only_one = pending_tasks == 1
  is_str = 'is' if only_one else 'are'
  job_str = 'job' if only_one else 'jobs'
  print(f'⏩ There {is_str} still {pending_tasks} static analysis {job_str}'
        ' running in the background.')
  print('⏩ To wait for them:', shlex.join([server_path, '--wait-for-idle']))
  return 0
1058
1059
def _start_server(exit_on_idle):
  """Binds the server socket and processes requests until the server stops.

  Args:
    exit_on_idle: Forwarded to _process_requests().

  Returns:
    1 when the socket address is already in use, 0 once the request loop ends.

  Raises:
    OSError: When binding fails for any reason other than the address being in
      use.
  """
  # Local import: only needed to name the bind() failure below.
  import errno

  sys.excepthook = _exception_hook
  with socket.socket(socket.AF_UNIX) as sock:
    sock.settimeout(_SOCKET_TIMEOUT)
    try:
      sock.bind(server_utils.SOCKET_ADDRESS)
    except OSError as e:
      # Use the named constant instead of a hard-coded, platform-specific
      # errno number.
      if e.errno != errno.EADDRINUSE:
        raise
      if not OptionsManager.is_quiet():
        pid = poll_server()
        print(f'Another instance is already running (pid: {pid}).',
              file=sys.stderr)
      return 1
    sock.listen()
    _process_requests(sock, exit_on_idle)
  return 0
Peter Wenb1f3b1d2021-02-02 21:30:201078
1079
def _add_task(cmd):
  """Registers a one-off build and sends |cmd| to the server as its task.

  Sets AUTONINJA_BUILD_ID and AUTONINJA_STDOUT_NAME in this process's
  environment before registering.

  Args:
    cmd: The command to run, as a list of arguments.

  Returns:
    0 when the task was sent, otherwise a non-zero code: the one returned by
    _register_build(), or 1 when the connection was refused.

  Raises:
    OSError: For any socket failure other than a refused connection while
      sending the task.
  """
  build_id = f'default-{time.time()}'
  os.environ['AUTONINJA_BUILD_ID'] = build_id
  # Use whatever stdout points at when it exists as a path, else /dev/null.
  tty = os.readlink('/proc/self/fd/1')
  os.environ['AUTONINJA_STDOUT_NAME'] = (tty
                                         if os.path.exists(tty) else '/dev/null')

  cwd = os.getcwd()
  if code := _register_build(os.getpid(), cwd):
    return code

  try:
    _send_message_and_close({
        'name': None,
        'message_type': server_utils.ADD_TASK,
        'cmd': cmd,
        'cwd': cwd,
        'build_id': build_id,
        'stamp_file': None,
    })
  except ConnectionRefusedError:
    # Catch the dedicated exception rather than comparing errno against a
    # hard-coded, platform-specific number.
    sys.stderr.write('No running build server found.\n')
    return 1
  return 0
1107
1108
def _main_old():
  """Entry point for the flag-based command line.

  Parses the flags, stores the output options and runs the first action that
  was requested; when none was, starts the server.

  Returns:
    The exit code of the action that ran.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  add_flag = parser.add_argument

  add_flag('--exit-on-idle',
           action='store_true',
           help='Server started on demand. Exit when all tasks run out.')
  add_flag('--quiet', action='store_true', help='Do not output status updates.')
  add_flag('--no-remote-print',
           action='store_true',
           help='Do not output errors to remote terminals.')
  add_flag('--wait-for-build',
           metavar='BUILD_ID',
           help='Wait for build server to finish with all tasks '
           'for BUILD_ID and output any pending messages.')
  add_flag('--wait-for-idle',
           action='store_true',
           help='Wait for build server to finish with all '
           'pending tasks.')
  add_flag('--print-status',
           metavar='BUILD_ID',
           help='Print the current state of a build.')
  add_flag('--print-status-all',
           action='store_true',
           help='Print the current state of all active builds.')
  add_flag('--register-build-id',
           metavar='BUILD_ID',
           help='Inform the build server that a new build has started.')
  add_flag('--output-directory',
           help='Build directory (use with --register-build-id)')
  add_flag('--builder-pid', help='Builder process\'s pid for build BUILD_ID.')
  add_flag('--cancel-build',
           metavar='BUILD_ID',
           help='Cancel all pending and running tasks for BUILD_ID.')
  args = parser.parse_args()

  OptionsManager.set_options(quiet=args.quiet,
                             should_remote_print=not args.no_remote_print)

  # Client-side actions are checked in a fixed order and the first one that
  # was requested wins.
  if args.wait_for_build:
    exit_code = _wait_for_build(args.wait_for_build)
  elif args.wait_for_idle:
    exit_code = _wait_for_idle()
  elif args.print_status:
    exit_code = _print_build_status(args.print_status)
  elif args.print_status_all:
    exit_code = _print_build_status_all()
  elif args.register_build_id:
    exit_code = _register_build(args.builder_pid, args.output_directory)
  elif args.cancel_build:
    exit_code = _send_cancel_build(args.cancel_build)
  else:
    exit_code = _start_server(args.exit_on_idle)
  return exit_code
1164
1165
def _main_new():
  """Entry point for the sub-command based command line.

  Supported commands: start, stop, register-build, unregister-build, status,
  wait and run. Without a command the help text is printed.

  Returns:
    The exit code of the command that ran, or 1 when no command was given.
  """
  parser = argparse.ArgumentParser(description=__doc__)
  sub_parsers = parser.add_subparsers(dest='command')

  sub_parser = sub_parsers.add_parser('start', help='Start the server')
  sub_parser.add_argument('--quiet',
                          action='store_true',
                          help='Do not output status updates.')
  sub_parser.add_argument('--no-remote-print',
                          action='store_true',
                          help='Do not output errors to remote terminals.')
  sub_parser.add_argument(
      '--exit-on-idle',
      action='store_true',
      help='Server started on demand. Exit when all tasks run out.')

  sub_parser = sub_parsers.add_parser('stop',
                                      help='Stops the server if it is running')

  sub_parser = sub_parsers.add_parser(
      'register-build', help='Tell a running server about a new ninja session')
  sub_parser.add_argument('--output-directory',
                          required=True,
                          help='CWD for the build')
  sub_parser.add_argument('--builder-pid',
                          required=True,
                          help='Builder process\'s PID.')

  sub_parser = sub_parsers.add_parser(
      'unregister-build',
      help='Tell a running server a ninja session has finished')
  sub_parser.add_argument('--build-id',
                          required=True,
                          help='The AUTONINJA_BUILD_ID')
  sub_parser.add_argument('--verbose',
                          action='store_true',
                          help='Print status if jobs exist.')
  sub_parser.add_argument('--cancel-jobs',
                          action='store_true',
                          help='Cancel pending jobs')

  sub_parser = sub_parsers.add_parser('status', help='Print status and exit')
  sub_parser.add_argument('--build-id',
                          help='The AUTONINJA_BUILD_ID of the session to query '
                          '(otherwise prints all sessions).')

  sub_parser = sub_parsers.add_parser('wait', help='Wait for jobs to complete')
  sub_parser.add_argument(
      '--build-id',
      help='The AUTONINJA_BUILD_ID of the session to wait for '
      '(otherwise waits for all sessions).')

  sub_parser = sub_parsers.add_parser('run', help='Adds a task.')
  sub_parser.add_argument('cmd', nargs='+', help='The command to run')

  args = parser.parse_args()

  ret = 0
  if args.command == 'start':
    OptionsManager.set_options(quiet=args.quiet,
                               should_remote_print=not args.no_remote_print)
    ret = _start_server(args.exit_on_idle)
  elif args.command == 'stop':
    ret = _send_stop_server()
  elif args.command == 'register-build':
    ret = _register_build(args.builder_pid, args.output_directory)
  elif args.command == 'unregister-build':
    if args.verbose:
      ret = _print_build_status(args.build_id)
    if args.cancel_jobs:
      # This sub-command only defines --build-id; there is no cancel_build
      # attribute on its namespace.
      ret = _send_cancel_build(args.build_id)
  elif args.command == 'status':
    if args.build_id:
      ret = _print_build_status(args.build_id)
    else:
      ret = _print_build_status_all()
  elif args.command == 'wait':
    if args.build_id:
      ret = _wait_for_build(args.build_id)
    else:
      ret = _wait_for_idle()
  elif args.command == 'run':
    ret = _add_task(args.cmd)
  else:
    parser.print_help()
    return 1
  return ret
1253
1254
def main():
  """Runs the flag-based or the sub-command based command line.

  The flag-based parser is used only when the first argument starts with '-';
  with no arguments, or a first argument that is a word, the sub-command
  parser runs.

  Returns:
    The exit code of whichever entry point ran.
  """
  argv = sys.argv
  uses_flag_cli = len(argv) > 1 and argv[1].startswith('-')
  return _main_old() if uses_flag_cli else _main_new()
Peter Wenb1f3b1d2021-02-02 21:30:201259
1260
# Script entry point: exit with the status code returned by main().
if __name__ == '__main__':
  sys.exit(main())