openpilot v0.9.6 release
date: 2024-01-12T10:13:37 master commit: ba792d576a49a0899b88a753fa1c52956bedf9e6
0  selfdrive/manager/__init__.py  Normal file
91  selfdrive/manager/build.py  Executable file
@@ -0,0 +1,91 @@
#!/usr/bin/env python3
import os
import subprocess
from pathlib import Path
from typing import List

# NOTE: Do NOT import anything here that needs to be built (e.g. params)
from openpilot.common.basedir import BASEDIR
from openpilot.common.spinner import Spinner
from openpilot.common.text_window import TextWindow
from openpilot.system.hardware import AGNOS
from openpilot.common.swaglog import cloudlog, add_file_handler
from openpilot.system.version import is_dirty

MAX_CACHE_SIZE = 4e9 if "CI" in os.environ else 2e9
CACHE_DIR = Path("/data/scons_cache" if AGNOS else "/tmp/scons_cache")

TOTAL_SCONS_NODES = 2560
MAX_BUILD_PROGRESS = 100
PREBUILT = os.path.exists(os.path.join(BASEDIR, 'prebuilt'))

def build(spinner: Spinner, dirty: bool = False, minimal: bool = False) -> None:
  env = os.environ.copy()
  env['SCONS_PROGRESS'] = "1"
  nproc = os.cpu_count()
  if nproc is None:
    nproc = 2

  extra_args = ["--minimal"] if minimal else []

  # building with all cores can result in using too
  # much memory, so retry with less parallelism
  compile_output: List[bytes] = []
  for n in (nproc, nproc/2, 1):
    compile_output.clear()
    scons: subprocess.Popen = subprocess.Popen(["scons", f"-j{int(n)}", "--cache-populate", *extra_args], cwd=BASEDIR, env=env, stderr=subprocess.PIPE)
    assert scons.stderr is not None

    # Read progress from stderr and update spinner
    while scons.poll() is None:
      try:
        line = scons.stderr.readline()
        if line is None:
          continue
        line = line.rstrip()

        prefix = b'progress: '
        if line.startswith(prefix):
          i = int(line[len(prefix):])
          spinner.update_progress(MAX_BUILD_PROGRESS * min(1., i / TOTAL_SCONS_NODES), 100.)
        elif len(line):
          compile_output.append(line)
          print(line.decode('utf8', 'replace'))
      except Exception:
        pass

    if scons.returncode == 0:
      break

  if scons.returncode != 0:
    # Read remaining output
    if scons.stderr is not None:
      compile_output += scons.stderr.read().split(b'\n')

    # Build failed, log errors
    error_s = b"\n".join(compile_output).decode('utf8', 'replace')
    add_file_handler(cloudlog)
    cloudlog.error("scons build failed\n" + error_s)

    # Show TextWindow
    spinner.close()
    if not os.getenv("CI"):
      with TextWindow("openpilot failed to build\n \n" + error_s) as t:
        t.wait_for_exit()
    exit(1)

  # enforce max cache size
  cache_files = [f for f in CACHE_DIR.rglob('*') if f.is_file()]
  cache_files.sort(key=lambda f: f.stat().st_mtime)
  cache_size = sum(f.stat().st_size for f in cache_files)
  for f in cache_files:
    if cache_size < MAX_CACHE_SIZE:
      break
    cache_size -= f.stat().st_size
    f.unlink()


if __name__ == "__main__" and not PREBUILT:
  spinner = Spinner()
  spinner.update_progress(0, 100)
  build(spinner, is_dirty(), minimal=AGNOS)
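The tail of build() enforces MAX_CACHE_SIZE by walking the scons cache oldest-first (files sorted by ascending mtime) and unlinking until the total drops under the cap. A minimal standalone sketch of that eviction policy, using a hypothetical directory and size cap instead of the constants above:

from pathlib import Path

def trim_cache(cache_dir: Path, max_size: int) -> None:
  # delete the oldest files first until the directory is under max_size bytes
  if not cache_dir.exists():
    return
  files = sorted((f for f in cache_dir.rglob('*') if f.is_file()),
                 key=lambda f: f.stat().st_mtime)
  total = sum(f.stat().st_size for f in files)
  for f in files:
    if total < max_size:
      break
    total -= f.stat().st_size
    f.unlink()

# example: keep a hypothetical cache directory under ~2 GB
trim_cache(Path("/tmp/scons_cache"), int(2e9))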
43  selfdrive/manager/helpers.py  Normal file
@@ -0,0 +1,43 @@
import os
import sys
import fcntl
import errno
import signal


def unblock_stdout() -> None:
  # get a non-blocking stdout
  child_pid, child_pty = os.forkpty()
  if child_pid != 0:  # parent

    # child is in its own process group, manually pass kill signals
    signal.signal(signal.SIGINT, lambda signum, frame: os.kill(child_pid, signal.SIGINT))
    signal.signal(signal.SIGTERM, lambda signum, frame: os.kill(child_pid, signal.SIGTERM))

    fcntl.fcntl(sys.stdout, fcntl.F_SETFL, fcntl.fcntl(sys.stdout, fcntl.F_GETFL) | os.O_NONBLOCK)

    while True:
      try:
        dat = os.read(child_pty, 4096)
      except OSError as e:
        if e.errno == errno.EIO:
          break
        continue

      if not dat:
        break

      try:
        sys.stdout.write(dat.decode('utf8'))
      except (OSError, UnicodeDecodeError):
        pass

    # os.wait() returns a tuple with the pid and a 16 bit value
    # whose low byte is the signal number and whose high byte is the exit status
    exit_status = os.wait()[1] >> 8
    os._exit(exit_status)


def write_onroad_params(started, params):
  params.put_bool("IsOnroad", started)
  params.put_bool("IsOffroad", not started)
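The exit-status comment in unblock_stdout() is the whole trick: os.wait() packs the child's exit code into the high byte of the status, so shifting right by 8 recovers it. A small illustration (not part of the commit, POSIX-only) comparing the manual shift with the stdlib decoder:

import os

# fork a child that exits with a known code, then decode the wait status
# two ways: the ">> 8" shift used in unblock_stdout() and os.WEXITSTATUS
pid = os.fork()
if pid == 0:
  os._exit(7)  # child exits immediately with status 7

_, status = os.wait()
assert (status >> 8) == os.WEXITSTATUS(status) == 7
print("child exit status:", status >> 8)

Python 3.9+ also offers os.waitstatus_to_exitcode(status) for the same decoding.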
252  selfdrive/manager/manager.py  Executable file
@@ -0,0 +1,252 @@
#!/usr/bin/env python3
import datetime
import os
import signal
import subprocess
import sys
import traceback
from typing import List, Tuple, Union

from cereal import log
import cereal.messaging as messaging
import openpilot.selfdrive.sentry as sentry
from openpilot.common.basedir import BASEDIR
from openpilot.common.params import Params, ParamKeyType
from openpilot.common.text_window import TextWindow
from openpilot.selfdrive.boardd.set_time import set_time
from openpilot.system.hardware import HARDWARE, PC
from openpilot.selfdrive.manager.helpers import unblock_stdout, write_onroad_params
from openpilot.selfdrive.manager.process import ensure_running
from openpilot.selfdrive.manager.process_config import managed_processes
from openpilot.selfdrive.athena.registration import register, UNREGISTERED_DONGLE_ID
from openpilot.common.swaglog import cloudlog, add_file_handler
from openpilot.system.version import is_dirty, get_commit, get_version, get_origin, get_short_branch, \
                                     get_normalized_origin, terms_version, training_version, \
                                     is_tested_branch, is_release_branch



def manager_init() -> None:
  # update system time from panda
  set_time(cloudlog)

  # save boot log
  subprocess.call("./bootlog", cwd=os.path.join(BASEDIR, "system/loggerd"))

  params = Params()
  params.clear_all(ParamKeyType.CLEAR_ON_MANAGER_START)
  params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
  params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)
  if is_release_branch():
    params.clear_all(ParamKeyType.DEVELOPMENT_ONLY)

  default_params: List[Tuple[str, Union[str, bytes]]] = [
    ("CompletedTrainingVersion", "0"),
    ("DisengageOnAccelerator", "0"),
    ("GsmMetered", "1"),
    ("HasAcceptedTerms", "0"),
    ("LanguageSetting", "main_en"),
    ("OpenpilotEnabledToggle", "1"),
    ("LongitudinalPersonality", str(log.LongitudinalPersonality.standard)),
  ]
  if not PC:
    default_params.append(("LastUpdateTime", datetime.datetime.utcnow().isoformat().encode('utf8')))

  if params.get_bool("RecordFrontLock"):
    params.put_bool("RecordFront", True)

  # set unset params
  for k, v in default_params:
    if params.get(k) is None:
      params.put(k, v)

  # is this dashcam?
  if os.getenv("PASSIVE") is not None:
    params.put_bool("Passive", bool(int(os.getenv("PASSIVE", "0"))))

  if params.get("Passive") is None:
    raise Exception("Passive must be set to continue")

  # Create folders needed for msgq
  try:
    os.mkdir("/dev/shm")
  except FileExistsError:
    pass
  except PermissionError:
    print("WARNING: failed to make /dev/shm")

  # set version params
  params.put("Version", get_version())
  params.put("TermsVersion", terms_version)
  params.put("TrainingVersion", training_version)
  params.put("GitCommit", get_commit(default=""))
  params.put("GitBranch", get_short_branch(default=""))
  params.put("GitRemote", get_origin(default=""))
  params.put_bool("IsTestedBranch", is_tested_branch())
  params.put_bool("IsReleaseBranch", is_release_branch())

  # set dongle id
  reg_res = register(show_spinner=True)
  if reg_res:
    dongle_id = reg_res
  else:
    serial = params.get("HardwareSerial")
    raise Exception(f"Registration failed for device {serial}")
  os.environ['DONGLE_ID'] = dongle_id  # Needed for swaglog

  if not is_dirty():
    os.environ['CLEAN'] = '1'

  # init logging
  sentry.init(sentry.SentryProject.SELFDRIVE)
  cloudlog.bind_global(dongle_id=dongle_id,
                       version=get_version(),
                       origin=get_normalized_origin(),
                       branch=get_short_branch(),
                       commit=get_commit(),
                       dirty=is_dirty(),
                       device=HARDWARE.get_device_type())


def manager_prepare() -> None:
  for p in managed_processes.values():
    p.prepare()


def manager_cleanup() -> None:
  # send signals to kill all procs
  for p in managed_processes.values():
    p.stop(block=False)

  # ensure all are killed
  for p in managed_processes.values():
    p.stop(block=True)

  cloudlog.info("everything is dead")


def manager_thread() -> None:
  cloudlog.bind(daemon="manager")
  cloudlog.info("manager start")
  cloudlog.info({"environ": os.environ})

  params = Params()

  ignore: List[str] = []
  if params.get("DongleId", encoding='utf8') in (None, UNREGISTERED_DONGLE_ID):
    ignore += ["manage_athenad", "uploader"]
  if os.getenv("NOBOARD") is not None:
    ignore.append("pandad")
  ignore += [x for x in os.getenv("BLOCK", "").split(",") if len(x) > 0]

  sm = messaging.SubMaster(['deviceState', 'carParams'], poll=['deviceState'])
  pm = messaging.PubMaster(['managerState'])

  write_onroad_params(False, params)
  ensure_running(managed_processes.values(), False, params=params, CP=sm['carParams'], not_run=ignore)

  started_prev = False

  while True:
    sm.update()

    started = sm['deviceState'].started

    if started and not started_prev:
      params.clear_all(ParamKeyType.CLEAR_ON_ONROAD_TRANSITION)
    elif not started and started_prev:
      params.clear_all(ParamKeyType.CLEAR_ON_OFFROAD_TRANSITION)

    # update onroad params, which drives boardd's safety setter thread
    if started != started_prev:
      write_onroad_params(started, params)

    started_prev = started

    ensure_running(managed_processes.values(), started, params=params, CP=sm['carParams'], not_run=ignore)

    running = ' '.join("%s%s\u001b[0m" % ("\u001b[32m" if p.proc.is_alive() else "\u001b[31m", p.name)
                       for p in managed_processes.values() if p.proc)
    print(running)
    cloudlog.debug(running)

    # send managerState
    msg = messaging.new_message('managerState', valid=True)
    msg.managerState.processes = [p.get_process_state_msg() for p in managed_processes.values()]
    pm.send('managerState', msg)

    # Exit main loop when uninstall/shutdown/reboot is needed
    shutdown = False
    for param in ("DoUninstall", "DoShutdown", "DoReboot"):
      if params.get_bool(param):
        shutdown = True
        params.put("LastManagerExitReason", f"{param} {datetime.datetime.now()}")
        cloudlog.warning(f"Shutting down manager - {param} set")

    if shutdown:
      break


def main() -> None:
  prepare_only = os.getenv("PREPAREONLY") is not None

  manager_init()

  # Start UI early so prepare can happen in the background
  if not prepare_only:
    managed_processes['ui'].start()

  manager_prepare()

  if prepare_only:
    return

  # SystemExit on sigterm
  signal.signal(signal.SIGTERM, lambda signum, frame: sys.exit(1))

  try:
    manager_thread()
  except Exception:
    traceback.print_exc()
    sentry.capture_exception()
  finally:
    manager_cleanup()

  params = Params()
  if params.get_bool("DoUninstall"):
    cloudlog.warning("uninstalling")
    HARDWARE.uninstall()
  elif params.get_bool("DoReboot"):
    cloudlog.warning("reboot")
    HARDWARE.reboot()
  elif params.get_bool("DoShutdown"):
    cloudlog.warning("shutdown")
    HARDWARE.shutdown()


if __name__ == "__main__":
  unblock_stdout()

  try:
    main()
  except KeyboardInterrupt:
    print("got CTRL-C, exiting")
  except Exception:
    add_file_handler(cloudlog)
    cloudlog.exception("Manager failed to start")

    try:
      managed_processes['ui'].stop()
    except Exception:
      pass

    # Show last 3 lines of traceback
    error = traceback.format_exc(-3)
    error = "Manager failed to start\n\n" + error
    with TextWindow(error) as t:
      t.wait_for_exit()

    raise

  # manual exit because we are forked
  sys.exit(0)
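Each pass through the loop in manager_thread() publishes a managerState message listing every managed process. A rough sketch of consuming that stream from another tool, assuming the cereal messaging stack from this repo is importable and manager is running:

import cereal.messaging as messaging

# subscribe to the managerState messages that manager_thread() publishes
sm = messaging.SubMaster(['managerState'])
sm.update(1000)  # wait up to 1000 ms for a message
if sm.updated['managerState']:
  for p in sm['managerState'].processes:
    status = "running" if p.running else f"stopped (exit {p.exitCode})"
    print(f"{p.name}: {status}")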
291  selfdrive/manager/process.py  Normal file
@@ -0,0 +1,291 @@
import importlib
import os
import signal
import struct
import time
import subprocess
from typing import Optional, Callable, List, ValuesView
from abc import ABC, abstractmethod
from multiprocessing import Process

from setproctitle import setproctitle

from cereal import car, log
import cereal.messaging as messaging
import openpilot.selfdrive.sentry as sentry
from openpilot.common.basedir import BASEDIR
from openpilot.common.params import Params
from openpilot.common.swaglog import cloudlog

WATCHDOG_FN = "/dev/shm/wd_"
ENABLE_WATCHDOG = os.getenv("NO_WATCHDOG") is None


def launcher(proc: str, name: str) -> None:
  try:
    # import the process
    mod = importlib.import_module(proc)

    # rename the process
    setproctitle(proc)

    # create new context since we forked
    messaging.context = messaging.Context()

    # add daemon name tag to logs
    cloudlog.bind(daemon=name)
    sentry.set_tag("daemon", name)

    # exec the process
    mod.main()
  except KeyboardInterrupt:
    cloudlog.warning(f"child {proc} got SIGINT")
  except Exception:
    # can't install the crash handler because sys.excepthook doesn't play nice
    # with threads, so catch it here.
    sentry.capture_exception()
    raise


def nativelauncher(pargs: List[str], cwd: str, name: str) -> None:
  os.environ['MANAGER_DAEMON'] = name

  # exec the process
  os.chdir(cwd)
  os.execvp(pargs[0], pargs)


def join_process(process: Process, timeout: float) -> None:
  # Process().join(timeout) will hang due to a python 3 bug: https://bugs.python.org/issue28382
  # We have to poll the exitcode instead
  t = time.monotonic()
  while time.monotonic() - t < timeout and process.exitcode is None:
    time.sleep(0.001)


class ManagerProcess(ABC):
  daemon = False
  sigkill = False
  should_run: Callable[[bool, Params, car.CarParams], bool]
  proc: Optional[Process] = None
  enabled = True
  name = ""

  last_watchdog_time = 0
  watchdog_max_dt: Optional[int] = None
  watchdog_seen = False
  shutting_down = False

  @abstractmethod
  def prepare(self) -> None:
    pass

  @abstractmethod
  def start(self) -> None:
    pass

  def restart(self) -> None:
    self.stop(sig=signal.SIGKILL)
    self.start()

  def check_watchdog(self, started: bool) -> None:
    if self.watchdog_max_dt is None or self.proc is None:
      return

    try:
      fn = WATCHDOG_FN + str(self.proc.pid)
      with open(fn, "rb") as f:
        # TODO: why can't pylint find struct.unpack?
        self.last_watchdog_time = struct.unpack('Q', f.read())[0]
    except Exception:
      pass

    dt = time.monotonic() - self.last_watchdog_time / 1e9

    if dt > self.watchdog_max_dt:
      if self.watchdog_seen and ENABLE_WATCHDOG:
        cloudlog.error(f"Watchdog timeout for {self.name} (exitcode {self.proc.exitcode}) restarting ({started=})")
        self.restart()
    else:
      self.watchdog_seen = True

  def stop(self, retry: bool = True, block: bool = True, sig: Optional[signal.Signals] = None) -> Optional[int]:
    if self.proc is None:
      return None

    if self.proc.exitcode is None:
      if not self.shutting_down:
        cloudlog.info(f"killing {self.name}")
        if sig is None:
          sig = signal.SIGKILL if self.sigkill else signal.SIGINT
        self.signal(sig)
        self.shutting_down = True

      if not block:
        return None

    join_process(self.proc, 5)

    # If the process failed to die, send SIGKILL
    if self.proc.exitcode is None and retry:
      cloudlog.info(f"killing {self.name} with SIGKILL")
      self.signal(signal.SIGKILL)
      self.proc.join()

    ret = self.proc.exitcode
    cloudlog.info(f"{self.name} is dead with {ret}")

    if self.proc.exitcode is not None:
      self.shutting_down = False
      self.proc = None

    return ret

  def signal(self, sig: int) -> None:
    if self.proc is None:
      return

    # Don't signal if already exited
    if self.proc.exitcode is not None and self.proc.pid is not None:
      return

    # Can't signal if we don't have a pid
    if self.proc.pid is None:
      return

    cloudlog.info(f"sending signal {sig} to {self.name}")
    os.kill(self.proc.pid, sig)

  def get_process_state_msg(self):
    state = log.ManagerState.ProcessState.new_message()
    state.name = self.name
    if self.proc:
      state.running = self.proc.is_alive()
      state.shouldBeRunning = self.proc is not None and not self.shutting_down
      state.pid = self.proc.pid or 0
      state.exitCode = self.proc.exitcode or 0
    return state


class NativeProcess(ManagerProcess):
  def __init__(self, name, cwd, cmdline, should_run, enabled=True, sigkill=False, watchdog_max_dt=None):
    self.name = name
    self.cwd = cwd
    self.cmdline = cmdline
    self.should_run = should_run
    self.enabled = enabled
    self.sigkill = sigkill
    self.watchdog_max_dt = watchdog_max_dt
    self.launcher = nativelauncher

  def prepare(self) -> None:
    pass

  def start(self) -> None:
    # In case we only tried a non-blocking stop, we need to stop it before restarting
    if self.shutting_down:
      self.stop()

    if self.proc is not None:
      return

    cwd = os.path.join(BASEDIR, self.cwd)
    cloudlog.info(f"starting process {self.name}")
    self.proc = Process(name=self.name, target=self.launcher, args=(self.cmdline, cwd, self.name))
    self.proc.start()
    self.watchdog_seen = False
    self.shutting_down = False


class PythonProcess(ManagerProcess):
  def __init__(self, name, module, should_run, enabled=True, sigkill=False, watchdog_max_dt=None):
    self.name = name
    self.module = module
    self.should_run = should_run
    self.enabled = enabled
    self.sigkill = sigkill
    self.watchdog_max_dt = watchdog_max_dt
    self.launcher = launcher

  def prepare(self) -> None:
    if self.enabled:
      cloudlog.info(f"preimporting {self.module}")
      importlib.import_module(self.module)

  def start(self) -> None:
    # In case we only tried a non-blocking stop, we need to stop it before restarting
    if self.shutting_down:
      self.stop()

    if self.proc is not None:
      return

    cloudlog.info(f"starting python {self.module}")
    self.proc = Process(name=self.name, target=self.launcher, args=(self.module, self.name))
    self.proc.start()
    self.watchdog_seen = False
    self.shutting_down = False


class DaemonProcess(ManagerProcess):
  """Python process that has to stay running across manager restart.
  This is used for athena so you don't lose SSH access when restarting manager."""
  def __init__(self, name, module, param_name, enabled=True):
    self.name = name
    self.module = module
    self.param_name = param_name
    self.enabled = enabled
    self.params = None

  @staticmethod
  def should_run(started, params, CP):
    return True

  def prepare(self) -> None:
    pass

  def start(self) -> None:
    if self.params is None:
      self.params = Params()

    pid = self.params.get(self.param_name, encoding='utf-8')
    if pid is not None:
      try:
        os.kill(int(pid), 0)
        with open(f'/proc/{pid}/cmdline') as f:
          if self.module in f.read():
            # daemon is running
            return
      except (OSError, FileNotFoundError):
        # process is dead
        pass

    cloudlog.info(f"starting daemon {self.name}")
    proc = subprocess.Popen(['python', '-m', self.module],
                            stdin=open('/dev/null'),
                            stdout=open('/dev/null', 'w'),
                            stderr=open('/dev/null', 'w'),
                            preexec_fn=os.setpgrp)

    self.params.put(self.param_name, str(proc.pid))

  def stop(self, retry=True, block=True, sig=None) -> None:
    pass


def ensure_running(procs: ValuesView[ManagerProcess], started: bool, params=None, CP: car.CarParams=None,
                   not_run: Optional[List[str]]=None) -> List[ManagerProcess]:
  if not_run is None:
    not_run = []

  running = []
  for p in procs:
    if p.enabled and p.name not in not_run and p.should_run(started, params, CP):
      p.start()
      running.append(p)
    else:
      p.stop(block=False)

    p.check_watchdog(started)

  return running
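check_watchdog() expects each watched process to keep writing its current monotonic time, in nanoseconds, as a native-endian uint64 into /dev/shm/wd_<pid>; the value is divided by 1e9 and compared against watchdog_max_dt. The real kicking code lives elsewhere in the tree, but a minimal sketch of a process feeding its own watchdog file in that format might look like:

import os
import struct
import time

def kick_watchdog() -> None:
  # write the current monotonic time (ns) as an unsigned 64-bit int to
  # /dev/shm/wd_<pid>, the format check_watchdog() unpacks with 'Q'
  fn = "/dev/shm/wd_" + str(os.getpid())
  with open(fn, "wb") as f:
    f.write(struct.pack('Q', int(time.monotonic() * 1e9)))

# call this periodically, comfortably faster than the process's watchdog_max_dt
kick_watchdog()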
91  selfdrive/manager/process_config.py  Normal file
@@ -0,0 +1,91 @@
import os

from cereal import car
from openpilot.common.params import Params
from openpilot.system.hardware import PC, TICI
from openpilot.selfdrive.manager.process import PythonProcess, NativeProcess, DaemonProcess

WEBCAM = os.getenv("USE_WEBCAM") is not None

def driverview(started: bool, params: Params, CP: car.CarParams) -> bool:
  return started or params.get_bool("IsDriverViewEnabled")

def notcar(started: bool, params: Params, CP: car.CarParams) -> bool:
  return started and CP.notCar

def iscar(started: bool, params: Params, CP: car.CarParams) -> bool:
  return started and not CP.notCar

def logging(started, params, CP: car.CarParams) -> bool:
  run = (not CP.notCar) or not params.get_bool("DisableLogging")
  return started and run

def ublox_available() -> bool:
  return os.path.exists('/dev/ttyHS0') and not os.path.exists('/persist/comma/use-quectel-gps')

def ublox(started, params, CP: car.CarParams) -> bool:
  use_ublox = ublox_available()
  if use_ublox != params.get_bool("UbloxAvailable"):
    params.put_bool("UbloxAvailable", use_ublox)
  return started and use_ublox

def qcomgps(started, params, CP: car.CarParams) -> bool:
  return started and not ublox_available()

def always_run(started, params, CP: car.CarParams) -> bool:
  return True

def only_onroad(started: bool, params, CP: car.CarParams) -> bool:
  return started

def only_offroad(started, params, CP: car.CarParams) -> bool:
  return not started

procs = [
  DaemonProcess("manage_athenad", "selfdrive.athena.manage_athenad", "AthenadPid"),

  NativeProcess("camerad", "system/camerad", ["./camerad"], driverview),
  NativeProcess("logcatd", "system/logcatd", ["./logcatd"], only_onroad),
  NativeProcess("proclogd", "system/proclogd", ["./proclogd"], only_onroad),
  PythonProcess("logmessaged", "system.logmessaged", always_run),
  PythonProcess("micd", "system.micd", iscar),
  PythonProcess("timezoned", "system.timezoned", always_run, enabled=not PC),

  PythonProcess("dmonitoringmodeld", "selfdrive.modeld.dmonitoringmodeld", driverview, enabled=(not PC or WEBCAM)),
  NativeProcess("encoderd", "system/loggerd", ["./encoderd"], only_onroad),
  NativeProcess("stream_encoderd", "system/loggerd", ["./encoderd", "--stream"], notcar),
  NativeProcess("loggerd", "system/loggerd", ["./loggerd"], logging),
  NativeProcess("modeld", "selfdrive/modeld", ["./modeld"], only_onroad),
  NativeProcess("mapsd", "selfdrive/navd", ["./mapsd"], only_onroad),
  PythonProcess("navmodeld", "selfdrive.modeld.navmodeld", only_onroad),
  NativeProcess("sensord", "system/sensord", ["./sensord"], only_onroad, enabled=not PC),
  NativeProcess("ui", "selfdrive/ui", ["./ui"], always_run, watchdog_max_dt=(5 if not PC else None)),
  PythonProcess("soundd", "selfdrive.ui.soundd", only_onroad),
  NativeProcess("locationd", "selfdrive/locationd", ["./locationd"], only_onroad),
  NativeProcess("boardd", "selfdrive/boardd", ["./boardd"], always_run, enabled=False),
  PythonProcess("calibrationd", "selfdrive.locationd.calibrationd", only_onroad),
  PythonProcess("torqued", "selfdrive.locationd.torqued", only_onroad),
  PythonProcess("controlsd", "selfdrive.controls.controlsd", only_onroad),
  PythonProcess("deleter", "system.loggerd.deleter", always_run),
  PythonProcess("dmonitoringd", "selfdrive.monitoring.dmonitoringd", driverview, enabled=(not PC or WEBCAM)),
  PythonProcess("qcomgpsd", "system.qcomgpsd.qcomgpsd", qcomgps, enabled=TICI),
  PythonProcess("navd", "selfdrive.navd.navd", only_onroad),
  PythonProcess("pandad", "selfdrive.boardd.pandad", always_run),
  PythonProcess("paramsd", "selfdrive.locationd.paramsd", only_onroad),
  NativeProcess("ubloxd", "system/ubloxd", ["./ubloxd"], ublox, enabled=TICI),
  PythonProcess("pigeond", "system.sensord.pigeond", ublox, enabled=TICI),
  PythonProcess("plannerd", "selfdrive.controls.plannerd", only_onroad),
  PythonProcess("radard", "selfdrive.controls.radard", only_onroad),
  PythonProcess("thermald", "selfdrive.thermald.thermald", always_run),
  PythonProcess("tombstoned", "selfdrive.tombstoned", always_run, enabled=not PC),
  PythonProcess("updated", "selfdrive.updated", only_offroad, enabled=not PC),
  PythonProcess("uploader", "system.loggerd.uploader", always_run),
  PythonProcess("statsd", "selfdrive.statsd", always_run),

  # debug procs
  NativeProcess("bridge", "cereal/messaging", ["./bridge"], notcar),
  PythonProcess("webrtcd", "system.webrtc.webrtcd", notcar),
  PythonProcess("webjoystick", "tools.bodyteleop.web", notcar),
]

managed_processes = {p.name: p for p in procs}
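Every entry in procs pairs a process with a should_run predicate taking (started, params, CP), and managed_processes indexes them by name (e.g. managed_processes['ui']). A hypothetical sketch of adding an entry, with a made-up module path and param key that would have to exist before this could actually run:

from cereal import car
from openpilot.common.params import Params
from openpilot.selfdrive.manager.process import PythonProcess

def my_feature(started: bool, params: Params, CP: car.CarParams) -> bool:
  # hypothetical predicate: onroad only, gated on a made-up param
  # ("MyFeatureEnabled" would have to be registered with Params first)
  return started and params.get_bool("MyFeatureEnabled")

# hypothetical process entry; this module path does not exist in this commit
my_proc = PythonProcess("myfeatured", "selfdrive.myfeature.myfeatured", my_feature)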
0  selfdrive/manager/test/__init__.py  Normal file
81  selfdrive/manager/test/test_manager.py  Executable file
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
import os
import pytest
import signal
import time
import unittest

from cereal import car
from openpilot.common.params import Params
import openpilot.selfdrive.manager.manager as manager
from openpilot.selfdrive.manager.process import ensure_running
from openpilot.selfdrive.manager.process_config import managed_processes
from openpilot.system.hardware import HARDWARE

os.environ['FAKEUPLOAD'] = "1"

MAX_STARTUP_TIME = 3
BLACKLIST_PROCS = ['manage_athenad', 'pandad', 'pigeond']


@pytest.mark.tici
class TestManager(unittest.TestCase):
  def setUp(self):
    os.environ['PASSIVE'] = '0'
    HARDWARE.set_power_save(False)

    # ensure clean CarParams
    params = Params()
    params.clear_all()

  def tearDown(self):
    manager.manager_cleanup()

  def test_manager_prepare(self):
    os.environ['PREPAREONLY'] = '1'
    manager.main()

  def test_blacklisted_procs(self):
    # TODO: ensure there are blacklisted procs until we have a dedicated test
    self.assertTrue(len(BLACKLIST_PROCS), "No blacklisted procs to test not_run")

  def test_startup_time(self):
    for _ in range(10):
      start = time.monotonic()
      os.environ['PREPAREONLY'] = '1'
      manager.main()
      t = time.monotonic() - start
      assert t < MAX_STARTUP_TIME, f"startup took {t}s, expected <{MAX_STARTUP_TIME}s"

  def test_clean_exit(self):
    """
      Ensure all processes exit cleanly when stopped.
    """
    HARDWARE.set_power_save(False)
    manager.manager_init()
    manager.manager_prepare()

    CP = car.CarParams.new_message()
    procs = ensure_running(managed_processes.values(), True, Params(), CP, not_run=BLACKLIST_PROCS)

    time.sleep(10)

    for p in procs:
      with self.subTest(proc=p.name):
        state = p.get_process_state_msg()
        self.assertTrue(state.running, f"{p.name} not running")
        exit_code = p.stop(retry=False)

        self.assertNotIn(p.name, BLACKLIST_PROCS, f"{p.name} was started")

        self.assertTrue(exit_code is not None, f"{p.name} failed to exit")

        # TODO: interrupted blocking read exits with 1 in cereal. use a more unique return code
        exit_codes = [0, 1]
        if p.sigkill:
          exit_codes = [-signal.SIGKILL]
        self.assertIn(exit_code, exit_codes, f"{p.name} died with {exit_code}")


if __name__ == "__main__":
  unittest.main()