spoc/usr/lib/python3.8/spoc/container.py

# -*- coding: utf-8 -*-

import copy
import enum
import os
import shlex
import shutil
import subprocess
import time
from concurrent.futures import ThreadPoolExecutor

from . import config, net, repo_local, templates
from .depsolver import DepSolver
from .exceptions import InvalidContainerStateError

# States taken from https://github.com/lxc/lxc/blob/master/src/lxc/state.h
class ContainerState(enum.Enum):
    # Container lifecycle states
    # Member names are used by Container.get_state() to look up the state string printed by lxc-info
    # and member values are passed by Container.await_state() to lxc-wait as the awaited state
    STOPPED = 'STOPPED'
    STARTING = 'STARTING'
    RUNNING = 'RUNNING'
    STOPPING = 'STOPPING'
    ABORTING = 'ABORTING'
    FREEZING = 'FREEZING'
    FROZEN = 'FROZEN'
    THAWED = 'THAWED'
    # Also returned by Container.get_state() when the lxc-info call fails
    UNKNOWN = 'UNKNOWN'

# Names of Container attributes which together form the container definition
# Only these keys are read by Container.set_definition() and exported by Container.get_definition()
DEFINITION_MEMBERS = {'build', 'depends', 'layers', 'mounts', 'env', 'uid', 'gid', 'cmd', 'cwd', 'ready', 'halt'}

class Container:
    def __init__(self, name, load_from_repo=True):
        # Initialize the container object with empty definition members and paths derived from its name
        # Unless load_from_repo is False, the definition is then loaded from the local repository
        self.name = name
        # Definition members (see DEFINITION_MEMBERS) and their defaults
        self.build = False
        self.depends, self.layers = [], []
        self.mounts, self.env = {}, {}
        self.uid = self.gid = None
        self.cmd = self.cwd = None
        self.ready = self.halt = None
        # Filesystem locations belonging to the container
        container_path = os.path.join(config.CONTAINERS_DIR, name)
        self.container_path = container_path
        self.config_path = os.path.join(container_path, 'config')
        self.rootfs_path = os.path.join(container_path, 'rootfs')
        self.olwork_path = os.path.join(container_path, 'olwork')
        self.ephemeral_layer_path = os.path.join(container_path, 'ephemeral')
        self.log_path = os.path.join(config.LOG_DIR, f'{name}.log')
        if load_from_repo:
            stored_definition = repo_local.get_container(name)
            self.set_definition(stored_definition)

    def set_definition(self, definition):
        # Copy the recognized definition members from the given definition onto this object
        # Keys which are not listed in DEFINITION_MEMBERS are ignored
        recognized = DEFINITION_MEMBERS.intersection(definition)
        for member in recognized:
            setattr(self, member, definition[member])

    def get_definition(self):
        # Export the container definition as a dictionary
        # Only members with truthy values are included and each value is a shallow copy
        members = ((key, getattr(self, key)) for key in DEFINITION_MEMBERS)
        return {key: copy.copy(value) for key, value in members if value}

    def get_state(self):
        # Get current state of the container, uses LXC monitor socket accessible only in container's namespace
        # Returns ContainerState.UNKNOWN if lxc-info exits with non-zero return code
        lxc_info = ['lxc-info', '-sH', '-P', config.CONTAINERS_DIR, self.name]
        try:
            result = subprocess.run(lxc_info, capture_output=True, check=True)
        except subprocess.CalledProcessError:
            return ContainerState.UNKNOWN
        # The output is the state name, which is looked up among the ContainerState members
        state_name = result.stdout.strip().decode()
        return ContainerState[state_name]

    def is_running(self):
        # Shorthand telling whether the container currently reports the RUNNING state
        current_state = self.get_state()
        return current_state == ContainerState.RUNNING

    def is_stopped(self):
        # Shorthand telling whether the container currently reports the STOPPED state
        current_state = self.get_state()
        return current_state == ContainerState.STOPPED

    def await_state(self, awaited_state):
        # Block execution until the container reaches the desired state or until the 30 second timeout passes
        # Raises InvalidContainerStateError if lxc-wait fails and the container is not in the awaited state
        lxc_wait = ['lxc-wait', '-P', config.CONTAINERS_DIR, '-s', awaited_state.value, '-t', '30', self.name]
        try:
            subprocess.run(lxc_wait, check=True)
        except subprocess.CalledProcessError:
            # Sometimes LXC decides to return rc 1 even on successful state change, so verify the state explicitly
            current_state = self.get_state()
            if current_state == awaited_state:
                return
            raise InvalidContainerStateError(self.name, current_state)

    def mount_rootfs(self):
        # Prepares container rootfs
        # Called in lxc.hook.pre-start as the standard mount options are insufficient for rootless containers (see notes for overlayfs below)
        layer_paths = [os.path.join(config.LAYERS_DIR, layer) for layer in self.layers]
        if not self.build:
            # Add ephemeral layer if the container is not created as part of build process
            layer_paths.append(self.ephemeral_layer_path)
        if len(layer_paths) <= 1:
            # We only have a single layer, no overlay needed
            subprocess.run(['mount', '--bind', layer_paths[0], self.rootfs_path])
            return
        # Multiple layers require overlayfs, however non-root users don't normally have capability to create overlayfs mounts - https://www.spinics.net/lists/linux-fsdevel/msg105877.html
        # Standard linux kernels currently don't support overlay mounts in user namespaces (lxc.hook.pre-mount)
        # The exception is Ubuntu or custom patches such as https://salsa.debian.org/kernel-team/linux/blob/master/debian/patches/debian/overlayfs-permit-mounts-in-userns.patch
        # Possible alternative is fuse-overlayfs, which doesn't work well on Alpine (and it's FUSE anyway, so it needs an extra service and a process for each mount)
        # Another alternative would be to mount in the namespace via -N option, but LXC doesn't expose PID or namespaces of the process during container setup
        # The last layer becomes the upper directory, the remaining ones are lower directories listed in reverse order
        *lower_layers, upper_layer = layer_paths
        lowerdir = ':'.join(reversed(lower_layers))
        overlay_opts = f'upperdir={upper_layer},lowerdir={lowerdir},workdir={self.olwork_path}'
        subprocess.run(['mount', '-t', 'overlay', '-o', overlay_opts, 'none', self.rootfs_path])

    def unmount_rootfs(self):
        # Recursively unmounts container rootfs
        # Called in lxc.hook.post-stop
        # For unprivileged containers it could theoretically be called already in lxc.hook.start-host, as the user namespace clones the mounts,
        # so they are not needed in the parent namespace anymore, but removing rootfs on container stop seems more intuitive
        # Output of umount is discarded and its return code is not checked
        umount = ['umount', '-R', self.rootfs_path]
        silenced = {'stdout': subprocess.DEVNULL, 'stderr': subprocess.DEVNULL}
        subprocess.run(umount, **silenced)

    def clean_ephemeral_layer(self):
        # Cleans container ephemeral layer. Called in lxc.hook.post-stop and lxc.hook.pre-start in case of unclean shutdown
        # This is done early in the container start process, so the inode of the ephemeral directory must remain unchanged
        for item in os.scandir(self.ephemeral_layer_path):
            shutil.rmtree(item.path) if item.is_dir() else os.unlink(item.path)

    def get_mount_entry(self, volume, mountpoint):
        # Compose lxc.mount.entry line which bind-mounts the volume from the volumes directory to the mountpoint
        # A mountpoint with ':file' suffix is created as a file, any other mountpoint as a directory
        file_suffix = ':file'
        if mountpoint.endswith(file_suffix):
            mount_type = 'file'
            mountpoint = mountpoint[:-len(file_suffix)]
        else:
            mount_type = 'dir'
        source = os.path.join(config.VOLUMES_DIR, volume)
        return f'lxc.mount.entry = {source} {mountpoint} none bind,create={mount_type} 0 0'

    def create(self):
        # Create the container on the filesystem - its directories and LXC configuration - and register it in the local repository
        # Create container directories
        directories = (
            (self.rootfs_path, 0o755),
            (self.olwork_path, 0o755),
            (self.ephemeral_layer_path, 0o755),
            (config.LOG_DIR, 0o750),
        )
        for path, mode in directories:
            os.makedirs(path, mode, True)
        # Change UID/GID of the ephemeral layer directory
        # Chown is possible only when the process is running as root, for user namespaces, see https://linuxcontainers.org/lxc/manpages/man1/lxc-usernsexec.1.html
        os.chown(self.ephemeral_layer_path, 100000, 100000)
        # Render the multi-line parts of the configuration from the container definition
        mount_lines = '\n'.join(self.get_mount_entry(volume, mountpoint) for volume, mountpoint in self.mounts.items())
        env_lines = '\n'.join(f'lxc.environment = {key}={value}' for key, value in self.env.items())
        # Fall back to defaults for the definition members which are not set
        uid = self.uid or 0
        gid = self.gid or 0
        cmd = self.cmd or '/sbin/init'
        cwd = self.cwd or '/'
        halt = self.halt or 'SIGINT'
        ip_address, ip_netmask, ip_gateway = net.request_ip(self.name)
        # Write LXC configuration file
        with open(self.config_path, 'w') as config_file:
            lxc_config = templates.LXC_CONTAINER_TEMPLATE.format(
                name=self.name,
                interface=config.NETWORK_INTERFACE,
                resolv_conf=config.RESOLV_CONF,
                ip_address=ip_address,
                ip_netmask=ip_netmask,
                ip_gateway=ip_gateway,
                rootfs=self.rootfs_path,
                hosts=config.HOSTS_FILE,
                mounts=mount_lines,
                env=env_lines,
                uid=uid,
                gid=gid,
                cmd=cmd,
                cwd=cwd,
                halt=halt,
                log=self.log_path,
            )
            config_file.write(lxc_config)
        repo_local.register_container(self.name, self.get_definition())

    def destroy(self):
        # Unregister the container from the local repository, unmount its rootfs and remove its files
        repo_local.unregister_container(self.name)
        self.unmount_rootfs()
        # Remove the container directory and the log file, either of which may already be missing
        removals = (
            (shutil.rmtree, self.container_path),
            (os.unlink, self.log_path),
        )
        for remove, path in removals:
            try:
                remove(path)
            except FileNotFoundError:
                pass
        # Release the IP address from global hosts configuration
        net.release_ip(self.name)

    def start(self, command=None):
        # Start the container including its dependencies, in the order given by the dependency solver
        solver = DepSolver()
        self.get_start_dependencies(solver)
        for container in solver.solve():
            if container.is_running():
                continue
            # Pass start command only to the current container
            is_current = container.name == self.name
            container.do_start(command if is_current else None)

    def do_start(self, command=None):
        # Start the current container, wait until it is reported as started and execute application readiness check
        # command is an optional list of arguments, which is passed to lxc-start after the '--' separator
        # Raises InvalidContainerStateError if the container doesn't reach or leaves the RUNNING state
        # and concurrent.futures.TimeoutError if the readiness check doesn't finish within 30 seconds
        cmd = ['--']+command if command else []
        # NOTE(review): unlike lxc-stop in do_stop(), the lxc-start process is never waited for - confirm that this is intentional
        subprocess.Popen(['lxc-start', '-P', config.CONTAINERS_DIR, self.name]+cmd)
        self.await_state(ContainerState.RUNNING)
        # Launch the readiness check in a separate thread, so it can be reliably cancelled after timeout
        with ThreadPoolExecutor(max_workers=1) as pool:
            # Create anonymous object to pass the task cancellation information
            guard = type('', (object,), {'cancel': False})()
            future = pool.submit(self.check_readiness, guard)
            try:
                future.result(timeout=30)
            finally:
                # Signal the cancellation even if result() raises (typically on timeout). Leaving the with block waits
                # for the worker thread to finish, so without the signal a never-succeeding check would block it indefinitely
                guard.cancel = True

    def check_readiness(self, guard):
        # Run spoc.init.ready until it returns return code 0 or the guard cancels the loop
        ready_cmd = shlex.split(self.ready) if self.ready else ['/bin/true']
        while not guard.cancel:
            state = self.get_state()
            if state != ContainerState.RUNNING:
                raise InvalidContainerStateError(self.name, state)
            check = subprocess.run(['lxc-attach', '-P', config.CONTAINERS_DIR, '--clear-env', self.name, '--']+ready_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, timeout=30)
            if check.returncode == 0:
                break
            time.sleep(0.25)

    def stop(self):
        # Stop the current container along with the containers depending on it, in the order given by the dependency solver
        solver = DepSolver()
        self.get_stop_dependencies(solver)
        for container in solver.solve():
            if container.is_stopped():
                continue
            container.do_stop()

    def do_stop(self):
        # Ask LXC to stop the current container and block until it is reported as stopped
        stop_cmd = ['lxc-stop', '-P', config.CONTAINERS_DIR, self.name]
        stopper = subprocess.Popen(stop_cmd)
        self.await_state(ContainerState.STOPPED)
        # Collect the exit status of the lxc-stop process
        stopper.wait()

    def execute(self, cmd, uid=None, gid=None, **kwargs):
        # Run the command (list of arguments) in the container and return the result of subprocess.run()
        # uid and gid are optional user and group names, which are resolved via get_uidgid(); kwargs are passed to subprocess.run()
        # Raises InvalidContainerStateError if the container is neither stopped nor running
        # If the container is starting or stopping, wait until the operation is finished
        transitions = {
            ContainerState.STARTING: ContainerState.RUNNING,
            ContainerState.STOPPING: ContainerState.STOPPED,
        }
        state = self.get_state()
        if state in transitions:
            self.await_state(transitions[state])
            state = self.get_state()
        # Resolve UID/GID, if they have been given
        uid, gid = self.get_uidgid(uid, gid)
        uidgid_param = []
        for flag, value in (('-u', uid), ('-g', gid)):
            if value:
                uidgid_param.extend((flag, value))
        # If the container is stopped, use lxc-execute, otherwise use lxc-attach
        if state == ContainerState.STOPPED:
            launcher = ['lxc-execute', '-P', config.CONTAINERS_DIR]
        elif state == ContainerState.RUNNING:
            launcher = ['lxc-attach', '-P', config.CONTAINERS_DIR, '--clear-env']
        else:
            raise InvalidContainerStateError(self.name, state)
        return subprocess.run(launcher+uidgid_param+[self.name, '--']+cmd, **kwargs)

    def get_uidgid(self, user=None, group=None):
        # Helper function to get UID/GID of an user/group from the container
        uid,gid = None,None
        if user:
            uid_entry = self.execute(['/usr/bin/getent', 'passwd', user], capture_output=True, check=True).stdout.decode().split(':')
            uid,gid = uid_entry[2],uid_entry[3]
        if group:
            gid = self.execute(['/usr/bin/getent', 'group', group], capture_output=True, check=True).stdout.decode().split(':')[2]
        return (uid,gid)

    def get_start_dependencies(self, depsolver):
        depsolver.add(self.name, self.depends, self)
        for dependency in self.depends:
            Container(dependency).get_start_dependencies(depsolver)

    def get_stop_dependencies(self, depsolver):
        # Register the container in the solver along with the containers which list it in their depends,
        # then recursively do the same for each of those dependent containers
        dependents = [
            name
            for name, definition in repo_local.get_containers().items()
            if 'depends' in definition and self.name in definition['depends']
        ]
        depsolver.add(self.name, dependents, self)
        for dependent_name in dependents:
            dependent = Container(dependent_name)
            dependent.get_stop_dependencies(depsolver)