container

package
v0.4.0 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Apr 11, 2026 License: MIT Imports: 33 Imported by: 0

Documentation

Overview

Package container implements unprivileged Linux containers with built-in support for syscall filtering.

Example

Note: this package requires cgo, which is unavailable in the Go playground.

package main

import (
	"context"
	"os"
	"os/exec"

	"hakurei.app/check"
	"hakurei.app/container"
	"hakurei.app/fhs"
)

func main() {
	// TryArgv0 calls Init when the last element of argv0 is "init", so it has
	// to run early in any process that starts containers. A nil msg selects
	// the system logger.
	container.TryArgv0(nil)

	// Describe the container: hostname, proc and dev mounts, standard streams.
	ctr := container.New(context.Background(), nil)
	ctr.Hostname = "hakurei-example"
	ctr.Proc(fhs.AbsProc).Dev(fhs.AbsDev, true)
	ctr.Stdin = os.Stdin
	ctr.Stdout = os.Stdout
	ctr.Stderr = os.Stderr

	// The root is bound onto itself purely for demonstration.
	ctr.Bind(fhs.AbsRoot, fhs.AbsRoot, 0)

	// Use the hostname program found on PATH as the initial process.
	name, err := exec.LookPath("hostname")
	if err != nil {
		panic(err)
	}
	ctr.Path = check.MustAbs(name)

	// First stage: start the container init process.
	// It blocks until the Serve method is called.
	if err = ctr.Start(); err != nil {
		panic(err)
	}

	// Second stage: serve the setup payload to the container init process.
	if err = ctr.Serve(); err != nil {
		panic(err)
	}

	// Wait must be called whenever the Start method succeeded.
	if err = ctr.Wait(); err != nil {
		panic(err)
	}
}
Output:
hakurei-example

Index

Examples

Constants

View Source
const (
	PR_CAP_AMBIENT           = 0x2f
	PR_CAP_AMBIENT_RAISE     = 0x2
	PR_CAP_AMBIENT_CLEAR_ALL = 0x4

	CAP_SYS_ADMIN    = 0x15
	CAP_SETPCAP      = 0x8
	CAP_NET_ADMIN    = 0xc
	CAP_DAC_OVERRIDE = 0x1
)
View Source
const (
	// OverlayEphemeralUnexpectedUpper is set when [MountOverlayOp.Work] is nil
	// and [MountOverlayOp.Upper] holds an unexpected value.
	OverlayEphemeralUnexpectedUpper = iota
	// OverlayReadonlyLower is set when [MountOverlayOp.Lower] contains fewer than
	// two entries when mounting readonly.
	OverlayReadonlyLower
	// OverlayEmptyLower is set when [MountOverlayOp.Lower] has length of zero.
	OverlayEmptyLower
)
View Source
const (

	// SourceNone is used when the source value is ignored,
	// such as when remounting.
	SourceNone = "none"
	// SourceProc is used when mounting proc.
	// Note that any source value is allowed when fstype is [FstypeProc].
	SourceProc = "proc"
	// SourceDevpts is used when mounting devpts.
	// Note that any source value is allowed when fstype is [FstypeDevpts].
	SourceDevpts = "devpts"
	// SourceMqueue is used when mounting mqueue.
	// Note that any source value is allowed when fstype is [FstypeMqueue].
	SourceMqueue = "mqueue"
	// SourceOverlay is used when mounting overlay.
	// Note that any source value is allowed when fstype is [FstypeOverlay].
	SourceOverlay = "overlay"

	// SourceTmpfs is used when mounting tmpfs.
	SourceTmpfs = "tmpfs"
	// SourceTmpfsRootfs is used when mounting the tmpfs instance backing the intermediate root.
	SourceTmpfsRootfs = "rootfs"
	// SourceTmpfsDevtmpfs is used when mounting tmpfs representing a subset of host devtmpfs.
	SourceTmpfsDevtmpfs = "devtmpfs"
	// SourceTmpfsEphemeral is used when mounting a writable instance of tmpfs.
	SourceTmpfsEphemeral = "ephemeral"
	// SourceTmpfsReadonly is used when mounting a readonly instance of tmpfs.
	SourceTmpfsReadonly = "readonly"

	// FstypeNULL is used when the fstype value is ignored,
	// such as when bind mounting or remounting.
	FstypeNULL = zeroString
	// FstypeProc represents the proc pseudo-filesystem.
	// A fully visible instance of proc must be available in the mount namespace for proc to be mounted.
	FstypeProc = "proc"
	// FstypeDevpts represents the devpts pseudo-filesystem.
	// This type of filesystem is usually mounted on /dev/pts.
	FstypeDevpts = "devpts"
	// FstypeTmpfs represents the tmpfs filesystem.
	// This filesystem type can be mounted anywhere in the container filesystem.
	FstypeTmpfs = "tmpfs"
	// FstypeMqueue represents the mqueue pseudo-filesystem.
	// This filesystem type is usually mounted on /dev/mqueue.
	FstypeMqueue = "mqueue"
	// FstypeOverlay represents the overlay pseudo-filesystem.
	// This filesystem type can be mounted anywhere in the container filesystem.
	FstypeOverlay = "overlay"

	// OptionOverlayLowerdir represents the lowerdir option of the overlay pseudo-filesystem.
	// Any filesystem, does not need to be on a writable filesystem.
	OptionOverlayLowerdir = "lowerdir"
	// OptionOverlayUpperdir represents the upperdir option of the overlay pseudo-filesystem.
	// The upperdir is normally on a writable filesystem.
	OptionOverlayUpperdir = "upperdir"
	// OptionOverlayWorkdir represents the workdir option of the overlay pseudo-filesystem.
	// The workdir needs to be an empty directory on the same filesystem as upperdir.
	OptionOverlayWorkdir = "workdir"
	// OptionOverlayUserxattr represents the userxattr option of the overlay pseudo-filesystem.
	// Use the "user.overlay." xattr namespace instead of "trusted.overlay.".
	OptionOverlayUserxattr = "userxattr"
)
View Source
const (
	O_PATH = 0x200000

	PR_SET_NO_NEW_PRIVS = 0x26
)
View Source
const (
	// CancelSignal is the signal expected by container init on context cancel.
	// A custom [Container.Cancel] function must eventually deliver this signal.
	CancelSignal = SIGUSR2
)
View Source
const (
	// Nonexistent is a path that cannot exist.
	//
	// This path can never be presented by the kernel if proc is mounted on
	// /proc/. This can only exist if parts of /proc/ are covered, or proc is not
	// mounted at all. Neither configuration is supported by this package.
	Nonexistent = fhs.Proc + "nonexistent"
)

Variables

This section is empty.

Functions

func Init

func Init(msg message.Msg)

Init is called by TryArgv0 if the current process is the container init.

func IsAutoRootBindable added in v0.1.3

func IsAutoRootBindable(msg message.Msg, name string) bool

IsAutoRootBindable returns whether a dir entry name is selected for AutoRoot.

func LastCap

func LastCap(msg message.Msg) uintptr

func OverflowGid

func OverflowGid(msg message.Msg) int

func OverflowUid

func OverflowUid(msg message.Msg) int

func TryArgv0

func TryArgv0(msg message.Msg)

TryArgv0 calls Init if the last element of argv0 is "init". If a nil msg is passed, the system logger is used instead.

Types

type AutoEtcOp

type AutoEtcOp struct{ Prefix string }

AutoEtcOp expands host /etc into a toplevel symlink mirror with /etc semantics.

This is not a generic setup op. It is implemented here to reduce ipc overhead.

func (*AutoEtcOp) Is

func (e *AutoEtcOp) Is(op Op) bool

func (*AutoEtcOp) String

func (e *AutoEtcOp) String() string

func (*AutoEtcOp) Valid added in v0.2.0

func (e *AutoEtcOp) Valid() bool

type AutoRootOp added in v0.1.3

type AutoRootOp struct {
	Host *check.Absolute
	// passed through to bindMount
	Flags int
	// contains filtered or unexported fields
}

AutoRootOp expands a directory into a toplevel bind mount mirror on container root.

This is not a generic setup op. It is implemented here to reduce ipc overhead.

func (*AutoRootOp) Is added in v0.1.3

func (r *AutoRootOp) Is(op Op) bool

func (*AutoRootOp) String added in v0.1.3

func (r *AutoRootOp) String() string

func (*AutoRootOp) Valid added in v0.2.0

func (r *AutoRootOp) Valid() bool

type BindMountOp

type BindMountOp struct {
	Source, Target *check.Absolute

	Flags int
	// contains filtered or unexported fields
}

BindMountOp creates a bind mount from host path Source to container path Target.

Note that Flags uses bits declared in the std package and should not be set with constants in syscall.

func (*BindMountOp) Is

func (b *BindMountOp) Is(op Op) bool

func (*BindMountOp) String

func (b *BindMountOp) String() string

func (*BindMountOp) Valid added in v0.2.0

func (b *BindMountOp) Valid() bool

type Container

type Container struct {
	// Whether the container init should stay alive after its parent terminates.
	AllowOrphan bool
	// Whether to set SchedPolicy and SchedPriority via sched_setscheduler(2).
	SetScheduler bool
	// Scheduling policy to set via sched_setscheduler(2).
	SchedPolicy ext.SchedPolicy
	// Scheduling priority to set via sched_setscheduler(2). The zero value
	// implies the minimum value supported by the current SchedPolicy.
	SchedPriority ext.Int
	// Cgroup fd, nil to disable.
	Cgroup *int
	// ExtraFiles passed through to initial process in the container, with
	// behaviour identical to its [exec.Cmd] counterpart.
	ExtraFiles []*os.File

	Stdin  io.Reader
	Stdout io.Writer
	Stderr io.Writer

	// Custom cancellation behaviour for the underlying [exec.Cmd]. Must
	// deliver [CancelSignal] before returning.
	Cancel func(cmd *exec.Cmd) error
	// Copied to the underlying [exec.Cmd].
	WaitDelay time.Duration

	Params
	// contains filtered or unexported fields
}

Container represents a container environment being prepared or run. None of the Container methods are safe for concurrent use.

func New

func New(ctx context.Context, msg message.Msg) *Container

New returns the address of a new instance of Container. This value requires further initialisation before use.

func NewCommand added in v0.2.0

func NewCommand(
	ctx context.Context,
	msg message.Msg,
	pathname *check.Absolute,
	name string,
	args ...string,
) *Container

NewCommand calls New and initialises the [Params.Path] and [Params.Args] fields.

func (*Container) ProcessState added in v0.1.2

func (p *Container) ProcessState() *os.ProcessState

ProcessState returns the address of os.ProcessState held by the underlying exec.Cmd.

func (*Container) Serve

func (p *Container) Serve() (err error)

Serve serves [Container.Params] to the container init.

Serve must only be called once.

func (*Container) Start

func (p *Container) Start() error

Start starts the container init. The init process blocks until Serve is called.

func (*Container) StderrPipe added in v0.2.2

func (p *Container) StderrPipe() (r io.ReadCloser, err error)

StderrPipe calls the exec.Cmd method with the same name.

func (*Container) StdinPipe added in v0.2.2

func (p *Container) StdinPipe() (w io.WriteCloser, err error)

StdinPipe calls the exec.Cmd method with the same name.

func (*Container) StdoutPipe added in v0.2.2

func (p *Container) StdoutPipe() (r io.ReadCloser, err error)

StdoutPipe calls the exec.Cmd method with the same name.

func (*Container) String

func (p *Container) String() string

func (*Container) Wait

func (p *Container) Wait() error

Wait blocks until the container init process exits and releases any resources associated with the Container.

type DaemonOp added in v0.3.2

type DaemonOp struct {
	// Pathname indicating readiness of daemon.
	Target *check.Absolute
	// Absolute pathname passed to [exec.Cmd].
	Path *check.Absolute
	// Arguments (excl. first) passed to [exec.Cmd].
	Args []string
}

DaemonOp starts a daemon in the container and blocks until Target appears.

func (*DaemonOp) Is added in v0.3.2

func (d *DaemonOp) Is(op Op) bool

func (*DaemonOp) String added in v0.3.2

func (d *DaemonOp) String() string

func (*DaemonOp) Valid added in v0.3.2

func (d *DaemonOp) Valid() bool

type MkdirOp

type MkdirOp struct {
	Path *check.Absolute
	Perm os.FileMode
}

MkdirOp creates a directory at container Path with permission bits set to Perm.

func (*MkdirOp) Is

func (m *MkdirOp) Is(op Op) bool

func (*MkdirOp) String

func (m *MkdirOp) String() string

func (*MkdirOp) Valid added in v0.2.0

func (m *MkdirOp) Valid() bool

type MountDevOp

type MountDevOp struct {
	Target *check.Absolute
	Mqueue bool
	Write  bool
}

MountDevOp mounts a subset of host /dev on container path Target. If Mqueue is true, a private instance of FstypeMqueue is mounted. If Write is true, the resulting mount point is left writable.

func (*MountDevOp) Is

func (d *MountDevOp) Is(op Op) bool

func (*MountDevOp) String

func (d *MountDevOp) String() string

func (*MountDevOp) Valid added in v0.2.0

func (d *MountDevOp) Valid() bool

type MountError added in v0.2.2

type MountError struct {
	Source, Target, Fstype string

	Flags uintptr
	Data  string
	syscall.Errno
}

MountError wraps errors returned by syscall.Mount.

func (*MountError) Error added in v0.2.2

func (e *MountError) Error() string

func (*MountError) Message added in v0.3.0

func (e *MountError) Message() string

func (*MountError) Unwrap added in v0.2.2

func (e *MountError) Unwrap() error

type MountOverlayOp added in v0.2.0

type MountOverlayOp struct {
	Target *check.Absolute

	// Any filesystem, does not need to be on a writable filesystem.
	Lower []*check.Absolute

	// The upperdir is normally on a writable filesystem.
	//
	// If Work is nil and Upper holds the special value [fhs.AbsRoot], an
	// ephemeral upperdir and workdir will be set up.
	//
	// If both Work and Upper are nil, upperdir and workdir are omitted and the
	// overlay is mounted readonly.
	Upper *check.Absolute

	// The workdir needs to be an empty directory on the same filesystem as upperdir.
	Work *check.Absolute
	// contains filtered or unexported fields
}

MountOverlayOp mounts FstypeOverlay on container path Target.

func (*MountOverlayOp) Is added in v0.2.0

func (o *MountOverlayOp) Is(op Op) bool

func (*MountOverlayOp) String added in v0.2.0

func (o *MountOverlayOp) String() string

func (*MountOverlayOp) Valid added in v0.2.0

func (o *MountOverlayOp) Valid() bool

type MountProcOp

type MountProcOp struct{ Target *check.Absolute }

MountProcOp mounts a new instance of FstypeProc on container path Target.

func (*MountProcOp) Is

func (p *MountProcOp) Is(op Op) bool

func (*MountProcOp) String

func (p *MountProcOp) String() string

func (*MountProcOp) Valid added in v0.2.0

func (p *MountProcOp) Valid() bool

type MountTmpfsOp

type MountTmpfsOp struct {
	FSName string
	Path   *check.Absolute
	Flags  uintptr
	Size   int
	Perm   os.FileMode
}

MountTmpfsOp mounts FstypeTmpfs on container Path.

func (*MountTmpfsOp) Is

func (t *MountTmpfsOp) Is(op Op) bool

func (*MountTmpfsOp) String

func (t *MountTmpfsOp) String() string

func (*MountTmpfsOp) Valid added in v0.2.0

func (t *MountTmpfsOp) Valid() bool

type Op

type Op interface {
	Is(op Op) bool
	Valid() bool
	fmt.Stringer
	// contains filtered or unexported methods
}

Op is a generic setup step run inside the container init. Implementations of this interface are sent as a stream of gobs.

type OpRepeatError added in v0.2.2

type OpRepeatError string

OpRepeatError is returned when applying a repeated nonrepeatable Op.

func (OpRepeatError) Error added in v0.2.2

func (e OpRepeatError) Error() string

type OpStateError added in v0.2.2

type OpStateError string

OpStateError indicates an impossible internal state has been reached in an Op.

func (OpStateError) Error added in v0.2.2

func (o OpStateError) Error() string

type Ops

type Ops []Op

Ops is a collection of Op.

func (*Ops) Bind

func (f *Ops) Bind(source, target *check.Absolute, flags int) *Ops

Bind is a helper for appending BindMountOp to Ops.

func (*Ops) Daemon added in v0.3.2

func (f *Ops) Daemon(target, path *check.Absolute, args ...string) *Ops

Daemon is a helper for appending DaemonOp to Ops.

func (*Ops) Dev

func (f *Ops) Dev(target *check.Absolute, mqueue bool) *Ops

Dev appends an Op that mounts a subset of host /dev.

func (*Ops) DevWritable added in v0.2.0

func (f *Ops) DevWritable(target *check.Absolute, mqueue bool) *Ops

DevWritable appends an Op that mounts a writable subset of host /dev.

There is usually no good reason to write to /dev, so this should always be followed by a RemountOp.

func (*Ops) Etc

func (f *Ops) Etc(host *check.Absolute, prefix string) *Ops

Etc is a helper for appending AutoEtcOp to Ops.

func (*Ops) Grow

func (f *Ops) Grow(n int)

Grow grows the slice Ops points to using slices.Grow.

func (f *Ops) Link(target *check.Absolute, linkName string, dereference bool) *Ops

Link appends an Op that creates a symlink in the container filesystem.

func (*Ops) Mkdir

func (f *Ops) Mkdir(name *check.Absolute, perm os.FileMode) *Ops

Mkdir is a helper for appending MkdirOp to Ops.

func (*Ops) Overlay added in v0.2.0

func (f *Ops) Overlay(
	target, state, work *check.Absolute,
	layers ...*check.Absolute,
) *Ops

Overlay is a helper for appending MountOverlayOp to Ops.

func (*Ops) OverlayEphemeral added in v0.2.0

func (f *Ops) OverlayEphemeral(target *check.Absolute, layers ...*check.Absolute) *Ops

OverlayEphemeral appends a MountOverlayOp with an ephemeral upperdir and workdir.

func (*Ops) OverlayReadonly added in v0.2.0

func (f *Ops) OverlayReadonly(target *check.Absolute, layers ...*check.Absolute) *Ops

OverlayReadonly appends a readonly MountOverlayOp.

func (*Ops) Place

func (f *Ops) Place(name *check.Absolute, data []byte) *Ops

Place is a helper for appending TmpfileOp to Ops.

func (*Ops) Proc

func (f *Ops) Proc(target *check.Absolute) *Ops

Proc is a helper for appending MountProcOp to Ops.

func (*Ops) Readonly added in v0.1.3

func (f *Ops) Readonly(target *check.Absolute, perm os.FileMode) *Ops

Readonly appends an Op that mounts read-only tmpfs on container path [MountTmpfsOp.Path].

func (*Ops) Remount added in v0.1.3

func (f *Ops) Remount(target *check.Absolute, flags uintptr) *Ops

Remount is a helper for appending RemountOp to Ops.

func (*Ops) Root added in v0.1.3

func (f *Ops) Root(host *check.Absolute, flags int) *Ops

Root is a helper for appending AutoRootOp to Ops.

func (*Ops) Tmpfs

func (f *Ops) Tmpfs(target *check.Absolute, size int, perm os.FileMode) *Ops

Tmpfs appends an Op that mounts tmpfs on container path [MountTmpfsOp.Path].

type OverlayArgumentError added in v0.2.2

type OverlayArgumentError struct {
	Type  uintptr
	Value string
}

OverlayArgumentError is returned for a MountOverlayOp supplied with an invalid argument.

func (*OverlayArgumentError) Error added in v0.2.2

func (e *OverlayArgumentError) Error() string

type Params

type Params struct {
	// Working directory in the container.
	Dir *check.Absolute
	// Initial process environment.
	Env []string
	// Pathname of initial process in the container.
	Path *check.Absolute
	// Initial process argv.
	Args []string
	// Deliver SIGINT to the initial process on context cancellation.
	ForwardCancel bool
	// Time to wait for processes lingering after the initial process terminates.
	AdoptWaitDelay time.Duration

	// Mapped Uid in user namespace.
	Uid int
	// Mapped Gid in user namespace.
	Gid int
	// Hostname value in UTS namespace.
	Hostname string
	// Sequential container setup ops.
	*Ops

	// Seccomp system call filter rules.
	SeccompRules []std.NativeRule
	// Extra seccomp flags.
	SeccompFlags seccomp.ExportFlag
	// Seccomp presets. Has no effect unless SeccompRules is zero-length.
	SeccompPresets std.FilterPreset
	// Do not load seccomp program.
	SeccompDisable bool

	// Permission bits of newly created parent directories.
	// The zero value is interpreted as 0755.
	ParentPerm os.FileMode
	// Do not syscall.Setsid.
	RetainSession bool
	// Do not [syscall.CLONE_NEWNET].
	HostNet bool
	// Do not [LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET].
	HostAbstract bool
	// Retain CAP_SYS_ADMIN.
	Privileged bool
}

Params holds container configuration and is safe to serialise.

type RemountOp added in v0.1.3

type RemountOp struct {
	Target *check.Absolute
	Flags  uintptr
}

RemountOp remounts Target with Flags.

func (*RemountOp) Is added in v0.1.3

func (r *RemountOp) Is(op Op) bool

func (*RemountOp) String added in v0.1.3

func (r *RemountOp) String() string

func (*RemountOp) Valid added in v0.2.0

func (r *RemountOp) Valid() bool

type StartError added in v0.2.2

type StartError struct {
	// Fatal suggests whether this error should be considered fatal for the entire program.
	Fatal bool
	// Step refers to the part of the setup this error is returned from.
	Step string
	// Err is the underlying error.
	Err error
	// Origin is whether this error originated from the [Container.Start] method.
	Origin bool
	// Passthrough is whether the Error method is passed through to Err.
	Passthrough bool
}

A StartError contains additional information on a container startup failure.

func (*StartError) Error added in v0.2.2

func (e *StartError) Error() string

func (*StartError) Message added in v0.2.2

func (e *StartError) Message() string

Message returns a user-facing error message.

func (*StartError) Unwrap added in v0.2.2

func (e *StartError) Unwrap() error

type SymlinkOp

type SymlinkOp struct {
	Target *check.Absolute
	// LinkName is an arbitrary uninterpreted pathname.
	LinkName string

	// Dereference causes LinkName to be dereferenced during early.
	Dereference bool
}

SymlinkOp optionally dereferences LinkName and creates a symlink at container path Target.

func (*SymlinkOp) Is

func (l *SymlinkOp) Is(op Op) bool

func (*SymlinkOp) String

func (l *SymlinkOp) String() string

func (*SymlinkOp) Valid added in v0.2.0

func (l *SymlinkOp) Valid() bool

type TmpfileOp

type TmpfileOp struct {
	Path *check.Absolute
	Data []byte
}

TmpfileOp places a file on container Path containing Data.

func (*TmpfileOp) Is

func (t *TmpfileOp) Is(op Op) bool

func (*TmpfileOp) String

func (t *TmpfileOp) String() string

func (*TmpfileOp) Valid added in v0.2.0

func (t *TmpfileOp) Valid() bool

type TmpfsSizeError added in v0.2.2

type TmpfsSizeError int

func (TmpfsSizeError) Error added in v0.2.2

func (e TmpfsSizeError) Error() string

Directories

Path Synopsis
Package std contains constants from container packages without depending on cgo.
Package std contains constants from container packages without depending on cgo.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL