Joblet is a distributed job execution platform that provides secure, resource-controlled execution of arbitrary commands on Linux systems. It combines a single-binary architecture, gRPC with mutual TLS authentication, process isolation through Linux namespaces, and fine-grained resource management via cgroups v2.
The same executable operates in different modes, selected at startup:
// Mode detection via environment variable
mode := os.Getenv("JOBLET_MODE")
switch mode {
case "server":
    // Run as gRPC server and job manager
    return modes.RunServer(cfg)
case "init":
    // Run as isolated job process
    return modes.RunJobInit(cfg)
default:
    // Default to server mode
    return modes.RunServer(cfg)
}
Benefits:
- A single artifact to build, deploy, and upgrade
- The server and the per-job init process are guaranteed to run identical code
- No external helper binaries required at job launch

Access control is role-based, derived from the OU field of the client certificate (a role-extraction sketch follows the namespace list below):
- Admin (OU=admin): Full job control (run, stop, view)
- Viewer (OU=viewer): Read-only access (status, logs, list)

Each job runs under the following namespace configuration:
- PID: Isolated (separate PID namespace)
- Network: Shared (host networking for compatibility)
- Mount: Isolated (chroot + bind mounts)
- IPC: Isolated (separate IPC namespace)
- UTS: Isolated (separate hostname/domain)
- Cgroup: Isolated (separate cgroup namespace)
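The OU check naturally lives in a gRPC interceptor. Below is a minimal sketch of extracting the role from the verified client certificate; roleFromPeer is an illustrative helper name, not necessarily Joblet's actual code:

import (
    "context"

    "google.golang.org/grpc/codes"
    "google.golang.org/grpc/credentials"
    "google.golang.org/grpc/peer"
    "google.golang.org/grpc/status"
)

// roleFromPeer returns the OU ("admin" or "viewer") of the verified client certificate.
func roleFromPeer(ctx context.Context) (string, error) {
    p, ok := peer.FromContext(ctx)
    if !ok {
        return "", status.Error(codes.Unauthenticated, "no peer information")
    }
    tlsInfo, ok := p.AuthInfo.(credentials.TLSInfo)
    if !ok || len(tlsInfo.State.VerifiedChains) == 0 {
        return "", status.Error(codes.Unauthenticated, "no verified client certificate")
    }
    leaf := tlsInfo.State.VerifiedChains[0][0] // client leaf certificate
    if len(leaf.Subject.OrganizationalUnit) == 0 {
        return "", status.Error(codes.PermissionDenied, "certificate carries no OU")
    }
    return leaf.Subject.OrganizationalUnit[0], nil
}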
# Applied per job via cgroups v2
resources:
  cpu: 50%       # CPU percentage limit
  memory: 512MB  # Memory limit
  io: 100MB/s    # I/O bandwidth limit
// Create isolated process with namespaces
cmd := exec.Command("/opt/joblet/joblet") // Same binary in init mode
cmd.SysProcAttr = &syscall.SysProcAttr{
    Cloneflags: syscall.CLONE_NEWPID | // PID isolation
        syscall.CLONE_NEWNS |     // Mount isolation
        syscall.CLONE_NEWIPC |    // IPC isolation
        syscall.CLONE_NEWUTS |    // UTS isolation
        syscall.CLONE_NEWCGROUP,  // Cgroup isolation
    // Note: No CLONE_NEWNET (host networking)
}
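One consequence of CLONE_NEWPID worth noting: the re-executed binary is the first process in its fresh PID namespace and therefore sees itself as PID 1. A minimal sanity check the init path could perform (a sketch, not necessarily Joblet's actual behavior):

import (
    "fmt"
    "os"
)

// verifyPIDNamespace is a hypothetical check for the init path.
func verifyPIDNamespace() error {
    // Inside a new PID namespace, the first process is always PID 1.
    if pid := os.Getpid(); pid != 1 {
        return fmt.Errorf("expected PID 1 inside the job's PID namespace, got %d", pid)
    }
    return nil
}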
// Assign the process to its cgroup for resource control
cgroupPath := fmt.Sprintf("/sys/fs/cgroup/joblet.slice/joblet.service/job-%s", jobID)
procFile := filepath.Join(cgroupPath, "cgroup.procs")
if err := os.WriteFile(procFile, []byte(fmt.Sprintf("%d", pid)), 0644); err != nil {
    return fmt.Errorf("failed to add pid %d to cgroup: %w", pid, err)
}
Unlike traditional container solutions, Joblet uses host networking for maximum compatibility:
Benefits:
- No virtual network devices, NAT, or port mapping to configure
- Jobs reach local and remote services exactly as the host does
- Works unchanged in environments where overlay networking is unavailable

Security Considerations:
- Jobs share the host's network stack and can bind ports or open connections like any host process
- Port conflicts between jobs and host services are possible
- Network-level isolation, where required, must be enforced externally (e.g., host firewall rules)
type Job struct {
    Id         string         // Unique identifier
    Command    string         // Command to execute
    Args       []string       // Command arguments
    Limits     ResourceLimits // CPU/memory/IO limits
    Status     JobStatus      // Current state
    Pid        int32          // Process ID
    CgroupPath string         // Resource control path
    StartTime  time.Time      // Creation time
    EndTime    *time.Time     // Completion time
    ExitCode   int32          // Process exit status
}
type JobStatus string

const (
    StatusInitializing JobStatus = "INITIALIZING"
    StatusRunning      JobStatus = "RUNNING"
    StatusCompleted    JobStatus = "COMPLETED"
    StatusFailed       JobStatus = "FAILED"
    StatusStopped      JobStatus = "STOPPED"
)
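These states imply a simple lifecycle. The following sketch encodes the transitions as the state list suggests them; Joblet's actual transition rules may differ:

// canTransition encodes the job lifecycle implied by the statuses above.
func canTransition(from, to JobStatus) bool {
    switch from {
    case StatusInitializing:
        return to == StatusRunning || to == StatusFailed
    case StatusRunning:
        return to == StatusCompleted || to == StatusFailed || to == StatusStopped
    default:
        return false // COMPLETED, FAILED, and STOPPED are terminal
    }
}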
Linux Kernel Cgroups v2 Hierarchy:
/sys/fs/cgroup/
├── joblet.slice/                    # Systemd slice
│   └── joblet.service/              # Main service cgroup
│       ├── cgroup.controllers       # Available controllers
│       ├── cgroup.subtree_control   # Enabled controllers
│       ├── job-1/                   # Individual job cgroup
│       │   ├── memory.max           # Memory limit
│       │   ├── cpu.max              # CPU limit
│       │   ├── io.max               # I/O limit
│       │   └── cgroup.procs         # Process list
│       └── job-2/
│           └── ...
# Set CPU quota: 50% of one core
echo "50000 100000" > /sys/fs/cgroup/joblet.slice/joblet.service/job-1/cpu.max
# Format: quota_microseconds period_microseconds
# Set memory limit: 512MB
echo "536870912" > /sys/fs/cgroup/joblet.slice/joblet.service/job-1/memory.max
# Set I/O bandwidth: 10MB/s read, 5MB/s write
echo "8:0 rbps=10485760 wbps=5242880" > /sys/fs/cgroup/joblet.slice/joblet.service/job-1/io.max
// Real-time resource usage collection
type ResourceUsage struct {
    CPUUsage    time.Duration // Total CPU time
    MemoryUsage int64         // Current memory bytes
    IORead      int64         // Total bytes read
    IOWrite     int64         // Total bytes written
}

// Collected via cgroup v2 statistics files
func (r *ResourceManager) GetUsage(jobID string) (*ResourceUsage, error) {
    cgroupPath := r.getCgroupPath(jobID)
    usage := &ResourceUsage{}
    // CPU time: cpu.stat begins with "usage_usec <total>"
    if data, err := os.ReadFile(filepath.Join(cgroupPath, "cpu.stat")); err == nil {
        var usec int64
        if _, err := fmt.Sscanf(string(data), "usage_usec %d", &usec); err == nil {
            usage.CPUUsage = time.Duration(usec) * time.Microsecond
        }
    }
    // Memory: memory.current holds a single byte count
    if data, err := os.ReadFile(filepath.Join(cgroupPath, "memory.current")); err == nil {
        usage.MemoryUsage, _ = strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
    }
    // I/O: sum the rbytes=/wbytes= tokens across all devices in io.stat
    if data, err := os.ReadFile(filepath.Join(cgroupPath, "io.stat")); err == nil {
        var n int64
        for _, kv := range strings.Fields(string(data)) {
            if _, err := fmt.Sscanf(kv, "rbytes=%d", &n); err == nil {
                usage.IORead += n
            } else if _, err := fmt.Sscanf(kv, "wbytes=%d", &n); err == nil {
                usage.IOWrite += n
            }
        }
    }
    return usage, nil
}
Instead of separate certificate files, Joblet uses embedded certificates in YAML configuration:
# /opt/joblet/config/joblet-config.yml (Server)
version: "3.0"
server:
  address: "0.0.0.0"
  port: 50051
  mode: "server"
security:
  serverCert: |
    -----BEGIN CERTIFICATE-----
    MIIDXTCCAkWgAwIBAgIJAKoK/heBjcO...
    -----END CERTIFICATE-----
  serverKey: |
    -----BEGIN PRIVATE KEY-----
    MIIEvgIBADANBgkqhkiG9w0BAQEFAA...
    -----END PRIVATE KEY-----
  caCert: |
    -----BEGIN CERTIFICATE-----
    MIIDQTCCAimgAwIBAgITBmyfz5m/jA...
    -----END CERTIFICATE-----
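On the server side these embedded PEM blocks feed straight into Go's TLS stack. A sketch, assuming the YAML values have been loaded as byte slices (serverTLSConfig is a hypothetical helper):

import (
    "crypto/tls"
    "crypto/x509"
    "errors"
)

// serverTLSConfig builds a mutual-TLS config from the embedded PEM material.
func serverTLSConfig(certPEM, keyPEM, caPEM []byte) (*tls.Config, error) {
    cert, err := tls.X509KeyPair(certPEM, keyPEM)
    if err != nil {
        return nil, err
    }
    pool := x509.NewCertPool()
    if !pool.AppendCertsFromPEM(caPEM) {
        return nil, errors.New("invalid CA certificate")
    }
    return &tls.Config{
        Certificates: []tls.Certificate{cert},
        ClientCAs:    pool,
        ClientAuth:   tls.RequireAndVerifyClientCert, // every client must present a valid cert
    }, nil
}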
# /opt/joblet/config/rnx-config.yml (Client)
version: "3.0"
nodes:
  default:
    address: "192.168.1.100:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Admin client certificate
    key: |
      -----BEGIN PRIVATE KEY-----
      # Admin client key
    ca: |
      -----BEGIN CERTIFICATE-----
      # CA certificate
  viewer:
    address: "192.168.1.100:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Viewer client certificate (OU=viewer)
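On the client side the matching material becomes gRPC transport credentials. A sketch under the same assumption that the PEM blocks are already loaded (dialNode is an illustrative name):

import (
    "crypto/tls"
    "crypto/x509"

    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials"
)

// dialNode connects to a Joblet node with mutual TLS.
func dialNode(address string, certPEM, keyPEM, caPEM []byte) (*grpc.ClientConn, error) {
    cert, err := tls.X509KeyPair(certPEM, keyPEM)
    if err != nil {
        return nil, err
    }
    pool := x509.NewCertPool()
    pool.AppendCertsFromPEM(caPEM)
    creds := credentials.NewTLS(&tls.Config{
        Certificates: []tls.Certificate{cert},
        RootCAs:      pool, // trust the Joblet CA when verifying the server
    })
    return grpc.Dial(address, grpc.WithTransportCredentials(creds))
}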
// Job cleanup with multiple fallback strategies
func (j *Joblet) cleanupJob(jobID string) error {
    // 1. Try graceful shutdown (SIGTERM)
    if err := j.terminateGracefully(jobID); err == nil {
        return nil
    }
    // 2. Force termination (SIGKILL)
    if err := j.forceTerminate(jobID); err == nil {
        return nil
    }
    // 3. Cgroup cleanup
    if err := j.cleanupCgroup(jobID); err != nil {
        log.Warn("cgroup cleanup failed", "jobId", jobID, "error", err)
    }
    // 4. Resource cleanup
    j.cleanupResources(jobID)
    return nil // Always succeed to prevent state inconsistency
}
// Built-in performance metrics
type Metrics struct {
    JobsCreated    int64
    JobsCompleted  int64
    JobsFailed     int64
    AvgJobDuration time.Duration
    ConcurrentJobs int64
    MemoryUsage    int64
    CPUUsage       float64
}
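Counters like these are typically bumped from many goroutines at once. A hypothetical sketch of lock-free updates with sync/atomic (jobStarted and jobFinished are illustrative names, not Joblet's confirmed API):

import "sync/atomic"

func (m *Metrics) jobStarted() {
    atomic.AddInt64(&m.JobsCreated, 1)
    atomic.AddInt64(&m.ConcurrentJobs, 1)
}

func (m *Metrics) jobFinished(failed bool) {
    atomic.AddInt64(&m.ConcurrentJobs, -1)
    if failed {
        atomic.AddInt64(&m.JobsFailed, 1)
    } else {
        atomic.AddInt64(&m.JobsCompleted, 1)
    }
}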
The current design provides a solid foundation for future enhancements while preserving the security, isolation, and resource-control guarantees described above.
This design document represents the current state of the Joblet system and serves as a reference for developers, operators, and users seeking to understand the system’s architecture, security model, and operational characteristics.