joblet

Configuration Guide

Comprehensive guide to configuring Joblet server and RNX client.

Table of Contents

Server Configuration

The Joblet server configuration file is typically located at /opt/joblet/config/joblet-config.yml.

Basic Configuration

version: "3.0"

server:
  mode: "server"                    # Always "server" for daemon mode
  address: "0.0.0.0"               # Listen address
  port: 50051                      # gRPC port

  # TLS configuration
  tls:
    enabled: true                  # Enable TLS (recommended)
    min_version: "1.3"            # Minimum TLS version

  # Connection settings
  max_message_size: 104857600     # Max gRPC message size (100MB)
  keepalive:
    time: 120s                    # Keepalive time
    timeout: 20s                  # Keepalive timeout

Resource Limits

joblet:
  # Default resource limits for jobs
  defaultCpuLimit: 100            # Default CPU limit (100 = 1 core)
  defaultMemoryLimit: 512         # Default memory limit in MB
  defaultIoLimit: 10485760        # Default I/O limit in bytes/sec (10MB/s)

  # Job execution settings
  maxConcurrentJobs: 100          # Maximum concurrent jobs
  jobTimeout: "24h"               # Maximum job runtime
  scheduleTimeout: "168h"         # Maximum schedule ahead time (7 days)

  # Command validation
  validateCommands: true          # Validate commands before execution
  allowedCommands: # Whitelist specific commands (optional)
    - python
    - python3
    - node
    - bash
    - sh

  # Cleanup settings
  cleanupTimeout: "30s"          # Timeout for cleanup operations
  preserveFailedJobs: true       # Keep failed job data for debugging

Network Configuration

network:
  enabled: true                   # Enable network management
  state_dir: "/opt/joblet/network" # Network state directory

  # Default network settings
  default_network: "bridge"       # Default network for jobs
  allow_custom_networks: true     # Allow custom network creation
  max_custom_networks: 50         # Maximum custom networks

  # Predefined networks
  networks:
    bridge:
      cidr: "172.20.0.0/16"      # Bridge network CIDR
      bridge_name: "joblet0"      # Bridge interface name
      enable_nat: true            # Enable NAT for internet access
      enable_icc: true            # Inter-container communication

    host:
      type: "host"                # Use host network namespace

    none:
      type: "none"                # No network access

  # DNS configuration
  dns:
    servers:
      - "8.8.8.8"
      - "8.8.4.4"
    search:
      - "local"
    options:
      - "ndots:1"

  # Traffic control
  traffic_control:
    enabled: true                 # Enable bandwidth limiting
    default_ingress: 0            # Default ingress limit (0 = unlimited)
    default_egress: 0             # Default egress limit

Volume Configuration

volume:
  enabled: true                   # Enable volume management
  state_dir: "/opt/joblet/state"  # Volume state directory
  base_path: "/opt/joblet/volumes" # Volume storage path

  # Volume limits
  max_volumes: 100                # Maximum number of volumes
  max_size: "100GB"              # Maximum total volume size
  default_size: "1GB"            # Default volume size

  # Volume types configuration
  filesystem:
    enabled: true
    default_fs: "ext4"           # Default filesystem type
    mount_options: "noatime,nodiratime"

  memory:
    enabled: true
    max_memory_volumes: 10       # Maximum memory volumes
    max_memory_usage: "10GB"     # Maximum total memory usage

  # Cleanup settings
  auto_cleanup: false            # Auto-remove unused volumes
  cleanup_interval: "24h"        # Cleanup check interval

Security Settings

security:
  # Embedded certificates (generated by certs_gen_embedded.sh)
  serverCert: |
    -----BEGIN CERTIFICATE-----
    MIIFKzCCAxOgAwIBAgIUY8Z9...
    -----END CERTIFICATE-----

  serverKey: |
    -----BEGIN PRIVATE KEY-----
    MIIJQwIBADANBgkqhkiG9w0BAQ...
    -----END PRIVATE KEY-----

  caCert: |
    -----BEGIN CERTIFICATE-----
    MIIFazCCA1OgAwIBAgIUX...
    -----END CERTIFICATE-----

  # Authentication settings
  require_client_cert: true       # Require client certificates
  verify_client_cert: true        # Verify client certificates

  # Authorization
  enable_rbac: true              # Enable role-based access control
  default_role: "viewer"         # Default role for unknown OUs

  # Audit logging
  audit:
    enabled: true
    log_file: "/var/log/joblet/audit.log"
    log_successful_auth: true
    log_failed_auth: true
    log_job_operations: true

Logging Configuration

logging:
  level: "info"                  # Log level: debug, info, warn, error
  format: "json"                 # Log format: json or text

  # Output configuration
  outputs:
    - type: "file"
      path: "/var/log/joblet/joblet.log"
      rotate: true
      max_size: "100MB"
      max_backups: 10
      max_age: 30

    - type: "stdout"
      format: "text"             # Override format for stdout

  # Component-specific logging
  components:
    grpc: "warn"
    cgroup: "info"
    network: "info"
    volume: "info"
    auth: "info"

Advanced Settings

# Cgroup configuration
cgroup:
  baseDir: "/sys/fs/cgroup/joblet.slice" # Cgroup hierarchy path
  version: "v2"                          # Cgroup version (v1 or v2)

  # Controllers to enable
  enableControllers:
    - memory
    - cpu
    - io
    - pids
    - cpuset

  # Resource accounting
  accounting:
    enabled: true
    interval: "10s"              # Metrics collection interval

# Filesystem isolation
filesystem:
  baseDir: "/opt/joblet/jobs"    # Base directory for job workspaces
  tmpDir: "/opt/joblet/tmp"      # Temporary directory

  # Workspace settings
  workspace:
    default_quota: "1MB"         # Default workspace size
    cleanup_on_exit: true        # Clean workspace after job
    preserve_on_failure: true    # Keep workspace on failure

  # Security
  enable_chroot: true            # Use chroot isolation
  readonly_rootfs: false         # Make root filesystem read-only

# Process management
process:
  default_user: "nobody"         # Default user for jobs
  default_group: "nogroup"       # Default group for jobs
  allow_setuid: false           # Allow setuid in jobs

  # Namespace configuration
  namespaces:
    - pid                       # Process isolation
    - mount                     # Filesystem isolation
    - network                   # Network isolation
    - ipc                       # IPC isolation
    - uts                       # Hostname isolation
    - cgroup                    # Cgroup isolation

# Monitoring configuration
monitoring:
  enabled: true
  bind_address: "127.0.0.1:9090" # Prometheus metrics endpoint

  collection:
    system_interval: "15s"       # System metrics interval
    process_interval: "30s"      # Process metrics interval

  # Metrics to collect
  metrics:
    - cpu
    - memory
    - disk
    - network
    - processes

Client Configuration

The RNX client configuration file is typically located at ~/.rnx/rnx-config.yml.

Single Node Setup

version: "3.0"

# Default node configuration
default_node: "default"

nodes:
  default:
    address: "joblet-server:50051"

    # Embedded certificates
    cert: |
      -----BEGIN CERTIFICATE-----
      MIIFLDCCAxSgAwIBAgIUd...
      -----END CERTIFICATE-----

    key: |
      -----BEGIN PRIVATE KEY-----
      MIIJQgIBADANBgkqhkiG9w0BAQ...
      -----END PRIVATE KEY-----

    ca: |
      -----BEGIN CERTIFICATE-----
      MIIFazCCA1OgAwIBAgIUX...
      -----END CERTIFICATE-----

    # Connection settings
    timeout: "30s"
    keepalive: "120s"

    # Retry configuration
    retry:
      enabled: true
      max_attempts: 3
      backoff: "1s"

Multi-Node Setup

version: "3.0"

default_node: "production"

# Global settings
global:
  timeout: "30s"
  keepalive: "120s"

nodes:
  production:
    address: "prod.joblet.company.com:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Production admin certificate
      -----END CERTIFICATE-----
    key: |
      -----BEGIN PRIVATE KEY-----
      # Production admin key
      -----END PRIVATE KEY-----
    ca: |
      -----BEGIN CERTIFICATE-----
      # Company CA certificate
      -----END CERTIFICATE-----

  staging:
    address: "staging.joblet.company.com:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Staging admin certificate
      -----END CERTIFICATE-----
    # ... rest of credentials

  development:
    address: "dev.joblet.company.com:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Dev admin certificate
      -----END CERTIFICATE-----
    # ... rest of credentials

  viewer:
    address: "prod.joblet.company.com:50051"
    cert: |
      -----BEGIN CERTIFICATE-----
      # Viewer certificate (OU=viewer)
      -----END CERTIFICATE-----
    # ... rest of credentials

# Client preferences
preferences:
  output_format: "table"         # Default output format
  color_output: true            # Enable colored output
  confirm_destructive: true     # Confirm before destructive operations

  # Upload settings
  upload:
    chunk_size: 1048576         # Upload chunk size (1MB)
    compression: true           # Compress uploads
    show_progress: true         # Show upload progress

Authentication Roles

Joblet uses certificate Organization Units (OU) for role-based access:

# Admin role certificate (full access)
# Certificate subject: /CN=admin-client/OU=admin

# Viewer role certificate (read-only)
# Certificate subject: /CN=viewer-client/OU=viewer

Generate role-specific certificates:

# Admin certificate
openssl req -new -key client-key.pem -out admin.csr \
  -subj "/CN=admin-client/OU=admin"

# Viewer certificate  
openssl req -new -key client-key.pem -out viewer.csr \
  -subj "/CN=viewer-client/OU=viewer"

Environment Variables

Server Environment Variables

Variable Description Default
JOBLET_CONFIG_PATH Path to configuration file /opt/joblet/config/joblet-config.yml
JOBLET_LOG_LEVEL Log level override from config
JOBLET_SERVER_ADDRESS Server address override from config
JOBLET_SERVER_PORT Server port override from config
JOBLET_MAX_JOBS Maximum concurrent jobs from config
JOBLET_CI_MODE Enable CI mode (relaxed isolation) false

Client Environment Variables

Variable Description Default
RNX_CONFIG Path to configuration file searches standard locations
RNX_NODE Default node to use default
RNX_OUTPUT_FORMAT Output format (table/json) table
RNX_NO_COLOR Disable colored output false
RNX_TIMEOUT Request timeout 30s

Configuration Examples

High-Security Production Setup

version: "3.0"

server:
  address: "0.0.0.0"
  port: 50051
  tls:
    enabled: true
    min_version: "1.3"
    cipher_suites:
      - TLS_AES_256_GCM_SHA384
      - TLS_CHACHA20_POLY1305_SHA256

joblet:
  validateCommands: true
  allowedCommands:
    - python3
    - node
  maxConcurrentJobs: 50
  jobTimeout: "1h"

security:
  require_client_cert: true
  verify_client_cert: true
  enable_rbac: true
  audit:
    enabled: true
    log_all_operations: true

filesystem:
  enable_chroot: true
  readonly_rootfs: true

process:
  default_user: "nobody"
  allow_setuid: false

Development Environment Setup

version: "3.0"

server:
  address: "0.0.0.0"
  port: 50051

joblet:
  defaultCpuLimit: 0      # No limits in dev
  defaultMemoryLimit: 0
  defaultIoLimit: 0
  validateCommands: false # Allow any command

logging:
  level: "debug"
  format: "text"

network:
  networks:
    bridge:
      cidr: "172.30.0.0/16"
      enable_nat: true

volume:
  max_volumes: 1000
  max_size: "1TB"

CI/CD Optimized Setup

version: "3.0"

server:
  address: "0.0.0.0"
  port: 50051

joblet:
  maxConcurrentJobs: 200
  jobTimeout: "30m"
  cleanupTimeout: "5s"
  preserveFailedJobs: false

filesystem:
  workspace:
    cleanup_on_exit: true
    preserve_on_failure: false

cgroup:
  accounting:
    enabled: false      # Reduce overhead

logging:
  level: "warn"        # Reduce log volume
  outputs:
    - type: "stdout"
      format: "json"   # Structured logs for CI

Best Practices

  1. Security First: Always use TLS and client certificates in production
  2. Resource Limits: Set appropriate defaults to prevent resource exhaustion
  3. Monitoring: Enable metrics collection for production environments
  4. Logging: Use JSON format for easier log parsing
  5. Cleanup: Configure automatic cleanup to prevent disk space issues
  6. Validation: Enable command validation in production
  7. Audit: Enable audit logging for compliance
  8. Backup: Keep configuration file backups

Configuration Validation

Validate your configuration:

# Server configuration
joblet --config=/opt/joblet/config/joblet-config.yml --validate

# Client configuration
rnx --config=~/.rnx/rnx-config.yml nodes

Troubleshooting

See Troubleshooting Guide for configuration-related issues.