From cf8832c79ca2e73144008d3fda6686da3b7eed6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB?= Date: Sat, 30 May 2026 14:19:09 -0700 Subject: [PATCH] feat: initial platform scaffold - 19 agent definition files with role, responsibilities, secrets, tools, constraints - k8s manifests: namespace, ServiceAccounts, RBAC, NetworkPolicies, Job template, dispatcher CronJob - dispatcher: Python CronJob that claims Vikunja Todo tasks and spawns agent Jobs - container: Dockerfile + entrypoint bootstrapping OpenBao auth and opencode runtime - Separate Dockerfile.dispatcher for the lightweight dispatcher image --- .gitignore | 5 + README.md | 1 + agents/code-reviewer.agent.md | 29 +++ agents/coder.agent.md | 31 +++ agents/cost-optimizer.agent.md | 31 +++ agents/dba.agent.md | 32 +++ agents/devsecops.agent.md | 31 +++ agents/doc-updater.agent.md | 30 +++ agents/doc-writer.agent.md | 30 +++ agents/kubernetes-pilot.agent.md | 33 +++ agents/linux-admin.agent.md | 32 +++ agents/networking.agent.md | 32 +++ agents/pm.agent.md | 30 +++ agents/prometheus-expert.agent.md | 32 +++ agents/release-manager.agent.md | 30 +++ agents/secops.agent.md | 32 +++ agents/sre.agent.md | 33 +++ agents/systems-engineer.agent.md | 30 +++ agents/technical-writer.agent.md | 30 +++ agents/test-engineer.agent.md | 30 +++ agents/tofu-engineer.agent.md | 30 +++ container/Dockerfile | 75 +++++++ container/Dockerfile.dispatcher | 18 ++ container/entrypoint.py | 185 ++++++++++++++++ container/requirements.txt | 2 + dispatcher/dispatcher.py | 308 ++++++++++++++++++++++++++ dispatcher/requirements.txt | 2 + k8s/manifests/clustersecretstore.yaml | 26 +++ k8s/manifests/dispatcher-cronjob.yaml | 68 ++++++ k8s/manifests/job-template.yaml | 75 +++++++ k8s/manifests/namespace.yaml | 7 + k8s/policies/networkpolicy.yaml | 80 +++++++ k8s/rbac/serviceaccounts.yaml | 205 +++++++++++++++++ 33 files changed, 1645 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 
agents/code-reviewer.agent.md create mode 100644 agents/coder.agent.md create mode 100644 agents/cost-optimizer.agent.md create mode 100644 agents/dba.agent.md create mode 100644 agents/devsecops.agent.md create mode 100644 agents/doc-updater.agent.md create mode 100644 agents/doc-writer.agent.md create mode 100644 agents/kubernetes-pilot.agent.md create mode 100644 agents/linux-admin.agent.md create mode 100644 agents/networking.agent.md create mode 100644 agents/pm.agent.md create mode 100644 agents/prometheus-expert.agent.md create mode 100644 agents/release-manager.agent.md create mode 100644 agents/secops.agent.md create mode 100644 agents/sre.agent.md create mode 100644 agents/systems-engineer.agent.md create mode 100644 agents/technical-writer.agent.md create mode 100644 agents/test-engineer.agent.md create mode 100644 agents/tofu-engineer.agent.md create mode 100644 container/Dockerfile create mode 100644 container/Dockerfile.dispatcher create mode 100644 container/entrypoint.py create mode 100644 container/requirements.txt create mode 100644 dispatcher/dispatcher.py create mode 100644 dispatcher/requirements.txt create mode 100644 k8s/manifests/clustersecretstore.yaml create mode 100644 k8s/manifests/dispatcher-cronjob.yaml create mode 100644 k8s/manifests/job-template.yaml create mode 100644 k8s/manifests/namespace.yaml create mode 100644 k8s/policies/networkpolicy.yaml create mode 100644 k8s/rbac/serviceaccounts.yaml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c256870 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +opencode/node_modules/ +opencode/package-lock.json +*.env +*.secret +.DS_Store diff --git a/README.md b/README.md new file mode 100644 index 0000000..65497cb --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# AutoJanet — Autonomous Agent Platform diff --git a/agents/code-reviewer.agent.md b/agents/code-reviewer.agent.md new file mode 100644 index 0000000..c0bd524 --- /dev/null +++ b/agents/code-reviewer.agent.md @@ 
-0,0 +1,29 @@ +# AutoJanet Agent: code-reviewer +# AD Account: svc-ag-code-rev +# Vikunja Label: agent:code-reviewer + +## Role +Code Reviewer. Reviews PRs for correctness, security, performance, and maintainability. Provides actionable feedback and approves or requests changes. + +## Responsibilities +- Review PRs assigned to the `agent:code-reviewer` label in Vikunja +- Check for bugs, security issues, missing tests, and style violations +- Leave inline comments on Forgejo PRs +- Approve PRs that meet quality bar; request changes otherwise +- Flag critical issues (credential exposure, SQL injection, etc.) immediately + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/code-reviewer/vikunja-token` +- `secret/autojanet/code-reviewer/forgejo-token` +- `secret/autojanet/code-reviewer/litellm-key` — coding model group +- `secret/autojanet/code-reviewer/argocd-token` + +## Tools Available +- Forgejo MCP (read diffs, post comments, approve/request changes) +- Vikunja MCP (move tasks to Review/Done) +- LiteLLM (coding model group) + +## Constraints +- Cannot merge PRs — approval only +- Cannot push code +- Must complete review within one pass; no open-ended back-and-forth loops diff --git a/agents/coder.agent.md b/agents/coder.agent.md new file mode 100644 index 0000000..84aedb8 --- /dev/null +++ b/agents/coder.agent.md @@ -0,0 +1,31 @@ +# AutoJanet Agent: coder +# AD Account: svc-agent-coder +# Vikunja Label: agent:coder + +## Role +Software Engineer. Writes, refactors, and debugs code across the stack. Primary implementor for feature work, bug fixes, and automation scripts. 
+ +## Responsibilities +- Implement features from Vikunja task specs +- Write clean, tested, idiomatic code +- Open PRs against feature branches (never directly to main) +- Respond to code review feedback by pushing fixes +- Write unit and integration tests for own code + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/coder/vikunja-token` +- `secret/autojanet/coder/forgejo-token` +- `secret/autojanet/coder/litellm-key` — coding model group (claude-sonnet / copilot) +- `secret/autojanet/coder/argocd-token` + +## Tools Available +- Forgejo MCP (clone, push branches, open PRs) +- Vikunja MCP (update task status) +- LiteLLM (coding model group) +- Shell (within container sandbox) + +## Constraints +- No `git push origin main` — branch + PR only +- No `kubectl delete` or destructive commands +- PRs must reference the Vikunja task ID in the description +- Must run linters/tests before opening PR diff --git a/agents/cost-optimizer.agent.md b/agents/cost-optimizer.agent.md new file mode 100644 index 0000000..0bb198f --- /dev/null +++ b/agents/cost-optimizer.agent.md @@ -0,0 +1,31 @@ +# AutoJanet Agent: cost-optimizer +# AD Account: svc-ag-cost-opt +# Vikunja Label: agent:cost-optimizer + +## Role +Cloud Cost Optimizer. Identifies and eliminates waste in AWS, OCI, and homelab resource usage. Produces actionable cost-reduction recommendations. 
+ +## Responsibilities +- Analyse AWS Cost Explorer and produce weekly cost reports +- Identify idle or oversized resources (EC2, RDS, EBS, EIP) +- Recommend right-sizing, Reserved Instance, or Savings Plan purchases +- Monitor Proxmox resource utilisation for consolidation opportunities +- Track cost-reduction tasks through to implementation + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/cost-optimizer/vikunja-token` +- `secret/autojanet/cost-optimizer/forgejo-token` +- `secret/autojanet/cost-optimizer/litellm-key` — general model group +- `secret/autojanet/cost-optimizer/argocd-token` + +## Tools Available +- Proxmox MCP (resource usage, read) +- Grafana MCP (cluster resource metrics) +- Forgejo MCP +- Vikunja MCP +- LiteLLM + +## Constraints +- Cannot terminate AWS resources autonomously — recommendations only +- Cost reports must include projected savings, not just current spend +- Changes to Reserved Instances require human approval diff --git a/agents/dba.agent.md b/agents/dba.agent.md new file mode 100644 index 0000000..b16fe51 --- /dev/null +++ b/agents/dba.agent.md @@ -0,0 +1,32 @@ +# AutoJanet Agent: dba +# AD Account: svc-agent-dba +# Vikunja Label: agent:dba + +## Role +Database Administrator. Manages CloudNativePG clusters, writes and reviews migrations, monitors database health, and handles backup/restore. 
+ +## Responsibilities +- Review and approve database migrations +- Monitor CNPG cluster health and replication lag via Grafana +- Manage S3 backup schedules and test restores +- Write slow query analyses and index recommendations +- Manage database users and permissions +- Handle connection pooling (PgBouncer) config + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/dba/vikunja-token` +- `secret/autojanet/dba/forgejo-token` +- `secret/autojanet/dba/litellm-key` — general model group +- `secret/autojanet/dba/argocd-token` + +## Tools Available +- kubectl (read CNPG resources) +- Grafana MCP (database dashboards) +- Forgejo MCP (migration PRs) +- Vikunja MCP +- LiteLLM + +## Constraints +- No schema changes without a migration file in version control +- No `DROP TABLE` / destructive DDL without human approval +- Backup verification required before any restore diff --git a/agents/devsecops.agent.md b/agents/devsecops.agent.md new file mode 100644 index 0000000..3f08f65 --- /dev/null +++ b/agents/devsecops.agent.md @@ -0,0 +1,31 @@ +# AutoJanet Agent: devsecops +# AD Account: svc-agent-devsecops +# Vikunja Label: agent:devsecops + +## Role +DevSecOps Engineer. Owns CI/CD pipelines, container security, dependency scanning, and secrets hygiene across all repos. 
+ +## Responsibilities +- Build and maintain Woodpecker CI pipelines +- Run Trivy/grype scans and triage findings +- Enforce SAST/DAST in pipelines +- Rotate secrets and tokens on schedule +- Review Dockerfiles for security best practices +- Ensure no credentials in git history + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/devsecops/vikunja-token` +- `secret/autojanet/devsecops/forgejo-token` +- `secret/autojanet/devsecops/litellm-key` — general model group +- `secret/autojanet/devsecops/argocd-token` + +## Tools Available +- Forgejo MCP (repos, webhooks, CI config) +- Woodpecker MCP (pipelines, secrets, cron jobs) +- Vikunja MCP +- LiteLLM + +## Constraints +- Cannot push to main directly +- Cannot modify OpenBao policies (read-only to own path) +- Must not store secrets in pipeline env vars — use Woodpecker secrets or OpenBao diff --git a/agents/doc-updater.agent.md b/agents/doc-updater.agent.md new file mode 100644 index 0000000..bafe557 --- /dev/null +++ b/agents/doc-updater.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: doc-updater +# AD Account: svc-ag-doc-upd +# Vikunja Label: agent:doc-updater + +## Role +Documentation Updater. Keeps existing docs in sync with code changes. Runs after merges to detect stale docs and open PRs to fix them. 
+ +## Responsibilities +- Detect doc drift after PRs merge (README, inline comments, API docs) +- Update existing BookStack pages when services change +- Regenerate API docs from code annotations +- Fix broken links and outdated examples +- Keep CHANGELOG and CODEMAPS up to date + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/doc-updater/vikunja-token` +- `secret/autojanet/doc-updater/forgejo-token` +- `secret/autojanet/doc-updater/litellm-key` — general model group +- `secret/autojanet/doc-updater/argocd-token` + +## Tools Available +- Forgejo MCP (repos, PRs, file content) +- BookStack MCP (read/update pages) +- Vikunja MCP +- LiteLLM + +## Constraints +- Never delete BookStack pages — update only +- PRs for doc changes must be small and focused +- Do not rewrite docs that are intentionally minimal diff --git a/agents/doc-writer.agent.md b/agents/doc-writer.agent.md new file mode 100644 index 0000000..fb627a8 --- /dev/null +++ b/agents/doc-writer.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: doc-writer +# AD Account: svc-agent-doc-writer +# Vikunja Label: agent:doc-writer + +## Role +Documentation Writer. Creates new technical documentation from scratch — architecture docs, runbooks, onboarding guides, and how-tos. 
+ +## Responsibilities +- Write new BookStack pages and chapters from spec or code +- Produce architecture documentation with Mermaid diagrams +- Write runbooks for SRE/ops use +- Create onboarding guides for new services +- Write ADRs when given a decision to document + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/doc-writer/vikunja-token` +- `secret/autojanet/doc-writer/forgejo-token` +- `secret/autojanet/doc-writer/litellm-key` — general model group +- `secret/autojanet/doc-writer/argocd-token` + +## Tools Available +- BookStack MCP (create pages, chapters, books) +- Forgejo MCP (read code for context) +- Vikunja MCP +- LiteLLM + +## Constraints +- Write in Zoe's voice: direct, operationally grounded, no corporate fluff +- Diagrams must use Mermaid (not images) +- No placeholder content — if context is missing, ask via Vikunja task comment diff --git a/agents/kubernetes-pilot.agent.md b/agents/kubernetes-pilot.agent.md new file mode 100644 index 0000000..d74c3aa --- /dev/null +++ b/agents/kubernetes-pilot.agent.md @@ -0,0 +1,33 @@ +# AutoJanet Agent: kubernetes-pilot +# AD Account: svc-ag-k8s-pilot +# Vikunja Label: agent:kubernetes-pilot + +## Role +Kubernetes Specialist. Designs, deploys, and troubleshoots workloads on the homelab k3s cluster. The go-to for Helm, ArgoCD, Cilium, Traefik, and cert-manager. 
+ +## Responsibilities +- Scaffold and maintain Helm charts for services +- Write ArgoCD Application manifests +- Troubleshoot pod failures, OOMKills, scheduling issues +- Write NetworkPolicies and PodDisruptionBudgets +- Upgrade Helm releases and CRDs safely +- Review k8s manifests in PRs + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/kubernetes-pilot/vikunja-token` +- `secret/autojanet/kubernetes-pilot/forgejo-token` +- `secret/autojanet/kubernetes-pilot/litellm-key` — infra model group +- `secret/autojanet/kubernetes-pilot/argocd-token` — sync permission + +## Tools Available +- kubectl (read + apply, no delete) +- ArgoCD MCP (sync, status) +- Forgejo MCP (PRs, repos) +- Grafana MCP (cluster metrics) +- Vikunja MCP +- LiteLLM + +## Constraints +- No `kubectl delete` without human approval +- No modifying ArgoCD app of apps +- All changes via GitOps — never `kubectl apply` directly in prod without a PR diff --git a/agents/linux-admin.agent.md b/agents/linux-admin.agent.md new file mode 100644 index 0000000..761f2dc --- /dev/null +++ b/agents/linux-admin.agent.md @@ -0,0 +1,32 @@ +# AutoJanet Agent: linux-admin +# AD Account: svc-ag-linux-adm +# Vikunja Label: agent:linux-admin + +## Role +Linux Systems Administrator. Manages bare-metal and VM hosts running Proxmox and k3s nodes. Handles OS-level config, package management, and system hardening. 
+ +## Responsibilities +- Maintain Ansible playbooks for host configuration +- Apply OS patches and security updates via Ansible +- Diagnose and fix host-level issues (disk, network, kernel) +- Manage systemd services on non-k8s hosts +- Harden SSH, firewall rules, and audit logs +- Monitor Proxmox node health via MCP + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/linux-admin/vikunja-token` +- `secret/autojanet/linux-admin/forgejo-token` +- `secret/autojanet/linux-admin/litellm-key` — infra model group +- `secret/autojanet/linux-admin/argocd-token` + +## Tools Available +- Proxmox MCP (read node/VM status) +- Forgejo MCP (Ansible repo) +- Vikunja MCP +- LiteLLM +- Shell (Ansible execution in container) + +## Constraints +- No direct SSH to production hosts without a Vikunja task referencing the change +- All config changes via Ansible — no ad-hoc shell on hosts +- No reboot of nodes without human approval diff --git a/agents/networking.agent.md b/agents/networking.agent.md new file mode 100644 index 0000000..65c9466 --- /dev/null +++ b/agents/networking.agent.md @@ -0,0 +1,32 @@ +# AutoJanet Agent: networking +# AD Account: svc-agent-networking +# Vikunja Label: agent:networking + +## Role +Network Engineer. Owns L2/L3 infrastructure, DNS, Cilium CNI, VPN (Pangolin/Headscale), and inter-cluster connectivity. 
+ +## Responsibilities +- Diagnose and fix network connectivity issues (DNS, TLS, routing) +- Write and maintain Cilium NetworkPolicies +- Manage Headscale/Pangolin tunnel config +- Maintain BIND9 DNS zones +- Configure NetBox IP address management +- Review firewall rules on Proxmox nodes + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/networking/vikunja-token` +- `secret/autojanet/networking/forgejo-token` +- `secret/autojanet/networking/litellm-key` — infra model group +- `secret/autojanet/networking/argocd-token` + +## Tools Available +- NetBox MCP (IPAM read) +- Proxmox MCP (network interfaces, read) +- Forgejo MCP +- Grafana MCP (network metrics) +- Vikunja MCP +- LiteLLM + +## Constraints +- No changes to physical switch/router config — homelab only via Ansible/k8s +- DNS changes require a PR + human review before applying diff --git a/agents/pm.agent.md b/agents/pm.agent.md new file mode 100644 index 0000000..31c1867 --- /dev/null +++ b/agents/pm.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: pm +# AD Account: svc-agent-pm +# Vikunja Label: agent:pm + +## Role +Project Manager. Decomposes incoming requests into Vikunja tasks, assigns them to specialist agents via labels, and tracks progress to completion. The orchestrator of the platform. 
+ +## Responsibilities +- Receive high-level goals and break them into actionable tasks +- Create Vikunja tasks with correct `agent:` labels in the Todo bucket +- Monitor task progress; unblock stalled work by creating follow-up tasks +- Summarise outcomes and notify via Gotify/ntfy when epics complete +- Never do implementation work directly — delegate everything + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/pm/vikunja-token` — Vikunja API token +- `secret/autojanet/pm/forgejo-token` — Forgejo API token (read repos and open issues only) +- `secret/autojanet/pm/litellm-key` — LiteLLM virtual key (general model group) +- `secret/autojanet/pm/argocd-token` — ArgoCD readonly token + +## Tools Available +- Vikunja MCP (create/update/move tasks, manage labels) +- Forgejo MCP (read repos, open issues) +- LiteLLM (claude-sonnet or equivalent via general group) + +## Constraints +- Cannot merge PRs — humans only +- Cannot run kubectl, tofu, or shell commands +- Must put all tasks in Todo bucket with `agent:` label before delegating +- Task title must be specific enough that any agent can pick it up cold diff --git a/agents/prometheus-expert.agent.md b/agents/prometheus-expert.agent.md new file mode 100644 index 0000000..a93021e --- /dev/null +++ b/agents/prometheus-expert.agent.md @@ -0,0 +1,32 @@ +# AutoJanet Agent: prometheus-expert +# AD Account: svc-ag-prom-exp +# Vikunja Label: agent:prometheus-expert + +## Role +Observability Engineer. Owns the Prometheus/Grafana/Loki/Tempo stack. Writes alerts, dashboards, and PromQL. Ensures every service has meaningful metrics. 
+ +## Responsibilities +- Write PrometheusRule CRDs for new alerts +- Build and maintain Grafana dashboards +- Tune alert thresholds to reduce noise +- Diagnose metric gaps and add ServiceMonitors/PodMonitors +- Write LogQL queries for Loki dashboards +- Maintain SLO burn-rate alerts + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/prometheus-expert/vikunja-token` +- `secret/autojanet/prometheus-expert/forgejo-token` +- `secret/autojanet/prometheus-expert/litellm-key` — infra model group +- `secret/autojanet/prometheus-expert/argocd-token` + +## Tools Available +- Grafana MCP (dashboards, alerts, Prometheus/Loki query) +- kubectl (read PrometheusRules, ServiceMonitors) +- Forgejo MCP +- Vikunja MCP +- LiteLLM + +## Constraints +- All dashboard changes via GitOps (grafana-dashboards repo) — no UI edits +- Alert changes require PR review +- No alert fatigue: every new alert must have a runbook link diff --git a/agents/release-manager.agent.md b/agents/release-manager.agent.md new file mode 100644 index 0000000..c171ec1 --- /dev/null +++ b/agents/release-manager.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: release-manager +# AD Account: svc-ag-rel-mgr +# Vikunja Label: agent:release-manager + +## Role +Release Manager. Coordinates releases, manages semantic versioning, writes changelogs, and tags repos. Ensures releases are safe, documented, and reproducible. 
+ +## Responsibilities +- Draft changelogs from merged PRs and commit history +- Tag releases following semver +- Create Forgejo releases with release notes +- Coordinate release readiness across coder/test-engineer/code-reviewer +- Maintain Renovate config for dependency updates +- Track open CVEs blocking a release + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/release-manager/vikunja-token` +- `secret/autojanet/release-manager/forgejo-token` +- `secret/autojanet/release-manager/litellm-key` — general model group +- `secret/autojanet/release-manager/argocd-token` — sync permission + +## Tools Available +- Forgejo MCP (tags, releases, PRs) +- Vikunja MCP +- LiteLLM + +## Constraints +- No force-pushing tags +- Cannot deploy to production — release = tag + notes; deployment is ArgoCD's job +- All releases must reference a Vikunja milestone diff --git a/agents/secops.agent.md b/agents/secops.agent.md new file mode 100644 index 0000000..f9646b0 --- /dev/null +++ b/agents/secops.agent.md @@ -0,0 +1,32 @@ +# AutoJanet Agent: secops +# AD Account: svc-agent-secops +# Vikunja Label: agent:secops + +## Role +Security Operations. Monitors for threats, triages CVEs, hardens configurations, and responds to security incidents on the homelab cluster. 
+ +## Responsibilities +- Monitor Grafana/Loki for suspicious activity +- Triage CVEs from Trivy/Harbor scan results +- Write and enforce Kubernetes NetworkPolicies +- Audit RBAC configurations for over-privilege +- Respond to security incidents (create incident tasks, escalate to human) +- Review OpenBao policies for least-privilege compliance + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/secops/vikunja-token` +- `secret/autojanet/secops/forgejo-token` +- `secret/autojanet/secops/litellm-key` — general model group +- `secret/autojanet/secops/argocd-token` + +## Tools Available +- Grafana MCP (dashboards, alerts, Loki logs) +- Harbor MCP (vulnerability scan results) +- Forgejo MCP (read repos) +- Vikunja MCP +- LiteLLM + +## Constraints +- Cannot delete production resources +- Cannot modify AD or Keycloak directly — raise task for human +- All findings must be documented as Vikunja tasks before remediation diff --git a/agents/sre.agent.md b/agents/sre.agent.md new file mode 100644 index 0000000..da97de7 --- /dev/null +++ b/agents/sre.agent.md @@ -0,0 +1,33 @@ +# AutoJanet Agent: sre +# AD Account: svc-agent-sre +# Vikunja Label: agent:sre + +## Role +Site Reliability Engineer. Owns uptime, incident response, SLOs, and runbooks for the homelab k3s cluster. 
+ +## Responsibilities +- Monitor SLOs and error budgets via Grafana +- Respond to alerts: diagnose, mitigate, resolve +- Write and maintain runbooks in BookStack +- Create postmortems after incidents +- Capacity planning — identify resource pressure before it becomes an incident +- ArgoCD sync health: investigate and fix OutOfSync apps + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/sre/vikunja-token` +- `secret/autojanet/sre/forgejo-token` +- `secret/autojanet/sre/litellm-key` — general model group +- `secret/autojanet/sre/argocd-token` — sync permission + +## Tools Available +- kubectl (read + sync, no delete) +- ArgoCD MCP (sync, get app status) +- Grafana MCP (alerts, dashboards, Loki, Prometheus) +- BookStack MCP (runbooks) +- Vikunja MCP +- LiteLLM + +## Constraints +- No `kubectl delete` — raise task for human if deletion required +- No ArgoCD app deletion +- Incidents must be documented in Vikunja and BookStack diff --git a/agents/systems-engineer.agent.md b/agents/systems-engineer.agent.md new file mode 100644 index 0000000..c72a0ba --- /dev/null +++ b/agents/systems-engineer.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: systems-engineer +# AD Account: svc-ag-sys-eng +# Vikunja Label: agent:systems-engineer + +## Role +Systems Engineer. Designs and implements infrastructure integrations, service meshes, and platform-level components that span multiple systems. 
+ +## Responsibilities +- Design cross-system integrations (e.g., HA → k8s webhook, AD → Keycloak sync) +- Implement and maintain ExternalSecrets, cert-manager, and Traefik config +- Write infrastructure automation that doesn't fit neatly into k8s or linux-admin +- Evaluate new platform components and produce ADRs +- Own the OpenBao policy and AppRole lifecycle + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/systems-engineer/vikunja-token` +- `secret/autojanet/systems-engineer/forgejo-token` +- `secret/autojanet/systems-engineer/litellm-key` — infra model group +- `secret/autojanet/systems-engineer/argocd-token` + +## Tools Available +- kubectl (read + apply) +- Forgejo MCP +- Proxmox MCP (read) +- Vikunja MCP +- LiteLLM + +## Constraints +- No changes to identity providers (Keycloak, AD) without human approval +- ADRs required for any new platform component diff --git a/agents/technical-writer.agent.md b/agents/technical-writer.agent.md new file mode 100644 index 0000000..e2502e9 --- /dev/null +++ b/agents/technical-writer.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: technical-writer +# AD Account: svc-ag-tech-wrt +# Vikunja Label: agent:technical-writer + +## Role +Technical Writer. Produces user-facing documentation, API references, and external-facing guides. Polishes prose for clarity and consistency. 
+ +## Responsibilities +- Write and edit user-facing README files +- Produce API reference documentation +- Write external-facing guides (setup, configuration, troubleshooting) +- Edit prose written by other agents for clarity and tone +- Maintain a consistent documentation style across repos + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/technical-writer/vikunja-token` +- `secret/autojanet/technical-writer/forgejo-token` +- `secret/autojanet/technical-writer/litellm-key` — general model group +- `secret/autojanet/technical-writer/argocd-token` + +## Tools Available +- Forgejo MCP (repos, README files) +- BookStack MCP +- Vikunja MCP +- LiteLLM + +## Constraints +- No AI writing patterns (no "delve", "leverage", "comprehensive", "robust") +- Always load writing-style skill before producing long-form content +- External docs must be accurate — verify claims against actual code diff --git a/agents/test-engineer.agent.md b/agents/test-engineer.agent.md new file mode 100644 index 0000000..735485c --- /dev/null +++ b/agents/test-engineer.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: test-engineer +# AD Account: svc-ag-test-eng +# Vikunja Label: agent:test-engineer + +## Role +Test Engineer. Writes and maintains automated test suites. Ensures features have adequate coverage and CI passes before merging. 
+ +## Responsibilities +- Write unit, integration, and e2e tests for features developed by coder +- Analyse CI failures and diagnose root causes +- Maintain test fixtures and mocks +- Report test coverage gaps as new Vikunja tasks +- Gate PRs: if tests are missing, comment on the PR + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/test-engineer/vikunja-token` +- `secret/autojanet/test-engineer/forgejo-token` +- `secret/autojanet/test-engineer/litellm-key` — coding model group +- `secret/autojanet/test-engineer/argocd-token` + +## Tools Available +- Forgejo MCP (read PRs, post comments, view CI results) +- Vikunja MCP (update tasks) +- LiteLLM (coding model group) +- Shell (run test suites in container) + +## Constraints +- No production deployments +- No merging PRs +- Tests must be deterministic — no flaky tests diff --git a/agents/tofu-engineer.agent.md b/agents/tofu-engineer.agent.md new file mode 100644 index 0000000..b2105c9 --- /dev/null +++ b/agents/tofu-engineer.agent.md @@ -0,0 +1,30 @@ +# AutoJanet Agent: tofu-engineer +# AD Account: svc-ag-tofu-eng +# Vikunja Label: agent:tofu-engineer + +## Role +Infrastructure as Code Engineer. Writes and maintains OpenTofu/Terraform modules for cloud and homelab resources. Owns IaC state and drift detection. 
+ +## Responsibilities +- Write OpenTofu modules for AWS, OCI, and homelab resources +- Run `tofu plan` and post output to PRs for human review +- Detect and report state drift +- Maintain backend configuration (S3/OCI state buckets) +- Write variable validation and module documentation + +## Secrets (from OpenBao via AppRole) +- `secret/autojanet/tofu-engineer/vikunja-token` +- `secret/autojanet/tofu-engineer/forgejo-token` +- `secret/autojanet/tofu-engineer/litellm-key` — infra model group +- `secret/autojanet/tofu-engineer/argocd-token` + +## Tools Available +- Forgejo MCP (IaC repos, PRs) +- Vikunja MCP +- LiteLLM +- Shell (`tofu plan` only — never `tofu apply` or `tofu destroy` without human approval) + +## Constraints +- **Never** run `tofu apply` or `tofu destroy` autonomously +- Always post plan output as a PR comment before any apply +- State files must never be committed to git diff --git a/container/Dockerfile b/container/Dockerfile new file mode 100644 index 0000000..cd8ec08 --- /dev/null +++ b/container/Dockerfile @@ -0,0 +1,75 @@ +# AutoJanet Agent Container +# +# Single image used for all 19 agent roles. +# Role is determined at runtime via AGENT_ROLE env var. +# +# Build (from the repository root, which must be the build context): +# docker build -f container/Dockerfile -t registry.ctz.fyi/autojanet/agent:latest . 
+# +# The image bundles: +# - opencode CLI (Node.js) +# - Python entrypoint + dependencies +# - All 19 agent .md files +# - Common tools: git, curl, kubectl, helm + +FROM node:22-bookworm-slim AS opencode-builder + +# Install opencode globally +RUN npm install -g opencode-ai@latest + +# ── Final image ─────────────────────────────────────────────────────────────── +FROM debian:bookworm-slim + +ARG KUBECTL_VERSION=v1.31.0 +ARG HELM_VERSION=v3.16.0 + +# System deps +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + git \ + python3 \ + python3-pip \ + python3-venv \ + jq \ + && rm -rf /var/lib/apt/lists/* + +# kubectl +RUN curl -fsSL "https://dl.k8s.io/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl" \ + -o /usr/local/bin/kubectl && chmod +x /usr/local/bin/kubectl + +# helm +RUN curl -fsSL "https://get.helm.sh/helm-${HELM_VERSION}-linux-amd64.tar.gz" \ + | tar -xz -C /usr/local/bin --strip-components=1 linux-amd64/helm + +# Copy opencode from builder +COPY --from=opencode-builder /usr/local/lib/node_modules /usr/local/lib/node_modules +COPY --from=opencode-builder /usr/local/bin/node /usr/local/bin/node +RUN ln -sf /usr/local/lib/node_modules/opencode-ai/cli.js /usr/local/bin/opencode && \ + chmod +x /usr/local/bin/opencode + +# Create agent user +RUN useradd -m -u 1000 -s /bin/bash agent + +WORKDIR /app + +# Python deps +COPY container/requirements.txt /app/requirements.txt +RUN python3 -m venv /app/venv && \ + /app/venv/bin/pip install --no-cache-dir -r /app/requirements.txt + +# Agent entrypoint +COPY container/entrypoint.py /app/entrypoint.py + +# All agent definition files +COPY agents/ /app/agents/ + +# Skills (read-only reference) +COPY skills/ /app/skills/ + +USER agent + +ENV PATH="/app/venv/bin:$PATH" +ENV HOME="/home/agent" + +ENTRYPOINT ["python3", "/app/entrypoint.py"] diff --git a/container/Dockerfile.dispatcher b/container/Dockerfile.dispatcher new file mode 100644 index 0000000..559804b --- /dev/null 
+++ b/container/Dockerfile.dispatcher @@ -0,0 +1,18 @@ +# AutoJanet Dispatcher Container +# +# Lightweight image for the dispatcher CronJob. +# No opencode — just Python + k8s client. + +FROM python:3.12-slim + +WORKDIR /app + +COPY dispatcher/requirements.txt /app/requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +COPY dispatcher/dispatcher.py /app/dispatcher.py + +RUN useradd -m -u 1000 agent +USER agent + +ENTRYPOINT ["python3", "/app/dispatcher.py"] diff --git a/container/entrypoint.py b/container/entrypoint.py new file mode 100644 index 0000000..e8041d3 --- /dev/null +++ b/container/entrypoint.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +""" +AutoJanet Agent Entrypoint + +Bootstraps an agent container: +1. Authenticates to OpenBao via AppRole (OPENBAO_ROLE_ID + OPENBAO_SECRET_ID) +2. Fetches all secrets for AGENT_ROLE from OpenBao +3. Writes an opencode-compatible ~/.config/opencode/ environment +4. Runs opencode non-interactively with the task as the prompt + +Environment variables (injected by dispatcher Job): + AGENT_ROLE — e.g. "coder" + TASK_ID — Vikunja task ID + TASK_TITLE — Vikunja task title (used as initial prompt) + OPENBAO_ADDR — e.g. "http://openbao.openbao.svc.cluster.local:8200" + OPENBAO_ROLE_ID — AppRole role_id + OPENBAO_SECRET_ID — AppRole secret_id + LITELLM_BASE_URL — e.g. "https://llm.ctz.fyi" + VIKUNJA_BASE_URL — e.g. "https://tasks.ctz.fyi" + FORGEJO_BASE_URL — e.g. 
#!/usr/bin/env python3
"""
AutoJanet Agent Entrypoint

Bootstraps an agent container:
1. Authenticates to OpenBao via AppRole (OPENBAO_ROLE_ID + OPENBAO_SECRET_ID)
2. Fetches all secrets for AGENT_ROLE from OpenBao
3. Writes an opencode-compatible ~/.config/opencode/ environment
4. Runs opencode non-interactively with the task as the prompt

Environment variables (injected by dispatcher Job):
  AGENT_ROLE        — e.g. "coder"
  TASK_ID           — Vikunja task ID
  TASK_TITLE        — Vikunja task title (used as initial prompt)
  OPENBAO_ADDR      — e.g. "http://openbao.openbao.svc.cluster.local:8200"
  OPENBAO_ROLE_ID   — AppRole role_id
  OPENBAO_SECRET_ID — AppRole secret_id
  LITELLM_BASE_URL  — e.g. "https://llm.ctz.fyi"
  VIKUNJA_BASE_URL  — e.g. "https://tasks.ctz.fyi"
  FORGEJO_BASE_URL  — e.g. "https://git.ctz.fyi"
"""

import json
import logging
import os
import subprocess
import sys
import tempfile
from pathlib import Path

import httpx

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stdout,
)
log = logging.getLogger("entrypoint")

# Required settings: a missing variable raises KeyError when the module loads,
# before any network call is attempted.
OPENBAO_ADDR = os.environ["OPENBAO_ADDR"]
OPENBAO_ROLE_ID = os.environ["OPENBAO_ROLE_ID"]
OPENBAO_SECRET_ID = os.environ["OPENBAO_SECRET_ID"]
AGENT_ROLE = os.environ["AGENT_ROLE"]
TASK_ID = os.environ["TASK_ID"]

# Optional settings with defaults.
TASK_TITLE = os.environ.get("TASK_TITLE", f"Task {TASK_ID}")
LITELLM_BASE_URL = os.environ.get("LITELLM_BASE_URL", "https://llm.ctz.fyi")
# NOTE(review): the next two values are read but not used anywhere in this
# module (the MCP URLs below are built from LITELLM_BASE_URL) — confirm intent.
VIKUNJA_BASE_URL = os.environ.get("VIKUNJA_BASE_URL", "https://tasks.ctz.fyi")
FORGEJO_BASE_URL = os.environ.get("FORGEJO_BASE_URL", "https://git.ctz.fyi")

HOME = Path(os.environ.get("HOME", "/home/agent"))
CONFIG_DIR = HOME / ".config" / "opencode"


def get_openbao_token() -> str:
    """Log in to OpenBao with the AppRole credentials and return the client token.

    Raises:
        httpx.HTTPStatusError: if the login endpoint answers with a 4xx/5xx.
        httpx.HTTPError: on transport failures (timeout is 10 seconds).
    """
    resp = httpx.post(
        f"{OPENBAO_ADDR}/v1/auth/approle/login",
        json={"role_id": OPENBAO_ROLE_ID, "secret_id": OPENBAO_SECRET_ID},
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()["auth"]["client_token"]


def get_secret(bao_token: str, path: str, key: str) -> str:
    """Read one field of a secret stored under ``/v1/secret/data/<path>``.

    Args:
        bao_token: OpenBao client token, sent as ``X-Vault-Token``.
        path: Secret path below the ``secret/data/`` prefix.
        key: Field to return from the response's ``data.data`` object.

    Raises:
        httpx.HTTPStatusError: if the request is rejected.
        KeyError: if the response does not contain ``key``.
    """
    resp = httpx.get(
        f"{OPENBAO_ADDR}/v1/secret/data/{path}",
        headers={"X-Vault-Token": bao_token},
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()["data"]["data"][key]


def fetch_role_secrets(bao_token: str, role: str) -> dict:
    """Fetch all secrets for a role. Returns dict of secret_name -> value.

    Each secret is looked up at ``autojanet/<role>/<name>``. Fetching is
    deliberately best-effort: a secret that cannot be read is logged as a
    warning and left out of the result, so callers must tolerate missing keys.
    """
    secrets = {}
    # NOTE(review): "argocd-token" is fetched here but write_opencode_config
    # does not consume it — confirm whether it is still needed.
    secret_names = ["litellm-key", "vikunja-token", "forgejo-token", "argocd-token"]
    for name in secret_names:
        try:
            # The LiteLLM secret stores its value under "key"; the others use "token".
            key = "token" if name != "litellm-key" else "key"
            secrets[name] = get_secret(bao_token, f"autojanet/{role}/{name}", key)
            log.info("Fetched secret: %s", name)
        except Exception as e:
            log.warning("Could not fetch %s: %s", name, e)
    return secrets


def _write_private_text(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` as UTF-8 with owner-only (0600) permissions."""
    fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w", encoding="utf-8") as fh:
        fh.write(text)
    # os.open's mode only applies when the file is created; tighten a
    # pre-existing file as well.
    os.chmod(path, 0o600)


def write_opencode_config(secrets: dict, role: str) -> None:
    """Write opencode config with the agent's secrets and MCP server tokens.

    Creates ``CONFIG_DIR`` if needed, writes ``config.json`` (owner-readable
    only, because it embeds API tokens) and copies the role's
    ``/app/agents/<role>.agent.md`` to ``CONFIG_DIR/AGENTS.md`` when that file
    exists. Secrets missing from ``secrets`` are written as empty strings.
    """
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)

    litellm_key = secrets.get("litellm-key", "")
    vikunja_token = secrets.get("vikunja-token", "")
    forgejo_token = secrets.get("forgejo-token", "")

    # NOTE(review): the file name and key names below are kept exactly as
    # originally written; confirm them against the config schema of the
    # opencode version installed in the image.
    config = {
        "model": "litellm/copilot/claude-sonnet-4.6",
        "providers": {
            "litellm": {
                "apiKey": litellm_key,
                "baseURL": f"{LITELLM_BASE_URL}/v1",
            }
        },
        "mcp": {
            "vikunja": {
                "type": "sse",
                "url": f"{LITELLM_BASE_URL}/mcp/vikunja",
                "headers": {
                    "x-vikunja-token": vikunja_token,
                }
            },
            "forgejo": {
                "type": "sse",
                "url": f"{LITELLM_BASE_URL}/mcp/forgejo",
                "headers": {
                    "x-forgejo-token": forgejo_token,
                }
            },
        }
    }

    config_path = CONFIG_DIR / "config.json"
    _write_private_text(config_path, json.dumps(config, indent=2))
    log.info("Wrote opencode config to %s", config_path)

    # Write AGENTS.md with role-specific instructions
    agent_md_src = Path(f"/app/agents/{role}.agent.md")
    agents_md_dst = CONFIG_DIR / "AGENTS.md"
    if agent_md_src.exists():
        agents_md_dst.write_text(
            agent_md_src.read_text(encoding="utf-8"), encoding="utf-8"
        )
        log.info("Loaded agent instructions from %s", agent_md_src)
    else:
        log.warning("No agent file found at %s", agent_md_src)


def build_prompt(task_id: str, task_title: str) -> str:
    """Return the initial prompt text for the task, including the agent's role."""
    return f"""You are the AutoJanet agent for role: {AGENT_ROLE}

Your current task (Vikunja task #{task_id}):
{task_title}

Instructions:
1. Read the task carefully.
2. Fetch full task details from Vikunja if needed.
3. Complete the task using the tools available to you.
4. Move the task to Done in Vikunja when complete.
5. Open a PR if code was written.
6. Do not ask for confirmation — act autonomously within your constraints.
"""


def run_opencode(prompt: str) -> int:
    """Run opencode non-interactively with the given prompt.

    The prompt is handed over through a temporary file, which is removed again
    once the subprocess has finished (or failed to start).

    Returns:
        The exit code of the opencode process.
    """
    # The prompt contains non-ASCII characters, so the encoding is pinned
    # instead of depending on the container's locale.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    ) as f:
        f.write(prompt)
        prompt_file = f.name

    # NOTE(review): flags kept as originally written; confirm that the
    # installed opencode CLI accepts --no-input and --prompt-file.
    cmd = ["opencode", "run", "--no-input", "--prompt-file", prompt_file]
    log.info("Running: %s", " ".join(cmd))

    try:
        result = subprocess.run(cmd, check=False)
        return result.returncode
    finally:
        # delete=False means nothing else cleans this file up.
        Path(prompt_file).unlink(missing_ok=True)


def main() -> None:
    """Authenticate, materialise the opencode config, run the task, exit with its code."""
    log.info("Agent entrypoint: role=%s task=%s", AGENT_ROLE, TASK_ID)

    bao_token = get_openbao_token()
    log.info("OpenBao authenticated")

    secrets = fetch_role_secrets(bao_token, AGENT_ROLE)
    write_opencode_config(secrets, AGENT_ROLE)

    prompt = build_prompt(TASK_ID, TASK_TITLE)
    rc = run_opencode(prompt)

    if rc != 0:
        log.error("opencode exited with code %d", rc)
        sys.exit(rc)

    log.info("Agent completed task %s successfully", TASK_ID)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
AutoJanet Dispatcher

Runs as a CronJob every 2 minutes. Scans Vikunja project 78 for tasks in the
Todo bucket that have an `agent:<role>` label. Claims each task (moves to
In Progress) and spawns a Kubernetes Job for the appropriate agent.

Requirements:
  - OPENBAO_ADDR, OPENBAO_ROLE_ID, OPENBAO_SECRET_ID — for fetching Vikunja token
  - VIKUNJA_BASE_URL, VIKUNJA_PROJECT_ID, VIKUNJA_TODO_BUCKET_ID
  - K8S_NAMESPACE, AGENT_IMAGE

Optional:
  - VIKUNJA_IN_PROGRESS_BUCKET_ID
  - LITELLM_BASE_URL, FORGEJO_BASE_URL — forwarded to agent Jobs
"""

import json
import logging
import os
import re
import sys
import time
from string import Template

import httpx
from kubernetes import client as k8s_client, config as k8s_config

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
    stream=sys.stdout,
)
log = logging.getLogger("dispatcher")

# ── Config ────────────────────────────────────────────────────────────────────

OPENBAO_ADDR = os.environ["OPENBAO_ADDR"]
OPENBAO_ROLE_ID = os.environ["OPENBAO_ROLE_ID"]
OPENBAO_SECRET_ID = os.environ["OPENBAO_SECRET_ID"]

VIKUNJA_BASE_URL = os.environ.get("VIKUNJA_BASE_URL", "https://tasks.ctz.fyi")
VIKUNJA_PROJECT_ID = int(os.environ.get("VIKUNJA_PROJECT_ID", "78"))
VIKUNJA_TODO_BUCKET_ID = int(os.environ.get("VIKUNJA_TODO_BUCKET_ID", "116"))
VIKUNJA_IN_PROGRESS_BUCKET_ID = int(os.environ.get("VIKUNJA_IN_PROGRESS_BUCKET_ID", "117"))

K8S_NAMESPACE = os.environ.get("K8S_NAMESPACE", "autojanet")
AGENT_IMAGE = os.environ.get("AGENT_IMAGE", "registry.ctz.fyi/autojanet/agent:latest")

# Base URLs forwarded to agent Jobs. The defaults are the values that were
# previously hard-coded in the Job spec, so behaviour is unchanged unless the
# variables are set.
LITELLM_BASE_URL = os.environ.get("LITELLM_BASE_URL", "https://llm.ctz.fyi")
FORGEJO_BASE_URL = os.environ.get("FORGEJO_BASE_URL", "https://git.ctz.fyi")

# Roles a task may be routed to via an `agent:<role>` label.
VALID_ROLES = {
    "pm", "coder", "code-reviewer", "test-engineer", "devsecops", "secops",
    "sre", "kubernetes-pilot", "linux-admin", "systems-engineer", "networking",
    "dba", "prometheus-expert", "tofu-engineer", "release-manager",
    "doc-updater", "doc-writer", "technical-writer", "cost-optimizer",
}

# ── OpenBao ───────────────────────────────────────────────────────────────────

def get_openbao_token() -> str:
    """Authenticate to OpenBao via AppRole and return a client token.

    Raises:
        httpx.HTTPStatusError: if the login is rejected.
        httpx.HTTPError: on transport failures (timeout is 10 seconds).
    """
    resp = httpx.post(
        f"{OPENBAO_ADDR}/v1/auth/approle/login",
        json={"role_id": OPENBAO_ROLE_ID, "secret_id": OPENBAO_SECRET_ID},
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()["auth"]["client_token"]


def get_secret(bao_token: str, path: str, key: str) -> str:
    """Read a KV v2 secret from OpenBao.

    Args:
        bao_token: Client token, sent as ``X-Vault-Token``.
        path: Secret path below ``secret/data/``.
        key: Field to return from the secret's ``data.data`` object.

    Raises:
        httpx.HTTPStatusError: if the read is rejected.
        KeyError: if the secret has no field ``key``.
    """
    resp = httpx.get(
        f"{OPENBAO_ADDR}/v1/secret/data/{path}",
        headers={"X-Vault-Token": bao_token},
        timeout=10,
    )
    resp.raise_for_status()
    return resp.json()["data"]["data"][key]


# ── Vikunja ───────────────────────────────────────────────────────────────────

def get_vikunja_token(bao_token: str) -> str:
    """Fetch the dispatcher's Vikunja token from OpenBao.

    NOTE(review): this reads the token stored under the ``pm`` role's path;
    confirm the dispatcher AppRole's policy is meant to allow that.
    """
    return get_secret(bao_token, "autojanet/pm/vikunja-token", "token")


def list_todo_tasks(vikunja_token: str) -> list[dict]:
    """Return all tasks in the Todo bucket of the AutoJanet project.

    Pages through the project's task list 50 at a time and keeps the tasks
    whose ``bucket_id`` equals ``VIKUNJA_TODO_BUCKET_ID``.

    Raises:
        httpx.HTTPStatusError: if any page request is rejected.
    """
    page_size = 50
    tasks = []
    page = 1
    while True:
        resp = httpx.get(
            f"{VIKUNJA_BASE_URL}/api/v1/projects/{VIKUNJA_PROJECT_ID}/tasks",
            headers={"Authorization": f"Bearer {vikunja_token}"},
            params={"page": page, "per_page": page_size},
            timeout=15,
        )
        resp.raise_for_status()
        batch = resp.json()
        if not batch:
            break
        tasks.extend(batch)
        # A short page is treated as the last page.
        if len(batch) < page_size:
            break
        page += 1
    # Filter to Todo bucket only
    return [t for t in tasks if t.get("bucket_id") == VIKUNJA_TODO_BUCKET_ID]


def extract_agent_role(task: dict) -> str | None:
    """
    Return the role name if the task has exactly one `agent:<role>` label
    that matches a known role. Returns None otherwise.

    Labels naming a role outside ``VALID_ROLES`` are ignored; a task with two
    or more valid agent labels is ambiguous and also yields None.
    """
    labels = task.get("labels") or []
    roles_found = []
    for label in labels:
        title = label.get("title", "")
        m = re.match(r"^agent:(.+)$", title)
        if m:
            role = m.group(1)
            if role in VALID_ROLES:
                roles_found.append(role)
    if len(roles_found) == 1:
        return roles_found[0]
    return None


def _move_task(vikunja_token: str, task_id: int, bucket_id: int, action: str) -> bool:
    """POST a bucket change for a task; log and return False on any failure.

    ``action`` is only used in the warning message ("claim", "release").
    Transport errors are caught so that one unreachable request cannot abort
    the whole dispatcher run.
    """
    try:
        resp = httpx.post(
            f"{VIKUNJA_BASE_URL}/api/v1/tasks/{task_id}",
            headers={
                "Authorization": f"Bearer {vikunja_token}",
                "Content-Type": "application/json",
            },
            json={"bucket_id": bucket_id},
            timeout=10,
        )
    except httpx.HTTPError as e:
        log.warning("Failed to %s task %d: %s", action, task_id, e)
        return False
    if resp.status_code in (200, 201):
        return True
    log.warning("Failed to %s task %d: %d %s", action, task_id, resp.status_code, resp.text)
    return False


def claim_task(vikunja_token: str, task_id: int) -> bool:
    """Move task to In Progress bucket. Returns True on success."""
    return _move_task(vikunja_token, task_id, VIKUNJA_IN_PROGRESS_BUCKET_ID, "claim")


def _release_task(vikunja_token: str, task_id: int) -> bool:
    """Move a previously claimed task back to the Todo bucket."""
    return _move_task(vikunja_token, task_id, VIKUNJA_TODO_BUCKET_ID, "release")


# ── Kubernetes ────────────────────────────────────────────────────────────────

def load_k8s_config() -> None:
    """Load in-cluster credentials, falling back to the local kubeconfig."""
    try:
        k8s_config.load_incluster_config()
    except k8s_config.ConfigException:
        k8s_config.load_kube_config()


def job_name(role: str, task_id: int) -> str:
    """Build the Job name: ``agent-<role without hyphens, max 12 chars>-<task_id>``.

    Example: role "kubernetes-pilot", task 42 -> "agent-kubernetespi-42".
    """
    safe_role = role.replace("-", "")[:12]
    return f"agent-{safe_role}-{task_id}"


def job_already_exists(batch_v1: k8s_client.BatchV1Api, name: str) -> bool:
    """Return True if a Job called ``name`` exists in ``K8S_NAMESPACE``.

    Raises:
        kubernetes.client.ApiException: for any API error other than 404.
    """
    try:
        batch_v1.read_namespaced_job(name=name, namespace=K8S_NAMESPACE)
        return True
    except k8s_client.ApiException as e:
        if e.status == 404:
            return False
        raise


def _build_agent_job(name: str, role: str, task_id: int, task_title: str) -> k8s_client.V1Job:
    """Assemble the Job object for one task; performs no API calls."""
    labels = {
        "autojanet/type": "agent",
        "autojanet/role": role,
        "autojanet/task-id": str(task_id),
    }
    # AppRole credentials come from the per-role Secret `agent-<role>-approle`.
    approle_secret = f"agent-{role}-approle"

    env = [
        k8s_client.V1EnvVar(name="AGENT_ROLE", value=role),
        k8s_client.V1EnvVar(name="TASK_ID", value=str(task_id)),
        k8s_client.V1EnvVar(name="TASK_TITLE", value=task_title),
        k8s_client.V1EnvVar(name="OPENBAO_ADDR", value=OPENBAO_ADDR),
        k8s_client.V1EnvVar(name="LITELLM_BASE_URL", value=LITELLM_BASE_URL),
        k8s_client.V1EnvVar(name="VIKUNJA_BASE_URL", value=VIKUNJA_BASE_URL),
        k8s_client.V1EnvVar(name="FORGEJO_BASE_URL", value=FORGEJO_BASE_URL),
        k8s_client.V1EnvVar(
            name="OPENBAO_ROLE_ID",
            value_from=k8s_client.V1EnvVarSource(
                secret_key_ref=k8s_client.V1SecretKeySelector(
                    name=approle_secret,
                    key="role_id",
                )
            ),
        ),
        k8s_client.V1EnvVar(
            name="OPENBAO_SECRET_ID",
            value_from=k8s_client.V1EnvVarSource(
                secret_key_ref=k8s_client.V1SecretKeySelector(
                    name=approle_secret,
                    key="secret_id",
                )
            ),
        ),
    ]

    container = k8s_client.V1Container(
        name="agent",
        image=AGENT_IMAGE,
        image_pull_policy="Always",
        env=env,
        resources=k8s_client.V1ResourceRequirements(
            requests={"cpu": "250m", "memory": "512Mi"},
            limits={"cpu": "2000m", "memory": "2Gi"},
        ),
        security_context=k8s_client.V1SecurityContext(
            allow_privilege_escalation=False,
            run_as_non_root=True,
            run_as_user=1000,
            capabilities=k8s_client.V1Capabilities(drop=["ALL"]),
        ),
    )

    return k8s_client.V1Job(
        api_version="batch/v1",
        kind="Job",
        metadata=k8s_client.V1ObjectMeta(
            name=name,
            namespace=K8S_NAMESPACE,
            labels=dict(labels),
        ),
        spec=k8s_client.V1JobSpec(
            ttl_seconds_after_finished=3600,
            backoff_limit=1,
            template=k8s_client.V1PodTemplateSpec(
                metadata=k8s_client.V1ObjectMeta(labels=dict(labels)),
                spec=k8s_client.V1PodSpec(
                    service_account_name=f"agent-{role}",
                    restart_policy="Never",
                    node_selector={"kubernetes.io/arch": "amd64"},
                    containers=[container],
                ),
            ),
        ),
    )


def spawn_agent_job(
    batch_v1: k8s_client.BatchV1Api,
    role: str,
    task_id: int,
    task_title: str,
) -> None:
    """Create the agent Job for a task unless a Job with that name already exists.

    Raises:
        kubernetes.client.ApiException: if the API rejects the request for any
            reason other than the Job already existing.
    """
    name = job_name(role, task_id)
    if job_already_exists(batch_v1, name):
        log.info("Job %s already exists, skipping", name)
        return

    job = _build_agent_job(name, role, task_id, task_title)

    try:
        batch_v1.create_namespaced_job(namespace=K8S_NAMESPACE, body=job)
    except k8s_client.ApiException as e:
        # The Job can appear between the existence check and the create call;
        # the API server reports that as 409 Conflict.
        if e.status == 409:
            log.info("Job %s already exists, skipping", name)
            return
        raise
    log.info("Spawned job %s for role=%s task=%d", name, role, task_id)


# ── Main ──────────────────────────────────────────────────────────────────────

def main() -> None:
    """Claim every routable Todo task and start one agent Job per task."""
    log.info("Dispatcher starting")

    # Auth
    bao_token = get_openbao_token()
    vikunja_token = get_vikunja_token(bao_token)
    log.info("Authenticated to OpenBao and Vikunja")

    # k8s
    load_k8s_config()
    batch_v1 = k8s_client.BatchV1Api()

    # Scan tasks
    tasks = list_todo_tasks(vikunja_token)
    log.info("Found %d tasks in Todo bucket", len(tasks))

    claimed = 0
    for task in tasks:
        task_id = task["id"]
        title = task.get("title", "")
        role = extract_agent_role(task)

        if not role:
            log.debug("Task %d has no valid agent label, skipping", task_id)
            continue

        log.info("Claiming task %d (%s) for role=%s", task_id, title[:60], role)
        if not claim_task(vikunja_token, task_id):
            continue

        try:
            spawn_agent_job(batch_v1, role, task_id, title)
            claimed += 1
        except Exception as e:
            log.error("Failed to spawn job for task %d: %s", task_id, e)
            # Un-claim: move back to Todo. A failure here must not stop the
            # remaining tasks from being processed, but it leaves the task
            # stranded in In Progress, so make it loud.
            if not _release_task(vikunja_token, task_id):
                log.error(
                    "Task %d could not be moved back to Todo; it stays In Progress without a job",
                    task_id,
                )

    log.info("Dispatcher done. Claimed %d tasks.", claimed)


if __name__ == "__main__":
    main()
# ── k8s/manifests/clustersecretstore.yaml ────────────────────────────────────
---
# ClusterSecretStore "openbao": the store that ExternalSecret resources
# reference to pull secrets (such as the agents' AppRole credentials) from the
# in-cluster OpenBao into k8s Secrets.
#
# This file defines only the store. The per-role ExternalSecrets are not part
# of this manifest; the original note says they are generated per role and
# deployed via: kubectl apply -f externalsecrets/
#
# Prerequisites:
# - external-secrets operator installed
# - OpenBao Kubernetes auth mount "kubernetes" with a role "external-secrets"
#   usable by the ServiceAccount external-secrets/external-secrets
#
apiVersion: external-secrets.io/v1beta1
kind: ClusterSecretStore
metadata:
  name: openbao
spec:
  provider:
    vault:
      server: "http://openbao.openbao.svc.cluster.local:8200"
      path: "secret"
      version: "v2"
      auth:
        kubernetes:
          mountPath: "kubernetes"
          role: "external-secrets"
          serviceAccountRef:
            name: "external-secrets"
            namespace: "external-secrets"

# ── k8s/manifests/dispatcher-cronjob.yaml ────────────────────────────────────
---
# Dispatcher CronJob — runs every 2 minutes, claims unclaimed tasks from Vikunja
# and spawns agent Jobs for each
apiVersion: batch/v1
kind: CronJob
metadata:
  name: dispatcher
  namespace: autojanet
  labels:
    autojanet/role: dispatcher
spec:
  schedule: "*/2 * * * *"
  concurrencyPolicy: Forbid  # never run two dispatchers simultaneously
  successfulJobsHistoryLimit: 5
  failedJobsHistoryLimit: 5
  jobTemplate:
    spec:
      ttlSecondsAfterFinished: 600
      template:
        metadata:
          labels:
            # Selected by the allow-dispatcher-egress NetworkPolicy.
            autojanet/role: dispatcher
        spec:
          serviceAccountName: dispatcher
          restartPolicy: Never
          containers:
            - name: dispatcher
              image: registry.ctz.fyi/autojanet/dispatcher:latest
              imagePullPolicy: Always
              env:
                - name: OPENBAO_ADDR
                  value: "http://openbao.openbao.svc.cluster.local:8200"
                # AppRole credentials come from the Secret "dispatcher-approle".
                - name: OPENBAO_ROLE_ID
                  valueFrom:
                    secretKeyRef:
                      name: dispatcher-approle
                      key: role_id
                - name: OPENBAO_SECRET_ID
                  valueFrom:
                    secretKeyRef:
                      name: dispatcher-approle
                      key: secret_id
                - name: VIKUNJA_BASE_URL
                  value: "https://tasks.ctz.fyi"
                - name: VIKUNJA_PROJECT_ID
                  value: "78"
                - name: VIKUNJA_TODO_BUCKET_ID
                  value: "116"
                - name: VIKUNJA_IN_PROGRESS_BUCKET_ID
                  value: "117"
                - name: K8S_NAMESPACE
                  value: "autojanet"
                - name: AGENT_IMAGE
                  value: "registry.ctz.fyi/autojanet/agent:latest"
              resources:
                requests:
                  cpu: "100m"
                  memory: "128Mi"
                limits:
                  cpu: "500m"
                  memory: "256Mi"
              securityContext:
                allowPrivilegeEscalation: false
                runAsNonRoot: true
                runAsUser: 1000
                readOnlyRootFilesystem: true
                capabilities:
                  drop: ["ALL"]

# ── k8s/manifests/job-template.yaml ──────────────────────────────────────────
---
# Agent Job Template
# Reference only — not applied directly and not read by the dispatcher.
# dispatcher.py builds the equivalent Job object in code (spawn_agent_job)
# with the Kubernetes Python client. Placeholders used below:
#   ${ROLE}, ${TASK_ID}, ${TASK_TITLE}
#
# Differences from what the dispatcher actually creates:
#   - Job name: the dispatcher's job_name() removes hyphens from the role and
#     truncates it to 12 characters, so names do not match agent-${ROLE}-${TASK_ID}
#     for hyphenated or long roles.
#   - image and most URLs are taken from the dispatcher's environment.
apiVersion: batch/v1
kind: Job
metadata:
  name: agent-${ROLE}-${TASK_ID}
  namespace: autojanet
  labels:
    autojanet/type: agent
    autojanet/role: ${ROLE}
    autojanet/task-id: "${TASK_ID}"
spec:
  ttlSecondsAfterFinished: 3600   # clean up after 1 hour
  backoffLimit: 1                 # retry once on failure
  template:
    metadata:
      labels:
        autojanet/type: agent
        autojanet/role: ${ROLE}
        autojanet/task-id: "${TASK_ID}"
    spec:
      serviceAccountName: agent-${ROLE}
      restartPolicy: Never
      # CPU nodes only — GPU reserved for LiteLLM
      nodeSelector:
        kubernetes.io/arch: amd64
      tolerations: []
      containers:
        - name: agent
          image: registry.ctz.fyi/autojanet/agent:latest
          imagePullPolicy: Always
          env:
            - name: AGENT_ROLE
              value: "${ROLE}"
            - name: TASK_ID
              value: "${TASK_ID}"
            - name: TASK_TITLE
              value: "${TASK_TITLE}"
            - name: OPENBAO_ROLE_ID
              valueFrom:
                secretKeyRef:
                  name: agent-${ROLE}-approle
                  key: role_id
            - name: OPENBAO_SECRET_ID
              valueFrom:
                secretKeyRef:
                  name: agent-${ROLE}-approle
                  key: secret_id
            - name: OPENBAO_ADDR
              value: "http://openbao.openbao.svc.cluster.local:8200"
            - name: LITELLM_BASE_URL
              value: "https://llm.ctz.fyi"
            - name: VIKUNJA_BASE_URL
              value: "https://tasks.ctz.fyi"
            - name: FORGEJO_BASE_URL
              value: "https://git.ctz.fyi"
          resources:
            requests:
              cpu: "250m"
              memory: "512Mi"
            limits:
              cpu: "2000m"
              memory: "2Gi"
          securityContext:
            allowPrivilegeEscalation: false
            runAsNonRoot: true
            runAsUser: 1000
            # The entrypoint writes ~/.config/opencode and a temp prompt file,
            # so the root filesystem is left writable.
            readOnlyRootFilesystem: false
            capabilities:
              drop: ["ALL"]

# ── k8s/manifests/namespace.yaml ─────────────────────────────────────────────
---
# Namespace that holds the dispatcher CronJob and all agent Jobs.
apiVersion: v1
kind: Namespace
metadata:
  name: autojanet
  labels:
    app.kubernetes.io/name: autojanet
    app.kubernetes.io/managed-by: argocd
---
# Default-deny all ingress and egress in autojanet namespace
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-all
  namespace: autojanet
spec:
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
---
# Allow agents to reach the internet (APIs: Vikunja, Forgejo, LiteLLM, OpenBao, Grafana, etc.)
# All external services are HTTPS on 443; OpenBao internal is 8200
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-egress-external
  namespace: autojanet
spec:
  podSelector:
    matchLabels:
      autojanet/type: agent
  policyTypes:
    - Egress
  egress:
    # HTTPS to external services. No `to` clause: any destination on 443.
    - ports:
        - port: 443
          protocol: TCP
    # DNS. No `to` clause: any destination on 53, not only cluster DNS.
    - ports:
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    # OpenBao in-cluster (openbao namespace)
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: openbao
      ports:
        - port: 8200
          protocol: TCP
    # k8s API server (for kubectl-capable agents)
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - port: 6443
          protocol: TCP
---
# Dispatcher egress: HTTPS (Vikunja), DNS, OpenBao in-cluster, and the k8s API
# server. Note that the 443 and 53 rules have no `to` clause, so they are not
# limited to specific destinations.
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: allow-dispatcher-egress
  namespace: autojanet
spec:
  podSelector:
    matchLabels:
      autojanet/role: dispatcher
  policyTypes:
    - Egress
  egress:
    - ports:
        - port: 443
          protocol: TCP
        - port: 53
          protocol: UDP
        - port: 53
          protocol: TCP
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: openbao
      ports:
        - port: 8200
          protocol: TCP
    # k8s API server. The dispatcher creates Jobs, so it needs the same 6443
    # rule the agent policy has; without it the API is unreachable wherever
    # the policy is enforced against the API server's real port rather than
    # the Service port 443.
    - to:
        - ipBlock:
            cidr: 0.0.0.0/0
      ports:
        - port: 6443
          protocol: TCP
---
# ServiceAccount per agent role
# One SA per role, named agent-<role>. The dispatcher sets this SA on the Job
# it spawns for the role and injects AppRole credentials from the Secret
# agent-<role>-approle; that Secret is not defined in this file.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-pm
  namespace: autojanet
  labels:
    autojanet/role: pm
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-coder
  namespace: autojanet
  labels:
    autojanet/role: coder
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-code-reviewer
  namespace: autojanet
  labels:
    autojanet/role: code-reviewer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-test-engineer
  namespace: autojanet
  labels:
    autojanet/role: test-engineer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-devsecops
  namespace: autojanet
  labels:
    autojanet/role: devsecops
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-secops
  namespace: autojanet
  labels:
    autojanet/role: secops
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-sre
  namespace: autojanet
  labels:
    autojanet/role: sre
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-kubernetes-pilot
  namespace: autojanet
  labels:
    autojanet/role: kubernetes-pilot
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-linux-admin
  namespace: autojanet
  labels:
    autojanet/role: linux-admin
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-systems-engineer
  namespace: autojanet
  labels:
    autojanet/role: systems-engineer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-networking
  namespace: autojanet
  labels:
    autojanet/role: networking
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-dba
  namespace: autojanet
  labels:
    autojanet/role: dba
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-prometheus-expert
  namespace: autojanet
  labels:
    autojanet/role: prometheus-expert
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-tofu-engineer
  namespace: autojanet
  labels:
    autojanet/role: tofu-engineer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-release-manager
  namespace: autojanet
  labels:
    autojanet/role: release-manager
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-doc-updater
  namespace: autojanet
  labels:
    autojanet/role: doc-updater
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-doc-writer
  namespace: autojanet
  labels:
    autojanet/role: doc-writer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-technical-writer
  namespace: autojanet
  labels:
    autojanet/role: technical-writer
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: agent-cost-optimizer
  namespace: autojanet
  labels:
    autojanet/role: cost-optimizer
---
# Dispatcher ServiceAccount — runs the CronJob that claims tasks
apiVersion: v1
kind: ServiceAccount
metadata:
  name: dispatcher
  namespace: autojanet
  labels:
    autojanet/role: dispatcher
---
# Role: create and read Jobs, and read pods and pod logs, in the autojanet
# namespace.
# NOTE(review): no RoleBinding in this file references agent-job-runner, so as
# written none of the agent ServiceAccounts receives these permissions —
# confirm whether bindings live elsewhere or are still to be added.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: agent-job-runner
  namespace: autojanet
rules:
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["create", "get", "list", "watch"]
  - apiGroups: [""]
    resources: ["pods", "pods/log"]
    verbs: ["get", "list", "watch"]
---
# Dispatcher gets broader job management: the same Job verbs plus delete, and
# read access to configmaps in addition to pods and pod logs.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: dispatcher
  namespace: autojanet
rules:
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["create", "get", "list", "watch", "delete"]
  - apiGroups: [""]
    resources: ["pods", "pods/log", "configmaps"]
    verbs: ["get", "list", "watch"]
---
# Grants the dispatcher Role to the dispatcher ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: dispatcher
  namespace: autojanet
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: dispatcher
subjects:
  - kind: ServiceAccount
    name: dispatcher
    namespace: autojanet