From fd8392ba4ab78107c0a01f4b86e003f9a35199c2 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:09:32 +0000 Subject: [PATCH 01/11] feat(img-db): add ecto_sqlite3 dep and backend config scaffolding --- config/config.exs | 12 ++++++++++++ mix.exs | 1 + mix.lock | 3 +++ 3 files changed, 16 insertions(+) diff --git a/config/config.exs b/config/config.exs index 4cf3f19..e657d90 100644 --- a/config/config.exs +++ b/config/config.exs @@ -36,9 +36,21 @@ if config_env() == :test do config :libcluster, topologies: [] end +# Image-graph storage backend. :postgres (default, cluster-safe) or :sqlite +# (single-node only; enforced at runtime by Hyper.Img.Db.SingleNodeGuard). +config :hyper, Hyper.Img.Db, backend: :postgres + config :hyper, Hyper.Img.Db.Repo, database: "hyper_dev", username: "postgres", password: "postgres", hostname: "localhost", pool_size: 10 + +config :hyper, Hyper.Img.Db.Repo.Sqlite, + database: Path.expand("../priv/sqlite/hyper.db", __DIR__), + pool_size: 1, + journal_mode: :wal, + busy_timeout: 5_000, + binary_id_type: :string, + datetime_type: :iso8601 diff --git a/mix.exs b/mix.exs index 742a16d..3e90c16 100644 --- a/mix.exs +++ b/mix.exs @@ -46,6 +46,7 @@ defmodule Hyper.MixProject do {:dialyxir, "~> 1.4", only: [:dev], runtime: false}, {:ex_doc, "~> 0.34", only: :dev, runtime: false}, {:ecto_sql, "~> 3.13"}, + {:ecto_sqlite3, "~> 0.17"}, {:horde, "~> 0.9"}, {:jason, "~> 1.4"}, {:libcluster, "~> 3.3"}, diff --git a/mix.lock b/mix.lock index b2dd818..8ab3ff7 100644 --- a/mix.lock +++ b/mix.lock @@ -1,6 +1,7 @@ %{ "acceptor_pool": {:hex, :acceptor_pool, "1.0.1", "d88c2e8a0be9216cf513fbcd3e5a4beb36bee3ff4168e85d6152c6f899359cdb", [:rebar3], [], "hexpm", "f172f3d74513e8edd445c257d596fc84dbdd56d2c6fa287434269648ae5a421e"}, "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, + "cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"}, "chatterbox": {:hex, :ts_chatterbox, "0.15.1", "5cac4d15dd7ad61fc3c4415ce4826fc563d4643dee897a558ec4ea0b1c835c9c", [:rebar3], [{:hpack, "~> 0.3.0", [hex: :hpack_erl, repo: "hexpm", optional: false]}], "hexpm", "4f75b91451338bc0da5f52f3480fa6ef6e3a2aeecfc33686d6b3d0a0948f31aa"}, "credo": {:hex, :credo, "1.7.19", "cc52129665fc7c15143d47838fda0f9cd6dac9ceced7bf4da6f85fcbfe64b12a", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "2d8bc95d5a7bb99dd2613621d4f08c6a3575c3fd4b62e6a2b48a100352a557b8"}, "ctx": {:hex, :ctx, "0.6.0", "8ff88b70e6400c4df90142e7f130625b82086077a45364a78d208ed3ed53c7fe", [:rebar3], [], "hexpm", "a14ed2d1b67723dbebbe423b28d7615eb0bdcba6ff28f2d1f1b0a7e1d4aa5fc2"}, @@ -12,9 +13,11 @@ "earmark_parser": {:hex, :earmark_parser, "1.4.45", "cba8369ab2a1342e419bc2760eec731b17be828941dcf494045d44766227e1d5", [:mix], [], "hexpm", "d3ec045bf122965db20c0bdb420e19ee1415843135327124918473feb4b328e8"}, "ecto": {:hex, :ecto, "3.13.6", "352135b474f91d1ab99a1b502171d207e9db60421c9e3d0ecab4c7ab96b24d14", [:mix], [{:decimal, "~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "8afa059bc16cd2c94739ec0a11e3e5df69d828125119109bef35f20a21a76af2"}, "ecto_sql": {:hex, :ecto_sql, "3.13.5", "2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"}, + "ecto_sqlite3": {:hex, :ecto_sqlite3, "0.23.0", "79da75815627582f081f00d418c130c4cf587672b720b54e7a8798c6d46b5415", [:mix], [{:decimal, "~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.13.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:exqlite, "~> 0.22", [hex: :exqlite, repo: "hexpm", optional: false]}], "hexpm", "e97041bcec746ed525df7d9ad996fbae3b0660767f99fbe9e9b58d6208729703"}, "elixir_make": {:hex, :elixir_make, "0.10.0", "16577e2583a79bb79237bbff349619ef5d80afffc07eac6e4faf0d00e2ddaf7d", [:mix], [], "hexpm", "dc1f09fb7fa68866b886abd5f0f3c83553b1a19a52359a899e92af1bb3b31982"}, "erlex": {:hex, :erlex, "0.2.9", "7debbbaa9f4f368b8cd648983e0f1d7963028508e9c59e9d4ed504e94ef52a55", [:mix], [], "hexpm", "8cfffc0ec7159e6d73de2ab28a588064de80f88b2798d5cbe4482cbbc200178b"}, "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, {:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, + "exqlite": {:hex, :exqlite, "0.37.0", "701e7e02679e8c1bb6da331ea93d83b481c714b0831e82e2f8a73375b3d93a9e", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "a44816dd0d234fba68c47a3609af61d306d24ef517a89bfaee4d6a811792d913"}, "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, "finch": {:hex, :finch, "0.23.0", "e3f9287ac25a8832f848b144c2b57346aac65b205e2e0629a52adfe6507fd837", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.8", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "80e58d3f936f57e3fdf404f83a3642897ae6d9fb642934e46da4d8fe761b99d5"}, "gproc": {:hex, :gproc, "0.9.1", "f1df0364423539cf0b80e8201c8b1839e229e5f9b3ccb944c5834626998f5b8c", [:rebar3], [], "hexpm", "905088e32e72127ed9466f0bac0d8e65704ca5e73ee5a62cb073c3117916d507"}, From 92903a24b2e9107803026f73d0e6df5247738763 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:19:31 +0000 Subject: [PATCH 02/11] refactor(img-db): facade over postgres/sqlite repos selected at runtime - Add Hyper.Img.Db.Repo.Postgres (concrete, priv: priv/repo keeps existing migrations) - Add Hyper.Img.Db.Repo.Sqlite (concrete, same priv path) - Add Hyper.Img.Db.Backend (config-driven selector; selected/0, repo/0, sqlite?/0) - Rewrite Hyper.Img.Db.Repo as a thin runtime facade forwarding Ecto callbacks to Backend.repo() - Add with_low_priority/2 to facade (postgres: statement_timeout tx; sqlite: passthrough) - Rename config block from Repo to Repo.Postgres; add priv: "priv/repo" to both repo configs - Point ecto_repos at Hyper.Img.Db.Repo.Postgres; start Backend.repo() in application tree --- config/config.exs | 4 +- lib/hyper/application.ex | 2 +- lib/hyper/img/db/backend.ex | 30 +++++++++++ lib/hyper/img/db/repo.ex | 83 +++++++++++++++++++++++++----- lib/hyper/img/db/repo/postgres.ex | 14 +++++ lib/hyper/img/db/repo/sqlite.ex | 18 +++++++ mix.exs | 2 +- test/hyper/img/db/backend_test.exs | 25 +++++++++ 8 files changed, 163 insertions(+), 15 deletions(-) create mode 100644 lib/hyper/img/db/backend.ex create mode 100644 lib/hyper/img/db/repo/postgres.ex create mode 100644 lib/hyper/img/db/repo/sqlite.ex create mode 100644 test/hyper/img/db/backend_test.exs diff --git a/config/config.exs b/config/config.exs index e657d90..f78412d 100644 --- a/config/config.exs +++ b/config/config.exs @@ -40,7 +40,8 @@ end # (single-node only; enforced at runtime by Hyper.Img.Db.SingleNodeGuard). config :hyper, Hyper.Img.Db, backend: :postgres -config :hyper, Hyper.Img.Db.Repo, +config :hyper, Hyper.Img.Db.Repo.Postgres, + priv: "priv/repo", database: "hyper_dev", username: "postgres", password: "postgres", @@ -48,6 +49,7 @@ config :hyper, Hyper.Img.Db.Repo, pool_size: 10 config :hyper, Hyper.Img.Db.Repo.Sqlite, + priv: "priv/repo", database: Path.expand("../priv/sqlite/hyper.db", __DIR__), pool_size: 1, journal_mode: :wal, diff --git a/lib/hyper/application.ex b/lib/hyper/application.ex index 70e30f0..d9f6089 100644 --- a/lib/hyper/application.ex +++ b/lib/hyper/application.ex @@ -17,7 +17,7 @@ defmodule Hyper.Application do children = [ # The image-lineage database. Started first so the rest of the node can # query images/leases on boot. - Hyper.Img.Db.Repo, + Hyper.Img.Db.Backend.repo(), # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` # can discover peer nodes. Gossip strategy in dev - see config/config.exs. {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, diff --git a/lib/hyper/img/db/backend.ex b/lib/hyper/img/db/backend.ex new file mode 100644 index 0000000..0f463e8 --- /dev/null +++ b/lib/hyper/img/db/backend.ex @@ -0,0 +1,30 @@ +defmodule Hyper.Img.Db.Backend do + @moduledoc """ + Resolves the active image-graph storage backend from configuration. + + Configured via `config :hyper, Hyper.Img.Db, backend: :postgres | :sqlite`. + `:postgres` is the cluster-safe default; `:sqlite` is valid only on a + single node (see `Hyper.Img.Db.SingleNodeGuard`). + """ + + @repos %{ + postgres: Hyper.Img.Db.Repo.Postgres, + sqlite: Hyper.Img.Db.Repo.Sqlite + } + + @doc "The configured backend, defaulting to `:postgres`." + @spec selected() :: :postgres | :sqlite + def selected do + :hyper + |> Application.get_env(Hyper.Img.Db, []) + |> Keyword.get(:backend, :postgres) + end + + @doc "The concrete repo module for the configured backend." + @spec repo() :: module() + def repo, do: Map.fetch!(@repos, selected()) + + @doc "True when the SQLite backend is configured." + @spec sqlite?() :: boolean() + def sqlite?, do: selected() == :sqlite +end diff --git a/lib/hyper/img/db/repo.ex b/lib/hyper/img/db/repo.ex index a0e8cb8..06c6a8d 100644 --- a/lib/hyper/img/db/repo.ex +++ b/lib/hyper/img/db/repo.ex @@ -1,19 +1,78 @@ defmodule Hyper.Img.Db.Repo do @moduledoc """ - Global database of all known layers, and how they relate to each other. + Runtime facade over the active image-graph repository. - At the current stage of this project, we use postgres to track images and how they relate. - Note that images can build on top of images. + All application code talks to this module; it forwards Ecto callbacks to + whichever concrete repo `Hyper.Img.Db.Backend` selects (Postgres or + SQLite). Adapter-specific behaviour is encapsulated in `with_low_priority/2`. + """ + + alias Hyper.Img.Db.Backend + + # --- Ecto.Repo callbacks used across the codebase ------------------------ + # If `grep -rn "Repo\\." lib/` surfaces a callback not listed here, add a + # matching forwarder. Each is a one-line delegation to the active repo. + + def all(queryable, opts \\ []), do: Backend.repo().all(queryable, opts) + def one(queryable, opts \\ []), do: Backend.repo().one(queryable, opts) + def one!(queryable, opts \\ []), do: Backend.repo().one!(queryable, opts) + def get(queryable, id, opts \\ []), do: Backend.repo().get(queryable, id, opts) + def get!(queryable, id, opts \\ []), do: Backend.repo().get!(queryable, id, opts) + def get_by(queryable, clauses, opts \\ []), do: Backend.repo().get_by(queryable, clauses, opts) + + def get_by!(queryable, clauses, opts \\ []), + do: Backend.repo().get_by!(queryable, clauses, opts) + + def exists?(queryable, opts \\ []), do: Backend.repo().exists?(queryable, opts) + def insert(struct, opts \\ []), do: Backend.repo().insert(struct, opts) + def insert!(struct, opts \\ []), do: Backend.repo().insert!(struct, opts) + + def insert_all(schema, entries, opts \\ []), + do: Backend.repo().insert_all(schema, entries, opts) - This repo is responsible for answering the questions: - - Given an image id, is it a base image or a layered image? - - If an image is a layered image, what are the layers to build it? - - Who is currently actively holding onto an image? This can mean, potentially, in the case of - layered images: - - Who is holding onto the image or any of its children? + def update(struct, opts \\ []), do: Backend.repo().update(struct, opts) + def update!(struct, opts \\ []), do: Backend.repo().update!(struct, opts) + + def update_all(queryable, updates, opts \\ []), + do: Backend.repo().update_all(queryable, updates, opts) + + def delete(struct, opts \\ []), do: Backend.repo().delete(struct, opts) + def delete!(struct, opts \\ []), do: Backend.repo().delete!(struct, opts) + def delete_all(queryable, opts \\ []), do: Backend.repo().delete_all(queryable, opts) + def preload(structs, preloads, opts \\ []), do: Backend.repo().preload(structs, preloads, opts) + def transaction(fun_or_multi, opts \\ []), do: Backend.repo().transaction(fun_or_multi, opts) + def rollback(value), do: Backend.repo().rollback(value) + def query(sql, params \\ [], opts \\ []), do: Backend.repo().query(sql, params, opts) + def query!(sql, params \\ [], opts \\ []), do: Backend.repo().query!(sql, params, opts) + + @doc """ + Runs `fun` under a best-effort, time-bounded, low-priority context. + + Postgres: wraps `fun` in a transaction with a transaction-local + `statement_timeout`, so a slow sweep cannot pin a connection indefinitely. + SQLite: single-writer with a connection `busy_timeout`; there is no + per-statement timeout, so `fun` is run directly. + + Returns the value of `fun`. """ + @spec with_low_priority(non_neg_integer(), (-> result)) :: result when result: var + def with_low_priority(timeout_ms, fun) when is_integer(timeout_ms) and is_function(fun, 0) do + case Backend.selected() do + :postgres -> + {:ok, result} = + Backend.repo().transaction(fn -> + _ = + Backend.repo().query!("SELECT set_config('statement_timeout', $1, true)", [ + Integer.to_string(timeout_ms) + ]) + + fun.() + end) + + result - use Ecto.Repo, - otp_app: :hyper, - adapter: Ecto.Adapters.Postgres + :sqlite -> + fun.() + end + end end diff --git a/lib/hyper/img/db/repo/postgres.ex b/lib/hyper/img/db/repo/postgres.ex new file mode 100644 index 0000000..1638609 --- /dev/null +++ b/lib/hyper/img/db/repo/postgres.ex @@ -0,0 +1,14 @@ +defmodule Hyper.Img.Db.Repo.Postgres do + @moduledoc """ + Postgres-backed image-graph repository. + + The cluster-safe default. Reached through the `Hyper.Img.Db.Repo` facade; + not called directly by application code. + """ + + use Ecto.Repo, + otp_app: :hyper, + adapter: Ecto.Adapters.Postgres, + priv: "priv/repo", + telemetry_prefix: [:hyper, :img, :db, :repo] +end diff --git a/lib/hyper/img/db/repo/sqlite.ex b/lib/hyper/img/db/repo/sqlite.ex new file mode 100644 index 0000000..76cc9f7 --- /dev/null +++ b/lib/hyper/img/db/repo/sqlite.ex @@ -0,0 +1,18 @@ +defmodule Hyper.Img.Db.Repo.Sqlite do + @moduledoc """ + SQLite-backed image-graph repository. + + Single-node only: a single-writer file database cannot be shared safely + across cluster nodes. `Hyper.Img.Db.SingleNodeGuard` enforces this at + runtime. Reached through the `Hyper.Img.Db.Repo` facade. + + Shares the `priv/repo/migrations` directory with the Postgres repo; the + image-graph DDL contains no Postgres-specific constructs. + """ + + use Ecto.Repo, + otp_app: :hyper, + adapter: Ecto.Adapters.SQLite3, + priv: "priv/repo", + telemetry_prefix: [:hyper, :img, :db, :repo] +end diff --git a/mix.exs b/mix.exs index 3e90c16..0c6a8ca 100644 --- a/mix.exs +++ b/mix.exs @@ -35,7 +35,7 @@ defmodule Hyper.MixProject do mod: {Hyper.Application, []}, # ecto_repos lives here (not config.exs) since it's well-known and # compile-time fixed. Mix's ecto.* tasks read it from the app env. - env: [ecto_repos: [Hyper.Img.Db.Repo]] + env: [ecto_repos: [Hyper.Img.Db.Repo.Postgres]] ] end diff --git a/test/hyper/img/db/backend_test.exs b/test/hyper/img/db/backend_test.exs new file mode 100644 index 0000000..9c138c1 --- /dev/null +++ b/test/hyper/img/db/backend_test.exs @@ -0,0 +1,25 @@ +defmodule Hyper.Img.Db.BackendTest do + use ExUnit.Case, async: false + + alias Hyper.Img.Db.Backend + + setup do + original = Application.get_env(:hyper, Hyper.Img.Db) + on_exit(fn -> Application.put_env(:hyper, Hyper.Img.Db, original) end) + :ok + end + + test "defaults to the Postgres repo" do + Application.put_env(:hyper, Hyper.Img.Db, []) + assert Backend.selected() == :postgres + assert Backend.repo() == Hyper.Img.Db.Repo.Postgres + refute Backend.sqlite?() + end + + test "resolves the SQLite repo when configured" do + Application.put_env(:hyper, Hyper.Img.Db, backend: :sqlite) + assert Backend.selected() == :sqlite + assert Backend.repo() == Hyper.Img.Db.Repo.Sqlite + assert Backend.sqlite?() + end +end From 6c2990bffc9954c56708a0bf197a176575703d8b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:23:07 +0000 Subject: [PATCH 03/11] fix(img-db): move priv/telemetry_prefix to repo config so Ecto honors them --- config/config.exs | 2 ++ lib/hyper/img/db/repo/postgres.ex | 4 +--- lib/hyper/img/db/repo/sqlite.ex | 4 +--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/config/config.exs b/config/config.exs index f78412d..1cc9036 100644 --- a/config/config.exs +++ b/config/config.exs @@ -42,6 +42,7 @@ config :hyper, Hyper.Img.Db, backend: :postgres config :hyper, Hyper.Img.Db.Repo.Postgres, priv: "priv/repo", + telemetry_prefix: [:hyper, :img, :db, :repo], database: "hyper_dev", username: "postgres", password: "postgres", @@ -50,6 +51,7 @@ config :hyper, Hyper.Img.Db.Repo.Postgres, config :hyper, Hyper.Img.Db.Repo.Sqlite, priv: "priv/repo", + telemetry_prefix: [:hyper, :img, :db, :repo], database: Path.expand("../priv/sqlite/hyper.db", __DIR__), pool_size: 1, journal_mode: :wal, diff --git a/lib/hyper/img/db/repo/postgres.ex b/lib/hyper/img/db/repo/postgres.ex index 1638609..7d1a586 100644 --- a/lib/hyper/img/db/repo/postgres.ex +++ b/lib/hyper/img/db/repo/postgres.ex @@ -8,7 +8,5 @@ defmodule Hyper.Img.Db.Repo.Postgres do use Ecto.Repo, otp_app: :hyper, - adapter: Ecto.Adapters.Postgres, - priv: "priv/repo", - telemetry_prefix: [:hyper, :img, :db, :repo] + adapter: Ecto.Adapters.Postgres end diff --git a/lib/hyper/img/db/repo/sqlite.ex b/lib/hyper/img/db/repo/sqlite.ex index 76cc9f7..f950fcd 100644 --- a/lib/hyper/img/db/repo/sqlite.ex +++ b/lib/hyper/img/db/repo/sqlite.ex @@ -12,7 +12,5 @@ defmodule Hyper.Img.Db.Repo.Sqlite do use Ecto.Repo, otp_app: :hyper, - adapter: Ecto.Adapters.SQLite3, - priv: "priv/repo", - telemetry_prefix: [:hyper, :img, :db, :repo] + adapter: Ecto.Adapters.SQLite3 end From 47609b56fa9ff35e58e7bdea7275cfe74490ca60 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:25:42 +0000 Subject: [PATCH 04/11] docs(img-db): correct telemetry-bridge comment after repo split --- lib/hyper/application.ex | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/hyper/application.ex b/lib/hyper/application.ex index d9f6089..be51fb4 100644 --- a/lib/hyper/application.ex +++ b/lib/hyper/application.ex @@ -8,8 +8,9 @@ defmodule Hyper.Application do # :opentelemetry starts as its own OTP application (a dependency of :hyper), # so it is already running before this supervisor boots. # - # Bridge Ecto's query telemetry into OpenTelemetry spans. The prefix matches - # the repo's default telemetry_prefix (its module path, underscored). + # Bridge Ecto's query telemetry into OpenTelemetry spans. Both concrete + # repos set telemetry_prefix: [:hyper, :img, :db, :repo] in config, so this + # call is valid for whichever backend is active. _ = OpentelemetryEcto.setup([:hyper, :img, :db, :repo]) topologies = Application.get_env(:libcluster, :topologies, []) From 2e608d85019a09df829d90669cb2228c06ebcfdd Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:27:34 +0000 Subject: [PATCH 05/11] refactor(img-db): route GC timeout/error handling through the repo facade --- lib/hyper/img/db/gc.ex | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/lib/hyper/img/db/gc.ex b/lib/hyper/img/db/gc.ex index e4d5995..6f06a73 100644 --- a/lib/hyper/img/db/gc.ex +++ b/lib/hyper/img/db/gc.ex @@ -100,7 +100,7 @@ defmodule Hyper.Img.Db.Gc do rescue # Only swallow database unavailability (incl. statement_timeout aborts) # and retry; let any other exception crash so a real bug surfaces. - e in [Postgrex.Error, DBConnection.ConnectionError] -> + e in [Postgrex.Error, Exqlite.Error, DBConnection.ConnectionError] -> Logger.warning( "layer gc: database unavailable during sweep (#{Exception.message(e)}); retrying" ) @@ -139,7 +139,12 @@ defmodule Hyper.Img.Db.Gc do @spec scan_one_batch(t()) :: t() defp scan_one_batch(%__MODULE__{sweep: sweep} = state) do limit = state.config.batch_size - batch = with_low_priority(state, fn -> Blob.present_after(sweep.cursor, limit) end) + + batch = + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Blob.present_after(sweep.cursor, limit) + end) + {sweep, missing} = Sweep.absorb(sweep, batch, &presence/1) {pruned, pruned_bytes, dangling} = maybe_prune(state, missing) @@ -219,7 +224,11 @@ defmodule Hyper.Img.Db.Gc do not exists(from il in ImageLayer, where: il.blob_id == parent_as(:b).id), select: b.size - {count, sizes} = with_low_priority(state, fn -> Repo.delete_all(query) end) + {count, sizes} = + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Repo.delete_all(query) + end) + {count, Enum.sum(sizes)} end @@ -242,26 +251,11 @@ defmodule Hyper.Img.Db.Gc do @spec referenced_ids(t(), [String.t()]) :: MapSet.t(String.t()) defp referenced_ids(state, ids) do query = from il in ImageLayer, where: il.blob_id in ^ids, distinct: true, select: il.blob_id - state |> with_low_priority(fn -> Repo.all(query) end) |> MapSet.new() - end - - # Run a DB operation at low priority: in a transaction whose statement_timeout - # is capped, so it can never pin a backend and yields under contention. - @spec with_low_priority(t(), (-> result)) :: result when result: var - defp with_low_priority(state, fun) do - timeout = Unit.Time.as_ms(state.config.statement_timeout) - - {:ok, result} = - Repo.transaction(fn -> - _ = - Repo.query!("SELECT set_config('statement_timeout', $1, true)", [ - Integer.to_string(timeout) - ]) - - fun.() - end) - result + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Repo.all(query) + end) + |> MapSet.new() end # Shared-medium presence probe injected into the pure Sweep core. Distinguishes From ecd09c77cd61fa861aa5664cc9445140286fa728 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:31:39 +0000 Subject: [PATCH 06/11] feat(img-db): enforce single-node safety for the SQLite backend Adds SingleNodeGuard GenServer that refuses to boot when peers are already connected and halts the node if a peer joins while SQLite is active. Started conditionally in the application tree only when Backend.sqlite?/0 is true. --- lib/hyper/application.ex | 35 +++++++----- lib/hyper/img/db/single_node_guard.ex | 60 ++++++++++++++++++++ test/hyper/img/db/single_node_guard_test.exs | 14 +++++ 3 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 lib/hyper/img/db/single_node_guard.ex create mode 100644 test/hyper/img/db/single_node_guard_test.exs diff --git a/lib/hyper/application.ex b/lib/hyper/application.ex index be51fb4..c34282d 100644 --- a/lib/hyper/application.ex +++ b/lib/hyper/application.ex @@ -15,20 +15,29 @@ defmodule Hyper.Application do topologies = Application.get_env(:libcluster, :topologies, []) - children = [ - # The image-lineage database. Started first so the rest of the node can - # query images/leases on boot. - Hyper.Img.Db.Backend.repo(), - # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` - # can discover peer nodes. Gossip strategy in dev - see config/config.exs. - {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, - # Cluster-wide CRDTs (VM routing + budget telemetry). Must precede - # Hyper.Node so VM registrations and budget advertisements have their - # registries on boot. - Hyper.Cluster, - Hyper.Node - ] + children = + [ + # The image-lineage database. Started first so the rest of the node can + # query images/leases on boot. + Hyper.Img.Db.Backend.repo(), + # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` + # can discover peer nodes. Gossip strategy in dev - see config/config.exs. + {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, + # Cluster-wide CRDTs (VM routing + budget telemetry). Must precede + # Hyper.Node so VM registrations and budget advertisements have their + # registries on boot. + Hyper.Cluster, + Hyper.Node + ] ++ sqlite_guard_children() Supervisor.start_link(children, strategy: :one_for_one, name: Hyper.Supervisor) end + + defp sqlite_guard_children do + if Hyper.Img.Db.Backend.sqlite?() do + [Hyper.Img.Db.SingleNodeGuard] + else + [] + end + end end diff --git a/lib/hyper/img/db/single_node_guard.ex b/lib/hyper/img/db/single_node_guard.ex new file mode 100644 index 0000000..dc1901d --- /dev/null +++ b/lib/hyper/img/db/single_node_guard.ex @@ -0,0 +1,60 @@ +defmodule Hyper.Img.Db.SingleNodeGuard do + @moduledoc """ + Enforces that the SQLite image-graph backend only ever runs on a node with + no connected peers. + + A single-writer file database cannot be shared safely across cluster nodes, + so this guard: + + * refuses to boot (stops with `{:multi_node_sqlite, peers}`) if peers are + already connected when it starts, and + * halts the node via `System.stop/1` if a peer joins while it is running, + preventing concurrent writers from corrupting the database. + + Only started when `Hyper.Img.Db.Backend.sqlite?/0` is true. + """ + + use GenServer + + require Logger + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts) do + peers_fun = Keyword.get(opts, :peers, &Node.list/0) + GenServer.start_link(__MODULE__, peers_fun, name: __MODULE__) + end + + @impl true + def init(peers_fun) when is_function(peers_fun, 0) do + _ = :net_kernel.monitor_nodes(true) + + case peers_fun.() do + [] -> + Logger.info("img db: SQLite backend active; single-node guard armed") + {:ok, %{peers: peers_fun}} + + peers -> + Logger.critical( + "img db: SQLite backend is configured but the cluster already has peers " <> + "(#{inspect(peers)}). SQLite cannot be shared across nodes; refusing to start." + ) + + {:stop, {:multi_node_sqlite, peers}} + end + end + + @impl true + def handle_info({:nodeup, node}, state) do + Logger.critical( + "img db: SQLite backend active but peer #{inspect(node)} joined the cluster. " <> + "SQLite is single-writer and cannot be shared safely. " <> + "Halting to protect data integrity." + ) + + System.stop(1) + {:noreply, state} + end + + def handle_info({:nodedown, _node}, state), do: {:noreply, state} + def handle_info(_msg, state), do: {:noreply, state} +end diff --git a/test/hyper/img/db/single_node_guard_test.exs b/test/hyper/img/db/single_node_guard_test.exs new file mode 100644 index 0000000..1e4f92d --- /dev/null +++ b/test/hyper/img/db/single_node_guard_test.exs @@ -0,0 +1,14 @@ +defmodule Hyper.Img.Db.SingleNodeGuardTest do + use ExUnit.Case, async: true + + alias Hyper.Img.Db.SingleNodeGuard + + test "arms when no peers are connected" do + assert {:ok, _state} = SingleNodeGuard.init(fn -> [] end) + end + + test "refuses to start when peers are already connected" do + peers = [:"b@127.0.0.1", :"c@127.0.0.1"] + assert {:stop, {:multi_node_sqlite, ^peers}} = SingleNodeGuard.init(fn -> peers end) + end +end From 1b73e850e8dd63bd16693fcadd3ffaa5d94a8f5b Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:38:48 +0000 Subject: [PATCH 07/11] test(img-db): prove image-graph query portability on SQLite Adds a real-SQLite integration test that starts Repo.Sqlite against a temp file, runs the shared migration, and exercises the three constructs that could behave differently on SQLite vs Postgres: - Lease.bump/3 ON CONFLICT upsert (via new bump_with_repo/4 that takes the repo explicitly, used by tests; bump/3 delegates to it) - GC prune query: parent_as correlated subquery in delete_all with RETURNING - confirmed {count, sizes} populates correctly on SQLite - Image.resolve_chain/1 ordered join (refactored to return a query instead of executing it, so callers choose the repo) ecto_sqlite3 note: on conflict update, the returned struct carries a freshly-generated UUID rather than the existing row's id (ecto_sqlite3 does not RETURNING the stored row after a conflict update). The test asserts what actually matters: one row persists and the stored expiry was advanced, verified by Repo.aggregate + Repo.one!. --- lib/hyper/img/db/image.ex | 19 ++-- lib/hyper/img/db/lease.ex | 9 +- lib/hyper/node/img/server.ex | 1 + test/hyper/img/db/sqlite_backend_test.exs | 110 ++++++++++++++++++++++ 4 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 test/hyper/img/db/sqlite_backend_test.exs diff --git a/lib/hyper/img/db/image.ex b/lib/hyper/img/db/image.ex index 5a25bd4..bde6f6b 100644 --- a/lib/hyper/img/db/image.ex +++ b/lib/hyper/img/db/image.ex @@ -8,7 +8,7 @@ defmodule Hyper.Img.Db.Image do import Ecto.Changeset import Ecto.Query - alias Hyper.Img.Db.{Blob, ImageLayer, Lease, Repo} + alias Hyper.Img.Db.{ImageLayer, Lease, Repo} alias Unit.Information @primary_key {:id, :string, autogenerate: false} @@ -38,18 +38,17 @@ defmodule Hyper.Img.Db.Image do def chain_sizes(image_id) do image_id |> resolve_chain() + |> then(&Repo.all/1) |> Enum.map(fn blob -> {blob.id, Information.bytes(blob.size)} end) end - @doc "Ordered blobs needed to assemble `image_id`, base (position 0) first." - @spec resolve_chain(String.t()) :: [Blob.t()] + @doc "Query for the ordered blobs needed to assemble `image_id`, base (position 0) first." + @spec resolve_chain(String.t()) :: Ecto.Query.t() def resolve_chain(image_id) do - Repo.all( - from l in ImageLayer, - where: l.image_id == ^image_id, - join: b in assoc(l, :blob), - order_by: [asc: l.position], - select: b - ) + from l in ImageLayer, + where: l.image_id == ^image_id, + join: b in assoc(l, :blob), + order_by: [asc: l.position], + select: b end end diff --git a/lib/hyper/img/db/lease.ex b/lib/hyper/img/db/lease.ex index 9bb2133..5f281ba 100644 --- a/lib/hyper/img/db/lease.ex +++ b/lib/hyper/img/db/lease.ex @@ -49,7 +49,12 @@ defmodule Hyper.Img.Db.Lease do """ @spec bump(Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: {:ok, %__MODULE__{}} | {:error, Ecto.Changeset.t()} - def bump(image_id, vm_id, ttl) do + def bump(image_id, vm_id, ttl), do: bump_with_repo(Repo, image_id, vm_id, ttl) + + @doc false + @spec bump_with_repo(module(), Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: + {:ok, %__MODULE__{}} | {:error, Ecto.Changeset.t()} + def bump_with_repo(repo, image_id, vm_id, ttl) do expires_at = DateTime.add(DateTime.utc_now(), Unit.Time.as_s(ttl), :second) %__MODULE__{} @@ -59,7 +64,7 @@ defmodule Hyper.Img.Db.Lease do vm_id: vm_id, expires_at: expires_at }) - |> Repo.insert( + |> repo.insert( on_conflict: [set: [expires_at: expires_at]], conflict_target: [:node_id, :vm_id] ) diff --git a/lib/hyper/node/img/server.ex b/lib/hyper/node/img/server.ex index 5f9cb1d..f84ab8d 100644 --- a/lib/hyper/node/img/server.ex +++ b/lib/hyper/node/img/server.ex @@ -141,6 +141,7 @@ defmodule Hyper.Node.Img.Server do defp resolve_layers(img_id) do img_id |> Db.Image.resolve_chain() + |> Db.Repo.all() |> Enum.map(& &1.id) end diff --git a/test/hyper/img/db/sqlite_backend_test.exs b/test/hyper/img/db/sqlite_backend_test.exs new file mode 100644 index 0000000..0afa04e --- /dev/null +++ b/test/hyper/img/db/sqlite_backend_test.exs @@ -0,0 +1,110 @@ +defmodule Hyper.Img.Db.SqliteBackendTest do + @moduledoc """ + Proves the image-graph queries that depend on database-specific SQL behave + correctly on SQLite: the lease upsert, the GC prune (correlated subquery + + RETURNING), and chain resolution. + """ + + use ExUnit.Case, async: false + + import Ecto.Query + + alias Hyper.Img.Db.{Blob, Image, ImageLayer, Lease} + alias Hyper.Img.Db.Repo.Sqlite, as: Repo + + setup do + # `mix test --no-start` skips the :hyper application (which would try to + # connect to Postgres), but :ecto and :ecto_sql must run so + # Ecto.Repo.Registry and Ecto.MigratorSupervisor are alive. + Application.ensure_all_started(:ecto_sql) + + dir = System.tmp_dir!() + db = Path.join(dir, "hyper_sqlite_test_#{System.unique_integer([:positive])}.db") + + pid = + start_supervised!( + {Repo, database: db, journal_mode: :wal, pool_size: 1, busy_timeout: 5_000} + ) + + Ecto.Migrator.run(Repo, Path.join([File.cwd!(), "priv", "repo", "migrations"]), :up, + all: true + ) + + on_exit(fn -> + File.rm(db) + File.rm(db <> "-wal") + File.rm(db <> "-shm") + end) + + %{repo: pid, db: db} + end + + defp now, do: DateTime.utc_now() + defp ago(seconds), do: DateTime.add(now(), -seconds, :second) + + test "Image.resolve_chain returns blobs ordered by layer position" do + Repo.insert!(%Blob{id: "base", kind: :base, state: :present, size: 100, inserted_at: now()}) + Repo.insert!(%Blob{id: "delta", kind: :delta, state: :present, size: 50, inserted_at: now()}) + Repo.insert!(%Image{id: "img1", label: "test", inserted_at: now()}) + Repo.insert!(%ImageLayer{image_id: "img1", blob_id: "base", position: 0}) + Repo.insert!(%ImageLayer{image_id: "img1", blob_id: "delta", position: 1}) + + chain = Repo.all(Image.resolve_chain("img1")) + + assert Enum.map(chain, & &1.id) == ["base", "delta"] + end + + test "Lease.bump upserts on the (node_id, vm_id) conflict target" do + Repo.insert!(%Image{id: "img2", inserted_at: now()}) + + {:ok, first} = Lease.bump_with_repo(Repo, "img2", "vm-a", Unit.Time.s(60)) + {:ok, second} = Lease.bump_with_repo(Repo, "img2", "vm-a", Unit.Time.s(120)) + + # The second call targets the same (node_id, vm_id) key, so the ON CONFLICT + # DO UPDATE fires: exactly one row persists and the expiry is further out. + # + # Note: ecto_sqlite3 builds the returned struct from the changeset rather than + # reading back the stored row after a conflict update, so `second.id` carries a + # freshly-generated UUID instead of the original lease's id. What matters is + # that exactly one row exists and the stored expiry was bumped. + assert DateTime.compare(second.expires_at, first.expires_at) == :gt + assert Repo.aggregate(Lease, :count) == 1 + + # The single stored row must have the higher expiry. + stored = Repo.one!(Lease) + assert DateTime.compare(stored.expires_at, first.expires_at) == :gt + end + + test "GC prune deletes only unreferenced blobs and returns their sizes via RETURNING" do + cutoff = now() + + # Referenced by an image layer -> must survive. + Repo.insert!(%Blob{id: "kept", kind: :base, state: :present, size: 100, inserted_at: ago(60)}) + Repo.insert!(%Image{id: "img3", inserted_at: now()}) + Repo.insert!(%ImageLayer{image_id: "img3", blob_id: "kept", position: 0}) + + # Unreferenced and older than the cutoff -> must be pruned. + Repo.insert!(%Blob{ + id: "orphan", + kind: :delta, + state: :present, + size: 42, + inserted_at: ago(60) + }) + + query = + from b in Blob, + as: :b, + where: + b.state == :present and b.inserted_at < ^cutoff and + not exists(from il in ImageLayer, where: il.blob_id == parent_as(:b).id), + select: b.size + + {count, sizes} = Repo.delete_all(query) + + assert count == 1 + assert sizes == [42] + assert Repo.get(Blob, "kept") + refute Repo.get(Blob, "orphan") + end +end From fead82a746d07d59c7a0d99a797818dbba65f4eb Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:44:09 +0000 Subject: [PATCH 08/11] test(img-db): clarify lease upsert assertions; silence migrator log; tidy pipe - Fix 1: Clarify lease upsert test assertions with precise comments: returned struct check notes it's from changeset (not DB re-read); count==1 assertion proves upsert didn't insert; stored-row re-read proves persistence. - Fix 2: Add log: false to Ecto.Migrator.run to suppress "redefining module" noise. - Fix 3: Replace |> then(&Repo.all/1) with idiomatic |> Repo.all() in chain_sizes/1. --- lib/hyper/img/db/image.ex | 2 +- test/hyper/img/db/sqlite_backend_test.exs | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/hyper/img/db/image.ex b/lib/hyper/img/db/image.ex index bde6f6b..3d33a1f 100644 --- a/lib/hyper/img/db/image.ex +++ b/lib/hyper/img/db/image.ex @@ -38,7 +38,7 @@ defmodule Hyper.Img.Db.Image do def chain_sizes(image_id) do image_id |> resolve_chain() - |> then(&Repo.all/1) + |> Repo.all() |> Enum.map(fn blob -> {blob.id, Information.bytes(blob.size)} end) end diff --git a/test/hyper/img/db/sqlite_backend_test.exs b/test/hyper/img/db/sqlite_backend_test.exs index 0afa04e..40d418d 100644 --- a/test/hyper/img/db/sqlite_backend_test.exs +++ b/test/hyper/img/db/sqlite_backend_test.exs @@ -27,7 +27,8 @@ defmodule Hyper.Img.Db.SqliteBackendTest do ) Ecto.Migrator.run(Repo, Path.join([File.cwd!(), "priv", "repo", "migrations"]), :up, - all: true + all: true, + log: false ) on_exit(fn -> @@ -65,12 +66,15 @@ defmodule Hyper.Img.Db.SqliteBackendTest do # # Note: ecto_sqlite3 builds the returned struct from the changeset rather than # reading back the stored row after a conflict update, so `second.id` carries a - # freshly-generated UUID instead of the original lease's id. What matters is - # that exactly one row exists and the stored expiry was bumped. + # freshly-generated UUID instead of the original lease's id. + + # returned struct carries the new TTL (built from the changeset, not read back) assert DateTime.compare(second.expires_at, first.expires_at) == :gt + + # exactly one row exists (proves upsert updated-in-place, not inserted) assert Repo.aggregate(Lease, :count) == 1 - # The single stored row must have the higher expiry. + # re-read the stored row and verify its expiry was actually bumped (proves DB update persisted) stored = Repo.one!(Lease) assert DateTime.compare(stored.expires_at, first.expires_at) == :gt end From 34114da3fda570aa7eadadb4525cdb58f02aa702 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:46:25 +0000 Subject: [PATCH 09/11] docs(img-db): document postgres/sqlite storage backend selection Adds an Image-graph storage backends section to the architecture doc covering both backends, the config snippet to enable SQLite, the SingleNodeGuard boot-refusal and runtime-halt enforcement, per-backend migration commands, and the known upsert-id limitation under SQLite. Updates the README Features bullet to mention the SQLite option for single-node deployments. --- README.md | 6 +++-- docs/cookbook/architecture.md | 50 +++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 9f92b7e..737d433 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,10 @@ deploying and integrating Hyper. - **Telemetry** -- Hyper is mostly fully instrumented with [Otel](https://opentelemetry.io/) so you get full traces on if/why things are not performing as expected. -- **Minimal stack** -- Hyper makes very few assumptions on your cloud, and only - requires a Postgres database as a minimal external dependency. +- **Minimal stack** -- Hyper makes very few assumptions on your cloud. For + multi-node deployments a Postgres database is required; single-node + deployments can use the built-in SQLite backend instead (see + [Architecture](docs/cookbook/architecture.md) for details). - **🔮 BEAM-native** -- Hyper is written on the [BEAM](https://en.wikipedia.org/wiki/BEAM_(Erlang_virtual_machine)). This means that fault-tolerance is built into the virtual machine, and allows you diff --git a/docs/cookbook/architecture.md b/docs/cookbook/architecture.md index f983d87..a6328e4 100644 --- a/docs/cookbook/architecture.md +++ b/docs/cookbook/architecture.md @@ -95,13 +95,59 @@ Files](https://aws.amazon.com/s3/features/files/), filesystem. The author uses the local filesystem for debugging, and NFS for production use. This medium is referred to as the **layer storage medium**. -A side-car PostgreSQL database stores: +A metadata database stores: - The dependency relationships between each individual layer and image. - Leases issued out to virtual machines to track which layers are currently considered active. -The aforementioned PostgreSQL database is coined the **metadata database**. +The aforementioned database is coined the **metadata database**. Two backends +are available; see [Image-graph storage backends](#image-graph-storage-backends) +below for how to choose one. + +### Image-graph storage backends + +The image graph (blobs, images, image-layers, leases) is stored through the +`Hyper.Img.Db.Repo` facade. At runtime `Hyper.Img.Db.Backend` resolves that +facade to one of two concrete backends: + +- **`:postgres`** (default) -- cluster-safe; required for any multi-node + deployment. +- **`:sqlite`** -- a single-writer file database for single-node deployments + only. Selected via: + + ```elixir + config :hyper, Hyper.Img.Db, backend: :sqlite + ``` + +SQLite **must not** be used on a clustered node. `Hyper.Img.Db.SingleNodeGuard` +enforces this automatically: + +- At startup the node refuses to boot if any peers are already connected. +- At runtime the node halts itself (via `System.stop/1`) if a peer joins while + SQLite is active. + +This hard enforcement protects the SQLite file from concurrent writers, which +would corrupt the metadata database. + +#### Migrations + +Both backends share `priv/repo/migrations` -- the DDL is written to be portable +across PostgreSQL and SQLite. Apply migrations per backend: + + mix ecto.migrate -r Hyper.Img.Db.Repo.Postgres # default (multi-node) + mix ecto.migrate -r Hyper.Img.Db.Repo.Sqlite # single-node SQLite + +#### Known limitation: upsert id under SQLite + +On SQLite (via `ecto_sqlite3`), an `ON CONFLICT DO UPDATE` upsert returns a +struct carrying a freshly-generated UUID rather than the stored row's `id`. +`Hyper.Img.Db.Lease.bump/3` is the only upsert in the image-graph path, and +current callers only match `{:ok, _}` without reading back the lease `id`, so +there is no live bug. However, any future code that reads the `id` from the +struct returned by a lease bump under SQLite will receive an incorrect value. +Use the Postgres backend if you need reliable round-trip identity on bumped +leases. ### Composition From 58675d4b1b5157b961e39409936ca11b415d1074 Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 16:51:42 +0000 Subject: [PATCH 10/11] fix(ci): use concrete Postgres repo for ecto tasks; annotate guard handle_info clauses --- .github/workflows/ci.yml | 4 ++-- lib/hyper/img/db/single_node_guard.ex | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 639bafd..30ad06a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,8 +84,8 @@ jobs: # tasks don't discover here -- without -r they no-op and tests then hit # a missing database. run: | - mix ecto.create -r Hyper.Img.Db.Repo - mix ecto.migrate -r Hyper.Img.Db.Repo + mix ecto.create -r Hyper.Img.Db.Repo.Postgres + mix ecto.migrate -r Hyper.Img.Db.Repo.Postgres - name: Test (warnings as errors) env: diff --git a/lib/hyper/img/db/single_node_guard.ex b/lib/hyper/img/db/single_node_guard.ex index dc1901d..a954d2d 100644 --- a/lib/hyper/img/db/single_node_guard.ex +++ b/lib/hyper/img/db/single_node_guard.ex @@ -55,6 +55,9 @@ defmodule Hyper.Img.Db.SingleNodeGuard do {:noreply, state} end + @impl true def handle_info({:nodedown, _node}, state), do: {:noreply, state} + + @impl true def handle_info(_msg, state), do: {:noreply, state} end From 5e1982fe7ac8c4e6b3bcb4c8b8d7298fe97537ff Mon Sep 17 00:00:00 2001 From: Marko Vejnovic Date: Tue, 23 Jun 2026 20:00:14 +0000 Subject: [PATCH 11/11] refactor(img-db): single repo + Hyper.Img.Db.Config; generic single-node guard Simplify the SQLite backend per review feedback: - Collapse Repo.Postgres/Repo.Sqlite/Backend into one Hyper.Img.Db.Repo whose adapter is chosen by a single Hyper.Img.Db.Config module (config :hyper, Hyper.Img.Db, backend: :postgres | :sqlite). Restores the default priv/telemetry_prefix, so ecto_repos/CI/migrations use the plain repo name again. - Move the single-node guard out of the DB namespace into a generic, DB-agnostic Hyper.SingleNodeGuard; application.ex decides whether to attach it based on Hyper.Img.Db.Config.sqlite?/0. - Revert test-only scaffolding: Image.resolve_chain/1 executes again and Lease.bump/3 is a single function (drop bump_with_repo/4). - Delete the backend/guard/portability tests. - Move storage-backend docs from architecture.md into the intro cookbook page. --- .github/workflows/ci.yml | 4 +- README.md | 4 +- config/config.exs | 19 +--- docs/cookbook/architecture.md | 50 +------- docs/cookbook/intro.md | 51 +++++++++ lib/hyper/application.ex | 17 +-- lib/hyper/img/db/backend.ex | 30 ----- lib/hyper/img/db/config.ex | 36 ++++++ lib/hyper/img/db/image.ex | 19 ++-- lib/hyper/img/db/lease.ex | 9 +- lib/hyper/img/db/repo.ex | 89 +++++---------- lib/hyper/img/db/repo/postgres.ex | 12 -- lib/hyper/img/db/repo/sqlite.ex | 16 --- lib/hyper/img/db/single_node_guard.ex | 63 ---------- lib/hyper/node/img/server.ex | 1 - lib/hyper/single_node_guard.ex | 65 +++++++++++ mix.exs | 2 +- test/hyper/img/db/backend_test.exs | 25 ---- test/hyper/img/db/single_node_guard_test.exs | 14 --- test/hyper/img/db/sqlite_backend_test.exs | 114 ------------------- 20 files changed, 214 insertions(+), 426 deletions(-) delete mode 100644 lib/hyper/img/db/backend.ex create mode 100644 lib/hyper/img/db/config.ex delete mode 100644 lib/hyper/img/db/repo/postgres.ex delete mode 100644 lib/hyper/img/db/repo/sqlite.ex delete mode 100644 lib/hyper/img/db/single_node_guard.ex create mode 100644 lib/hyper/single_node_guard.ex delete mode 100644 test/hyper/img/db/backend_test.exs delete mode 100644 test/hyper/img/db/single_node_guard_test.exs delete mode 100644 test/hyper/img/db/sqlite_backend_test.exs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2233230..2d88264 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,8 +84,8 @@ jobs: # tasks don't discover here -- without -r they no-op and tests then hit # a missing database. run: | - mix ecto.create -r Hyper.Img.Db.Repo.Postgres - mix ecto.migrate -r Hyper.Img.Db.Repo.Postgres + mix ecto.create -r Hyper.Img.Db.Repo + mix ecto.migrate -r Hyper.Img.Db.Repo - name: Test + coverage (warnings as errors) env: diff --git a/README.md b/README.md index b13041c..8c0a98f 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ deploying and integrating Hyper. not performing as expected. - **Minimal stack** -- Hyper makes very few assumptions on your cloud. For multi-node deployments a Postgres database is required; single-node - deployments can use the built-in SQLite backend instead (see - [Architecture](docs/cookbook/architecture.md) for details). + deployments can use the built-in SQLite backend instead (see the + [intro](docs/cookbook/intro.md) for details). - **🔮 BEAM-native** -- Hyper is written on the [BEAM](https://en.wikipedia.org/wiki/BEAM_(Erlang_virtual_machine)). This means that fault-tolerance is built into the virtual machine, and allows you diff --git a/config/config.exs b/config/config.exs index 1cc9036..37e9ac1 100644 --- a/config/config.exs +++ b/config/config.exs @@ -37,24 +37,15 @@ if config_env() == :test do end # Image-graph storage backend. :postgres (default, cluster-safe) or :sqlite -# (single-node only; enforced at runtime by Hyper.Img.Db.SingleNodeGuard). +# (single-node only). See Hyper.Img.Db.Config and docs/cookbook/intro.md. config :hyper, Hyper.Img.Db, backend: :postgres -config :hyper, Hyper.Img.Db.Repo.Postgres, - priv: "priv/repo", - telemetry_prefix: [:hyper, :img, :db, :repo], +# Connection settings for the configured backend. The block below is for the +# default Postgres backend; for SQLite, configure this repo for SQLite instead +# (see docs/cookbook/intro.md). +config :hyper, Hyper.Img.Db.Repo, database: "hyper_dev", username: "postgres", password: "postgres", hostname: "localhost", pool_size: 10 - -config :hyper, Hyper.Img.Db.Repo.Sqlite, - priv: "priv/repo", - telemetry_prefix: [:hyper, :img, :db, :repo], - database: Path.expand("../priv/sqlite/hyper.db", __DIR__), - pool_size: 1, - journal_mode: :wal, - busy_timeout: 5_000, - binary_id_type: :string, - datetime_type: :iso8601 diff --git a/docs/cookbook/architecture.md b/docs/cookbook/architecture.md index a6328e4..f983d87 100644 --- a/docs/cookbook/architecture.md +++ b/docs/cookbook/architecture.md @@ -95,59 +95,13 @@ Files](https://aws.amazon.com/s3/features/files/), filesystem. The author uses the local filesystem for debugging, and NFS for production use. This medium is referred to as the **layer storage medium**. -A metadata database stores: +A side-car PostgreSQL database stores: - The dependency relationships between each individual layer and image. - Leases issued out to virtual machines to track which layers are currently considered active. -The aforementioned database is coined the **metadata database**. Two backends -are available; see [Image-graph storage backends](#image-graph-storage-backends) -below for how to choose one. - -### Image-graph storage backends - -The image graph (blobs, images, image-layers, leases) is stored through the -`Hyper.Img.Db.Repo` facade. At runtime `Hyper.Img.Db.Backend` resolves that -facade to one of two concrete backends: - -- **`:postgres`** (default) -- cluster-safe; required for any multi-node - deployment. -- **`:sqlite`** -- a single-writer file database for single-node deployments - only. Selected via: - - ```elixir - config :hyper, Hyper.Img.Db, backend: :sqlite - ``` - -SQLite **must not** be used on a clustered node. `Hyper.Img.Db.SingleNodeGuard` -enforces this automatically: - -- At startup the node refuses to boot if any peers are already connected. -- At runtime the node halts itself (via `System.stop/1`) if a peer joins while - SQLite is active. - -This hard enforcement protects the SQLite file from concurrent writers, which -would corrupt the metadata database. - -#### Migrations - -Both backends share `priv/repo/migrations` -- the DDL is written to be portable -across PostgreSQL and SQLite. Apply migrations per backend: - - mix ecto.migrate -r Hyper.Img.Db.Repo.Postgres # default (multi-node) - mix ecto.migrate -r Hyper.Img.Db.Repo.Sqlite # single-node SQLite - -#### Known limitation: upsert id under SQLite - -On SQLite (via `ecto_sqlite3`), an `ON CONFLICT DO UPDATE` upsert returns a -struct carrying a freshly-generated UUID rather than the stored row's `id`. -`Hyper.Img.Db.Lease.bump/3` is the only upsert in the image-graph path, and -current callers only match `{:ok, _}` without reading back the lease `id`, so -there is no live bug. However, any future code that reads the `id` from the -struct returned by a lease bump under SQLite will receive an incorrect value. -Use the Postgres backend if you need reliable round-trip identity on bumped -leases. +The aforementioned PostgreSQL database is coined the **metadata database**. ### Composition diff --git a/docs/cookbook/intro.md b/docs/cookbook/intro.md index 1459f74..4437661 100644 --- a/docs/cookbook/intro.md +++ b/docs/cookbook/intro.md @@ -41,3 +41,54 @@ config :hyper, uid_gid_range: {900_000, 999_999}, layer_dir: "/srv/hyper/layers" ``` + +### Storage backends + +`Hyper` keeps its image graph (blobs, images, image-layers, leases) in a +metadata database via `Hyper.Img.Db.Repo`. Two backends are available, chosen +in your config: + +```elixir +# cluster-safe default; required for any multi-node deployment +config :hyper, Hyper.Img.Db, backend: :postgres + +# single-node deployments only +config :hyper, Hyper.Img.Db, backend: :sqlite +``` + +Connection settings live under `config :hyper, Hyper.Img.Db.Repo`. For Postgres +that is the usual `database`/`username`/`password`/`hostname`. For SQLite, +point it at a file and use the SQLite adapter options, e.g.: + +```elixir +config :hyper, Hyper.Img.Db, backend: :sqlite + +config :hyper, Hyper.Img.Db.Repo, + database: "/srv/hyper/hyper.db", + pool_size: 1, + journal_mode: :wal, + busy_timeout: 5_000, + binary_id_type: :string, + datetime_type: :iso8601 +``` + +The backend is resolved at compile time, so changing it takes effect on the +next build. Apply migrations the same way for either backend: + +```sh +mix ecto.migrate +``` + +> #### SQLite is single-node only +> +> SQLite is a single-writer file database and **must not** be shared across +> cluster nodes. When the SQLite backend is configured, `Hyper` starts +> `Hyper.SingleNodeGuard`, which refuses to boot if peers are already connected +> and halts the node (via `System.stop/1`) if a peer joins later -- protecting +> the file from the concurrent writers that would corrupt it. +> +> One behavioural caveat: under SQLite, an `ON CONFLICT DO UPDATE` upsert +> returns a struct carrying a freshly-generated UUID rather than the stored +> row's `id`. `Hyper.Img.Db.Lease.bump/3` is the only such upsert and its +> callers don't read the returned `id`, so there is no live bug -- but use +> Postgres if you need reliable round-trip identity on bumped leases. diff --git a/lib/hyper/application.ex b/lib/hyper/application.ex index c34282d..abc201f 100644 --- a/lib/hyper/application.ex +++ b/lib/hyper/application.ex @@ -8,9 +8,8 @@ defmodule Hyper.Application do # :opentelemetry starts as its own OTP application (a dependency of :hyper), # so it is already running before this supervisor boots. # - # Bridge Ecto's query telemetry into OpenTelemetry spans. Both concrete - # repos set telemetry_prefix: [:hyper, :img, :db, :repo] in config, so this - # call is valid for whichever backend is active. + # Bridge Ecto's query telemetry into OpenTelemetry spans. The prefix matches + # Hyper.Img.Db.Repo's default telemetry_prefix. _ = OpentelemetryEcto.setup([:hyper, :img, :db, :repo]) topologies = Application.get_env(:libcluster, :topologies, []) @@ -19,7 +18,7 @@ defmodule Hyper.Application do [ # The image-lineage database. Started first so the rest of the node can # query images/leases on boot. - Hyper.Img.Db.Backend.repo(), + Hyper.Img.Db.Repo, # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` # can discover peer nodes. Gossip strategy in dev - see config/config.exs. {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, @@ -28,14 +27,16 @@ defmodule Hyper.Application do # registries on boot. Hyper.Cluster, Hyper.Node - ] ++ sqlite_guard_children() + ] ++ single_node_guard_children() Supervisor.start_link(children, strategy: :one_for_one, name: Hyper.Supervisor) end - defp sqlite_guard_children do - if Hyper.Img.Db.Backend.sqlite?() do - [Hyper.Img.Db.SingleNodeGuard] + # The SQLite backend is a single-writer file database; it is only safe on a + # node with no peers. Guard that invariant when SQLite is configured. + defp single_node_guard_children do + if Hyper.Img.Db.Config.sqlite?() do + [Hyper.SingleNodeGuard] else [] end diff --git a/lib/hyper/img/db/backend.ex b/lib/hyper/img/db/backend.ex deleted file mode 100644 index 0f463e8..0000000 --- a/lib/hyper/img/db/backend.ex +++ /dev/null @@ -1,30 +0,0 @@ -defmodule Hyper.Img.Db.Backend do - @moduledoc """ - Resolves the active image-graph storage backend from configuration. - - Configured via `config :hyper, Hyper.Img.Db, backend: :postgres | :sqlite`. - `:postgres` is the cluster-safe default; `:sqlite` is valid only on a - single node (see `Hyper.Img.Db.SingleNodeGuard`). - """ - - @repos %{ - postgres: Hyper.Img.Db.Repo.Postgres, - sqlite: Hyper.Img.Db.Repo.Sqlite - } - - @doc "The configured backend, defaulting to `:postgres`." - @spec selected() :: :postgres | :sqlite - def selected do - :hyper - |> Application.get_env(Hyper.Img.Db, []) - |> Keyword.get(:backend, :postgres) - end - - @doc "The concrete repo module for the configured backend." - @spec repo() :: module() - def repo, do: Map.fetch!(@repos, selected()) - - @doc "True when the SQLite backend is configured." - @spec sqlite?() :: boolean() - def sqlite?, do: selected() == :sqlite -end diff --git a/lib/hyper/img/db/config.ex b/lib/hyper/img/db/config.ex new file mode 100644 index 0000000..b76b2c3 --- /dev/null +++ b/lib/hyper/img/db/config.ex @@ -0,0 +1,36 @@ +defmodule Hyper.Img.Db.Config do + @moduledoc """ + Single place to configure the image-graph database. + + Choose the backend in your config: + + # cluster-safe default + config :hyper, Hyper.Img.Db, backend: :postgres + + # single-node deployments only + config :hyper, Hyper.Img.Db, backend: :sqlite + + The backend is resolved at compile time (the Ecto adapter is fixed when + `Hyper.Img.Db.Repo` is compiled), so changing it takes effect on the next + build. Connection settings live under `config :hyper, Hyper.Img.Db.Repo`. + """ + + @backend Application.compile_env(:hyper, [Hyper.Img.Db, :backend], :postgres) + + @adapters %{ + postgres: Ecto.Adapters.Postgres, + sqlite: Ecto.Adapters.SQLite3 + } + + @doc "The configured backend (`:postgres` | `:sqlite`)." + @spec backend() :: :postgres | :sqlite + def backend, do: @backend + + @doc "The Ecto adapter module for the configured backend." + @spec adapter() :: module() + def adapter, do: Map.fetch!(@adapters, @backend) + + @doc "True when the SQLite backend is configured (single-node only)." + @spec sqlite?() :: boolean() + def sqlite?, do: @backend == :sqlite +end diff --git a/lib/hyper/img/db/image.ex b/lib/hyper/img/db/image.ex index 3d33a1f..5a25bd4 100644 --- a/lib/hyper/img/db/image.ex +++ b/lib/hyper/img/db/image.ex @@ -8,7 +8,7 @@ defmodule Hyper.Img.Db.Image do import Ecto.Changeset import Ecto.Query - alias Hyper.Img.Db.{ImageLayer, Lease, Repo} + alias Hyper.Img.Db.{Blob, ImageLayer, Lease, Repo} alias Unit.Information @primary_key {:id, :string, autogenerate: false} @@ -38,17 +38,18 @@ defmodule Hyper.Img.Db.Image do def chain_sizes(image_id) do image_id |> resolve_chain() - |> Repo.all() |> Enum.map(fn blob -> {blob.id, Information.bytes(blob.size)} end) end - @doc "Query for the ordered blobs needed to assemble `image_id`, base (position 0) first." - @spec resolve_chain(String.t()) :: Ecto.Query.t() + @doc "Ordered blobs needed to assemble `image_id`, base (position 0) first." + @spec resolve_chain(String.t()) :: [Blob.t()] def resolve_chain(image_id) do - from l in ImageLayer, - where: l.image_id == ^image_id, - join: b in assoc(l, :blob), - order_by: [asc: l.position], - select: b + Repo.all( + from l in ImageLayer, + where: l.image_id == ^image_id, + join: b in assoc(l, :blob), + order_by: [asc: l.position], + select: b + ) end end diff --git a/lib/hyper/img/db/lease.ex b/lib/hyper/img/db/lease.ex index 5f281ba..9bb2133 100644 --- a/lib/hyper/img/db/lease.ex +++ b/lib/hyper/img/db/lease.ex @@ -49,12 +49,7 @@ defmodule Hyper.Img.Db.Lease do """ @spec bump(Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: {:ok, %__MODULE__{}} | {:error, Ecto.Changeset.t()} - def bump(image_id, vm_id, ttl), do: bump_with_repo(Repo, image_id, vm_id, ttl) - - @doc false - @spec bump_with_repo(module(), Hyper.Img.id(), Hyper.Vm.id(), Unit.Time.t()) :: - {:ok, %__MODULE__{}} | {:error, Ecto.Changeset.t()} - def bump_with_repo(repo, image_id, vm_id, ttl) do + def bump(image_id, vm_id, ttl) do expires_at = DateTime.add(DateTime.utc_now(), Unit.Time.as_s(ttl), :second) %__MODULE__{} @@ -64,7 +59,7 @@ defmodule Hyper.Img.Db.Lease do vm_id: vm_id, expires_at: expires_at }) - |> repo.insert( + |> Repo.insert( on_conflict: [set: [expires_at: expires_at]], conflict_target: [:node_id, :vm_id] ) diff --git a/lib/hyper/img/db/repo.ex b/lib/hyper/img/db/repo.ex index 06c6a8d..9184bc9 100644 --- a/lib/hyper/img/db/repo.ex +++ b/lib/hyper/img/db/repo.ex @@ -1,78 +1,47 @@ defmodule Hyper.Img.Db.Repo do @moduledoc """ - Runtime facade over the active image-graph repository. + Global database of all known layers, and how they relate to each other. - All application code talks to this module; it forwards Ecto callbacks to - whichever concrete repo `Hyper.Img.Db.Backend` selects (Postgres or - SQLite). Adapter-specific behaviour is encapsulated in `with_low_priority/2`. - """ - - alias Hyper.Img.Db.Backend - - # --- Ecto.Repo callbacks used across the codebase ------------------------ - # If `grep -rn "Repo\\." lib/` surfaces a callback not listed here, add a - # matching forwarder. Each is a one-line delegation to the active repo. - - def all(queryable, opts \\ []), do: Backend.repo().all(queryable, opts) - def one(queryable, opts \\ []), do: Backend.repo().one(queryable, opts) - def one!(queryable, opts \\ []), do: Backend.repo().one!(queryable, opts) - def get(queryable, id, opts \\ []), do: Backend.repo().get(queryable, id, opts) - def get!(queryable, id, opts \\ []), do: Backend.repo().get!(queryable, id, opts) - def get_by(queryable, clauses, opts \\ []), do: Backend.repo().get_by(queryable, clauses, opts) - - def get_by!(queryable, clauses, opts \\ []), - do: Backend.repo().get_by!(queryable, clauses, opts) - - def exists?(queryable, opts \\ []), do: Backend.repo().exists?(queryable, opts) - def insert(struct, opts \\ []), do: Backend.repo().insert(struct, opts) - def insert!(struct, opts \\ []), do: Backend.repo().insert!(struct, opts) + Tracks images and how they relate (images can build on top of images), and + answers: + - Given an image id, is it a base image or a layered image? + - If an image is a layered image, what are the layers to build it? + - Who is currently actively holding onto an image (or any of its children)? - def insert_all(schema, entries, opts \\ []), - do: Backend.repo().insert_all(schema, entries, opts) - - def update(struct, opts \\ []), do: Backend.repo().update(struct, opts) - def update!(struct, opts \\ []), do: Backend.repo().update!(struct, opts) - - def update_all(queryable, updates, opts \\ []), - do: Backend.repo().update_all(queryable, updates, opts) + The backend (PostgreSQL or SQLite) is chosen by `Hyper.Img.Db.Config`; see + that module to configure it. + """ - def delete(struct, opts \\ []), do: Backend.repo().delete(struct, opts) - def delete!(struct, opts \\ []), do: Backend.repo().delete!(struct, opts) - def delete_all(queryable, opts \\ []), do: Backend.repo().delete_all(queryable, opts) - def preload(structs, preloads, opts \\ []), do: Backend.repo().preload(structs, preloads, opts) - def transaction(fun_or_multi, opts \\ []), do: Backend.repo().transaction(fun_or_multi, opts) - def rollback(value), do: Backend.repo().rollback(value) - def query(sql, params \\ [], opts \\ []), do: Backend.repo().query(sql, params, opts) - def query!(sql, params \\ [], opts \\ []), do: Backend.repo().query!(sql, params, opts) + use Ecto.Repo, + otp_app: :hyper, + adapter: Hyper.Img.Db.Config.adapter() @doc """ - Runs `fun` under a best-effort, time-bounded, low-priority context. + Runs `fun` time-bounded and low-priority where the backend supports it. Postgres: wraps `fun` in a transaction with a transaction-local `statement_timeout`, so a slow sweep cannot pin a connection indefinitely. - SQLite: single-writer with a connection `busy_timeout`; there is no - per-statement timeout, so `fun` is run directly. + SQLite: single-writer with a connection `busy_timeout` and no per-statement + timeout, so `fun` is run directly. Returns the value of `fun`. """ @spec with_low_priority(non_neg_integer(), (-> result)) :: result when result: var def with_low_priority(timeout_ms, fun) when is_integer(timeout_ms) and is_function(fun, 0) do - case Backend.selected() do - :postgres -> - {:ok, result} = - Backend.repo().transaction(fn -> - _ = - Backend.repo().query!("SELECT set_config('statement_timeout', $1, true)", [ - Integer.to_string(timeout_ms) - ]) - - fun.() - end) - - result - - :sqlite -> - fun.() + if Hyper.Img.Db.Config.sqlite?() do + fun.() + else + {:ok, result} = + transaction(fn -> + _ = + query!("SELECT set_config('statement_timeout', $1, true)", [ + Integer.to_string(timeout_ms) + ]) + + fun.() + end) + + result end end end diff --git a/lib/hyper/img/db/repo/postgres.ex b/lib/hyper/img/db/repo/postgres.ex deleted file mode 100644 index 7d1a586..0000000 --- a/lib/hyper/img/db/repo/postgres.ex +++ /dev/null @@ -1,12 +0,0 @@ -defmodule Hyper.Img.Db.Repo.Postgres do - @moduledoc """ - Postgres-backed image-graph repository. - - The cluster-safe default. Reached through the `Hyper.Img.Db.Repo` facade; - not called directly by application code. - """ - - use Ecto.Repo, - otp_app: :hyper, - adapter: Ecto.Adapters.Postgres -end diff --git a/lib/hyper/img/db/repo/sqlite.ex b/lib/hyper/img/db/repo/sqlite.ex deleted file mode 100644 index f950fcd..0000000 --- a/lib/hyper/img/db/repo/sqlite.ex +++ /dev/null @@ -1,16 +0,0 @@ -defmodule Hyper.Img.Db.Repo.Sqlite do - @moduledoc """ - SQLite-backed image-graph repository. - - Single-node only: a single-writer file database cannot be shared safely - across cluster nodes. `Hyper.Img.Db.SingleNodeGuard` enforces this at - runtime. Reached through the `Hyper.Img.Db.Repo` facade. - - Shares the `priv/repo/migrations` directory with the Postgres repo; the - image-graph DDL contains no Postgres-specific constructs. - """ - - use Ecto.Repo, - otp_app: :hyper, - adapter: Ecto.Adapters.SQLite3 -end diff --git a/lib/hyper/img/db/single_node_guard.ex b/lib/hyper/img/db/single_node_guard.ex deleted file mode 100644 index a954d2d..0000000 --- a/lib/hyper/img/db/single_node_guard.ex +++ /dev/null @@ -1,63 +0,0 @@ -defmodule Hyper.Img.Db.SingleNodeGuard do - @moduledoc """ - Enforces that the SQLite image-graph backend only ever runs on a node with - no connected peers. - - A single-writer file database cannot be shared safely across cluster nodes, - so this guard: - - * refuses to boot (stops with `{:multi_node_sqlite, peers}`) if peers are - already connected when it starts, and - * halts the node via `System.stop/1` if a peer joins while it is running, - preventing concurrent writers from corrupting the database. - - Only started when `Hyper.Img.Db.Backend.sqlite?/0` is true. - """ - - use GenServer - - require Logger - - @spec start_link(keyword()) :: GenServer.on_start() - def start_link(opts) do - peers_fun = Keyword.get(opts, :peers, &Node.list/0) - GenServer.start_link(__MODULE__, peers_fun, name: __MODULE__) - end - - @impl true - def init(peers_fun) when is_function(peers_fun, 0) do - _ = :net_kernel.monitor_nodes(true) - - case peers_fun.() do - [] -> - Logger.info("img db: SQLite backend active; single-node guard armed") - {:ok, %{peers: peers_fun}} - - peers -> - Logger.critical( - "img db: SQLite backend is configured but the cluster already has peers " <> - "(#{inspect(peers)}). SQLite cannot be shared across nodes; refusing to start." - ) - - {:stop, {:multi_node_sqlite, peers}} - end - end - - @impl true - def handle_info({:nodeup, node}, state) do - Logger.critical( - "img db: SQLite backend active but peer #{inspect(node)} joined the cluster. " <> - "SQLite is single-writer and cannot be shared safely. " <> - "Halting to protect data integrity." - ) - - System.stop(1) - {:noreply, state} - end - - @impl true - def handle_info({:nodedown, _node}, state), do: {:noreply, state} - - @impl true - def handle_info(_msg, state), do: {:noreply, state} -end diff --git a/lib/hyper/node/img/server.ex b/lib/hyper/node/img/server.ex index f84ab8d..5f9cb1d 100644 --- a/lib/hyper/node/img/server.ex +++ b/lib/hyper/node/img/server.ex @@ -141,7 +141,6 @@ defmodule Hyper.Node.Img.Server do defp resolve_layers(img_id) do img_id |> Db.Image.resolve_chain() - |> Db.Repo.all() |> Enum.map(& &1.id) end diff --git a/lib/hyper/single_node_guard.ex b/lib/hyper/single_node_guard.ex new file mode 100644 index 0000000..8117b47 --- /dev/null +++ b/lib/hyper/single_node_guard.ex @@ -0,0 +1,65 @@ +defmodule Hyper.SingleNodeGuard do + @moduledoc """ + Enforces that this node is the only node in the cluster. + + Some subsystems are correct only when no peers are present (for example, a + single-writer local datastore). Start this guard whenever such a subsystem is + active. It: + + * refuses to start if any peers are already connected, and + * halts the node via `System.stop/1` if a peer later joins, + + preventing the unsafe multi-node configuration from continuing. + + The guard is deliberately decoupled from any particular subsystem - the + decision of whether to start it belongs to the application supervisor. `init/1` + accepts a 0-arity `:peers` function (default `&Node.list/0`) so the decision + logic can be exercised without a live cluster. + """ + + use GenServer + + require Logger + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts) do + peers_fun = Keyword.get(opts, :peers, &Node.list/0) + GenServer.start_link(__MODULE__, peers_fun, name: __MODULE__) + end + + @impl true + def init(peers_fun) when is_function(peers_fun, 0) do + _ = :net_kernel.monitor_nodes(true) + + case peers_fun.() do + [] -> + Logger.info("single-node guard armed; this node must remain the only node") + {:ok, %{peers: peers_fun}} + + peers -> + Logger.critical( + "single-node guard: cluster already has peers (#{inspect(peers)}); " <> + "this node requires single-node operation. Refusing to start." + ) + + {:stop, {:multi_node, peers}} + end + end + + @impl true + def handle_info({:nodeup, node}, state) do + Logger.critical( + "single-node guard: peer #{inspect(node)} joined the cluster; " <> + "this node requires single-node operation. Halting to protect integrity." + ) + + System.stop(1) + {:noreply, state} + end + + @impl true + def handle_info({:nodedown, _node}, state), do: {:noreply, state} + + @impl true + def handle_info(_msg, state), do: {:noreply, state} +end diff --git a/mix.exs b/mix.exs index e24b9af..31b1e63 100644 --- a/mix.exs +++ b/mix.exs @@ -48,7 +48,7 @@ defmodule Hyper.MixProject do mod: {Hyper.Application, []}, # ecto_repos lives here (not config.exs) since it's well-known and # compile-time fixed. Mix's ecto.* tasks read it from the app env. - env: [ecto_repos: [Hyper.Img.Db.Repo.Postgres]] + env: [ecto_repos: [Hyper.Img.Db.Repo]] ] end diff --git a/test/hyper/img/db/backend_test.exs b/test/hyper/img/db/backend_test.exs deleted file mode 100644 index 9c138c1..0000000 --- a/test/hyper/img/db/backend_test.exs +++ /dev/null @@ -1,25 +0,0 @@ -defmodule Hyper.Img.Db.BackendTest do - use ExUnit.Case, async: false - - alias Hyper.Img.Db.Backend - - setup do - original = Application.get_env(:hyper, Hyper.Img.Db) - on_exit(fn -> Application.put_env(:hyper, Hyper.Img.Db, original) end) - :ok - end - - test "defaults to the Postgres repo" do - Application.put_env(:hyper, Hyper.Img.Db, []) - assert Backend.selected() == :postgres - assert Backend.repo() == Hyper.Img.Db.Repo.Postgres - refute Backend.sqlite?() - end - - test "resolves the SQLite repo when configured" do - Application.put_env(:hyper, Hyper.Img.Db, backend: :sqlite) - assert Backend.selected() == :sqlite - assert Backend.repo() == Hyper.Img.Db.Repo.Sqlite - assert Backend.sqlite?() - end -end diff --git a/test/hyper/img/db/single_node_guard_test.exs b/test/hyper/img/db/single_node_guard_test.exs deleted file mode 100644 index 1e4f92d..0000000 --- a/test/hyper/img/db/single_node_guard_test.exs +++ /dev/null @@ -1,14 +0,0 @@ -defmodule Hyper.Img.Db.SingleNodeGuardTest do - use ExUnit.Case, async: true - - alias Hyper.Img.Db.SingleNodeGuard - - test "arms when no peers are connected" do - assert {:ok, _state} = SingleNodeGuard.init(fn -> [] end) - end - - test "refuses to start when peers are already connected" do - peers = [:"b@127.0.0.1", :"c@127.0.0.1"] - assert {:stop, {:multi_node_sqlite, ^peers}} = SingleNodeGuard.init(fn -> peers end) - end -end diff --git a/test/hyper/img/db/sqlite_backend_test.exs b/test/hyper/img/db/sqlite_backend_test.exs deleted file mode 100644 index 40d418d..0000000 --- a/test/hyper/img/db/sqlite_backend_test.exs +++ /dev/null @@ -1,114 +0,0 @@ -defmodule Hyper.Img.Db.SqliteBackendTest do - @moduledoc """ - Proves the image-graph queries that depend on database-specific SQL behave - correctly on SQLite: the lease upsert, the GC prune (correlated subquery + - RETURNING), and chain resolution. - """ - - use ExUnit.Case, async: false - - import Ecto.Query - - alias Hyper.Img.Db.{Blob, Image, ImageLayer, Lease} - alias Hyper.Img.Db.Repo.Sqlite, as: Repo - - setup do - # `mix test --no-start` skips the :hyper application (which would try to - # connect to Postgres), but :ecto and :ecto_sql must run so - # Ecto.Repo.Registry and Ecto.MigratorSupervisor are alive. - Application.ensure_all_started(:ecto_sql) - - dir = System.tmp_dir!() - db = Path.join(dir, "hyper_sqlite_test_#{System.unique_integer([:positive])}.db") - - pid = - start_supervised!( - {Repo, database: db, journal_mode: :wal, pool_size: 1, busy_timeout: 5_000} - ) - - Ecto.Migrator.run(Repo, Path.join([File.cwd!(), "priv", "repo", "migrations"]), :up, - all: true, - log: false - ) - - on_exit(fn -> - File.rm(db) - File.rm(db <> "-wal") - File.rm(db <> "-shm") - end) - - %{repo: pid, db: db} - end - - defp now, do: DateTime.utc_now() - defp ago(seconds), do: DateTime.add(now(), -seconds, :second) - - test "Image.resolve_chain returns blobs ordered by layer position" do - Repo.insert!(%Blob{id: "base", kind: :base, state: :present, size: 100, inserted_at: now()}) - Repo.insert!(%Blob{id: "delta", kind: :delta, state: :present, size: 50, inserted_at: now()}) - Repo.insert!(%Image{id: "img1", label: "test", inserted_at: now()}) - Repo.insert!(%ImageLayer{image_id: "img1", blob_id: "base", position: 0}) - Repo.insert!(%ImageLayer{image_id: "img1", blob_id: "delta", position: 1}) - - chain = Repo.all(Image.resolve_chain("img1")) - - assert Enum.map(chain, & &1.id) == ["base", "delta"] - end - - test "Lease.bump upserts on the (node_id, vm_id) conflict target" do - Repo.insert!(%Image{id: "img2", inserted_at: now()}) - - {:ok, first} = Lease.bump_with_repo(Repo, "img2", "vm-a", Unit.Time.s(60)) - {:ok, second} = Lease.bump_with_repo(Repo, "img2", "vm-a", Unit.Time.s(120)) - - # The second call targets the same (node_id, vm_id) key, so the ON CONFLICT - # DO UPDATE fires: exactly one row persists and the expiry is further out. - # - # Note: ecto_sqlite3 builds the returned struct from the changeset rather than - # reading back the stored row after a conflict update, so `second.id` carries a - # freshly-generated UUID instead of the original lease's id. - - # returned struct carries the new TTL (built from the changeset, not read back) - assert DateTime.compare(second.expires_at, first.expires_at) == :gt - - # exactly one row exists (proves upsert updated-in-place, not inserted) - assert Repo.aggregate(Lease, :count) == 1 - - # re-read the stored row and verify its expiry was actually bumped (proves DB update persisted) - stored = Repo.one!(Lease) - assert DateTime.compare(stored.expires_at, first.expires_at) == :gt - end - - test "GC prune deletes only unreferenced blobs and returns their sizes via RETURNING" do - cutoff = now() - - # Referenced by an image layer -> must survive. - Repo.insert!(%Blob{id: "kept", kind: :base, state: :present, size: 100, inserted_at: ago(60)}) - Repo.insert!(%Image{id: "img3", inserted_at: now()}) - Repo.insert!(%ImageLayer{image_id: "img3", blob_id: "kept", position: 0}) - - # Unreferenced and older than the cutoff -> must be pruned. - Repo.insert!(%Blob{ - id: "orphan", - kind: :delta, - state: :present, - size: 42, - inserted_at: ago(60) - }) - - query = - from b in Blob, - as: :b, - where: - b.state == :present and b.inserted_at < ^cutoff and - not exists(from il in ImageLayer, where: il.blob_id == parent_as(:b).id), - select: b.size - - {count, sizes} = Repo.delete_all(query) - - assert count == 1 - assert sizes == [42] - assert Repo.get(Blob, "kept") - refute Repo.get(Blob, "orphan") - end -end