diff --git a/README.md b/README.md index f3cafc8..8c0a98f 100644 --- a/README.md +++ b/README.md @@ -43,8 +43,10 @@ deploying and integrating Hyper. - **Telemetry** -- Hyper is mostly fully instrumented with [Otel](https://opentelemetry.io/) so you get full traces on if/why things are not performing as expected. -- **Minimal stack** -- Hyper makes very few assumptions on your cloud, and only - requires a Postgres database as a minimal external dependency. +- **Minimal stack** -- Hyper makes very few assumptions on your cloud. For + multi-node deployments a Postgres database is required; single-node + deployments can use the built-in SQLite backend instead (see the + [intro](docs/cookbook/intro.md) for details). - **🔮 BEAM-native** -- Hyper is written on the [BEAM](https://en.wikipedia.org/wiki/BEAM_(Erlang_virtual_machine)). This means that fault-tolerance is built into the virtual machine, and allows you diff --git a/config/config.exs b/config/config.exs index 4cf3f19..37e9ac1 100644 --- a/config/config.exs +++ b/config/config.exs @@ -36,6 +36,13 @@ if config_env() == :test do config :libcluster, topologies: [] end +# Image-graph storage backend. :postgres (default, cluster-safe) or :sqlite +# (single-node only). See Hyper.Img.Db.Config and docs/cookbook/intro.md. +config :hyper, Hyper.Img.Db, backend: :postgres + +# Connection settings for the configured backend. The block below is for the +# default Postgres backend; for SQLite, configure this repo for SQLite instead +# (see docs/cookbook/intro.md). 
config :hyper, Hyper.Img.Db.Repo, database: "hyper_dev", username: "postgres", diff --git a/docs/cookbook/intro.md b/docs/cookbook/intro.md index 1459f74..4437661 100644 --- a/docs/cookbook/intro.md +++ b/docs/cookbook/intro.md @@ -41,3 +41,54 @@ config :hyper, uid_gid_range: {900_000, 999_999}, layer_dir: "/srv/hyper/layers" ``` + +### Storage backends + +`Hyper` keeps its image graph (blobs, images, image-layers, leases) in a +metadata database via `Hyper.Img.Db.Repo`. Two backends are available, chosen +in your config: + +```elixir +# cluster-safe default; required for any multi-node deployment +config :hyper, Hyper.Img.Db, backend: :postgres + +# single-node deployments only +config :hyper, Hyper.Img.Db, backend: :sqlite +``` + +Connection settings live under `config :hyper, Hyper.Img.Db.Repo`. For Postgres +that is the usual `database`/`username`/`password`/`hostname`. For SQLite, +point it at a file and use the SQLite adapter options, e.g.: + +```elixir +config :hyper, Hyper.Img.Db, backend: :sqlite + +config :hyper, Hyper.Img.Db.Repo, + database: "/srv/hyper/hyper.db", + pool_size: 1, + journal_mode: :wal, + busy_timeout: 5_000, + binary_id_type: :string, + datetime_type: :iso8601 +``` + +The backend is resolved at compile time, so changing it takes effect on the +next build. Apply migrations the same way for either backend: + +```sh +mix ecto.migrate +``` + +> #### SQLite is single-node only +> +> SQLite is a single-writer file database and **must not** be shared across +> cluster nodes. When the SQLite backend is configured, `Hyper` starts +> `Hyper.SingleNodeGuard`, which refuses to boot if peers are already connected +> and halts the node (via `System.stop/1`) if a peer joins later -- protecting +> the file from the concurrent writers that would corrupt it. +> +> One behavioural caveat: under SQLite, an `ON CONFLICT DO UPDATE` upsert +> returns a struct carrying a freshly-generated UUID rather than the stored +> row's `id`. 
`Hyper.Img.Db.Lease.bump/3` is the only such upsert and its +> callers don't read the returned `id`, so there is no live bug -- but use +> Postgres if you need reliable round-trip identity on bumped leases. diff --git a/lib/hyper/application.ex b/lib/hyper/application.ex index 70e30f0..abc201f 100644 --- a/lib/hyper/application.ex +++ b/lib/hyper/application.ex @@ -9,25 +9,36 @@ defmodule Hyper.Application do # so it is already running before this supervisor boots. # # Bridge Ecto's query telemetry into OpenTelemetry spans. The prefix matches - # the repo's default telemetry_prefix (its module path, underscored). + # Hyper.Img.Db.Repo's default telemetry_prefix. _ = OpentelemetryEcto.setup([:hyper, :img, :db, :repo]) topologies = Application.get_env(:libcluster, :topologies, []) - children = [ - # The image-lineage database. Started first so the rest of the node can - # query images/leases on boot. - Hyper.Img.Db.Repo, - # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` - # can discover peer nodes. Gossip strategy in dev - see config/config.exs. - {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, - # Cluster-wide CRDTs (VM routing + budget telemetry). Must precede - # Hyper.Node so VM registrations and budget advertisements have their - # registries on boot. - Hyper.Cluster, - Hyper.Node - ] + children = + [ + # The image-lineage database. Started first so the rest of the node can + # query images/leases on boot. + Hyper.Img.Db.Repo, + # Form the BEAM cluster (Distributed Erlang) so Horde's `members: :auto` + # can discover peer nodes. Gossip strategy in dev - see config/config.exs. + {Cluster.Supervisor, [topologies, [name: Hyper.ClusterSupervisor]]}, + # Cluster-wide CRDTs (VM routing + budget telemetry). Must precede + # Hyper.Node so VM registrations and budget advertisements have their + # registries on boot. 
+        Hyper.Cluster,
+        Hyper.Node
+      ] ++ single_node_guard_children()

     Supervisor.start_link(children, strategy: :one_for_one, name: Hyper.Supervisor)
   end
+
+  # The SQLite backend is a single-writer file database; it is only safe on a
+  # node with no peers. Guard that invariant when SQLite is configured.
+  defp single_node_guard_children do
+    if Hyper.Img.Db.Config.sqlite?() do
+      [Hyper.SingleNodeGuard]
+    else
+      []
+    end
+  end
 end
diff --git a/lib/hyper/img/db/config.ex b/lib/hyper/img/db/config.ex
new file mode 100644
index 0000000..b76b2c3
--- /dev/null
+++ b/lib/hyper/img/db/config.ex
@@ -0,0 +1,48 @@
+defmodule Hyper.Img.Db.Config do
+  @moduledoc """
+  Single place to configure the image-graph database.
+
+  Choose the backend in your config:
+
+      # cluster-safe default
+      config :hyper, Hyper.Img.Db, backend: :postgres
+
+      # single-node deployments only
+      config :hyper, Hyper.Img.Db, backend: :sqlite
+
+  The backend is resolved at compile time (the Ecto adapter is fixed when
+  `Hyper.Img.Db.Repo` is compiled), so changing it takes effect on the next
+  build. Any other `:backend` value aborts compilation with an `ArgumentError`
+  that lists the supported backends. Connection settings live under
+  `config :hyper, Hyper.Img.Db.Repo`.
+  """
+
+  @adapters %{
+    postgres: Ecto.Adapters.Postgres,
+    sqlite: Ecto.Adapters.SQLite3
+  }
+
+  @backend Application.compile_env(:hyper, [Hyper.Img.Db, :backend], :postgres)
+
+  # Reject a mistyped backend (e.g. `:sqlite3`) while this module compiles, with
+  # a message naming the valid choices. Without this check the value would be
+  # returned by `backend/0` in violation of its spec, and `adapter/0` would
+  # raise a bare KeyError while `Hyper.Img.Db.Repo` is compiling.
+  if not Map.has_key?(@adapters, @backend) do
+    raise ArgumentError,
+          "invalid backend #{inspect(@backend)} in `config :hyper, Hyper.Img.Db`; " <>
+            "expected one of #{inspect(Map.keys(@adapters))}"
+  end
+
+  @doc "The configured backend (`:postgres` | `:sqlite`)."
+  @spec backend() :: :postgres | :sqlite
+  def backend, do: @backend
+
+  @doc "The Ecto adapter module for the configured backend."
+  @spec adapter() :: module()
+  def adapter, do: Map.fetch!(@adapters, @backend)
+
+  @doc "True when the SQLite backend is configured (single-node only)."
+  @spec sqlite?() :: boolean()
+  def sqlite?, do: @backend == :sqlite
+end
diff --git a/lib/hyper/img/db/gc.ex b/lib/hyper/img/db/gc.ex
index e4d5995..6f06a73 100644
--- a/lib/hyper/img/db/gc.ex
+++ b/lib/hyper/img/db/gc.ex
@@ -100,7 +100,7 @@ defmodule Hyper.Img.Db.Gc do
   rescue
     # Only swallow database unavailability (incl.
statement_timeout aborts) # and retry; let any other exception crash so a real bug surfaces. - e in [Postgrex.Error, DBConnection.ConnectionError] -> + e in [Postgrex.Error, Exqlite.Error, DBConnection.ConnectionError] -> Logger.warning( "layer gc: database unavailable during sweep (#{Exception.message(e)}); retrying" ) @@ -139,7 +139,12 @@ defmodule Hyper.Img.Db.Gc do @spec scan_one_batch(t()) :: t() defp scan_one_batch(%__MODULE__{sweep: sweep} = state) do limit = state.config.batch_size - batch = with_low_priority(state, fn -> Blob.present_after(sweep.cursor, limit) end) + + batch = + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Blob.present_after(sweep.cursor, limit) + end) + {sweep, missing} = Sweep.absorb(sweep, batch, &presence/1) {pruned, pruned_bytes, dangling} = maybe_prune(state, missing) @@ -219,7 +224,11 @@ defmodule Hyper.Img.Db.Gc do not exists(from il in ImageLayer, where: il.blob_id == parent_as(:b).id), select: b.size - {count, sizes} = with_low_priority(state, fn -> Repo.delete_all(query) end) + {count, sizes} = + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Repo.delete_all(query) + end) + {count, Enum.sum(sizes)} end @@ -242,26 +251,11 @@ defmodule Hyper.Img.Db.Gc do @spec referenced_ids(t(), [String.t()]) :: MapSet.t(String.t()) defp referenced_ids(state, ids) do query = from il in ImageLayer, where: il.blob_id in ^ids, distinct: true, select: il.blob_id - state |> with_low_priority(fn -> Repo.all(query) end) |> MapSet.new() - end - - # Run a DB operation at low priority: in a transaction whose statement_timeout - # is capped, so it can never pin a backend and yields under contention. 
- @spec with_low_priority(t(), (-> result)) :: result when result: var - defp with_low_priority(state, fun) do - timeout = Unit.Time.as_ms(state.config.statement_timeout) - - {:ok, result} = - Repo.transaction(fn -> - _ = - Repo.query!("SELECT set_config('statement_timeout', $1, true)", [ - Integer.to_string(timeout) - ]) - - fun.() - end) - result + Repo.with_low_priority(Unit.Time.as_ms(state.config.statement_timeout), fn -> + Repo.all(query) + end) + |> MapSet.new() end # Shared-medium presence probe injected into the pure Sweep core. Distinguishes diff --git a/lib/hyper/img/db/repo.ex b/lib/hyper/img/db/repo.ex index a0e8cb8..9184bc9 100644 --- a/lib/hyper/img/db/repo.ex +++ b/lib/hyper/img/db/repo.ex @@ -2,18 +2,46 @@ defmodule Hyper.Img.Db.Repo do @moduledoc """ Global database of all known layers, and how they relate to each other. - At the current stage of this project, we use postgres to track images and how they relate. - Note that images can build on top of images. - - This repo is responsible for answering the questions: + Tracks images and how they relate (images can build on top of images), and + answers: - Given an image id, is it a base image or a layered image? - If an image is a layered image, what are the layers to build it? - - Who is currently actively holding onto an image? This can mean, potentially, in the case of - layered images: - - Who is holding onto the image or any of its children? + - Who is currently actively holding onto an image (or any of its children)? + + The backend (PostgreSQL or SQLite) is chosen by `Hyper.Img.Db.Config`; see + that module to configure it. """ use Ecto.Repo, otp_app: :hyper, - adapter: Ecto.Adapters.Postgres + adapter: Hyper.Img.Db.Config.adapter() + + @doc """ + Runs `fun` time-bounded and low-priority where the backend supports it. + + Postgres: wraps `fun` in a transaction with a transaction-local + `statement_timeout`, so a slow sweep cannot pin a connection indefinitely. 
+  SQLite: single-writer with a connection `busy_timeout` and no per-statement
+  timeout, so `fun` is run directly.
+
+  Returns the value of `fun`.
+  """
+  @spec with_low_priority(non_neg_integer(), (-> result)) :: result when result: var
+  def with_low_priority(timeout_ms, fun) when is_integer(timeout_ms) and is_function(fun, 0) do
+    if Hyper.Img.Db.Config.sqlite?() do
+      fun.()
+    else
+      {:ok, result} =
+        transaction(fn ->
+          _ =
+            query!("SELECT set_config('statement_timeout', $1, true)", [
+              Integer.to_string(timeout_ms)
+            ])
+
+          fun.()
+        end)
+
+      result
+    end
+  end
 end
diff --git a/lib/hyper/single_node_guard.ex b/lib/hyper/single_node_guard.ex
new file mode 100644
index 0000000..8117b47
--- /dev/null
+++ b/lib/hyper/single_node_guard.ex
@@ -0,0 +1,79 @@
+defmodule Hyper.SingleNodeGuard do
+  @moduledoc """
+  Enforces that this node is the only node in the cluster.
+
+  Some subsystems are correct only when no peers are present (for example, a
+  single-writer local datastore). Start this guard whenever such a subsystem is
+  active. It:
+
+    * refuses to start if any peers are already connected, and
+    * halts the node via `System.stop/1` if a peer later joins,
+
+  preventing the unsafe multi-node configuration from continuing.
+
+  The guard is deliberately decoupled from any particular subsystem - the
+  decision of whether to start it belongs to the application supervisor.
+  `start_link/1` accepts a `:peers` option, a 0-arity function returning the
+  currently connected peers (default `&Node.list/0`); it is handed to `init/1`
+  as-is so the boot-time decision can be exercised without a live cluster.
+  """
+
+  use GenServer
+
+  require Logger
+
+  @doc """
+  Starts the guard, registered under the module name.
+
+  ## Options
+
+    * `:peers` - 0-arity function returning the list of connected peers;
+      defaults to `&Node.list/0`.
+  """
+  @spec start_link(keyword()) :: GenServer.on_start()
+  def start_link(opts) do
+    peers_fun = Keyword.get(opts, :peers, &Node.list/0)
+    GenServer.start_link(__MODULE__, peers_fun, name: __MODULE__)
+  end
+
+  @impl true
+  def init(peers_fun) when is_function(peers_fun, 0) do
+    # Subscribe before taking the peer snapshot so a node that connects in
+    # between is still seen (as a queued :nodeup). Match on :ok: a guard that
+    # failed to subscribe would never notice a later peer, so crash instead.
+    :ok = :net_kernel.monitor_nodes(true)
+
+    case peers_fun.() do
+      [] ->
+        Logger.info("single-node guard armed; this node must remain the only node")
+        {:ok, %{peers: peers_fun}}
+
+      peers ->
+        Logger.critical(
+          "single-node guard: cluster already has peers (#{inspect(peers)}); " <>
+            "this node requires single-node operation. Refusing to start."
+        )
+
+        {:stop, {:multi_node, peers}}
+    end
+  end
+
+  @impl true
+  def handle_info({:nodeup, node}, state) do
+    Logger.critical(
+      "single-node guard: peer #{inspect(node)} joined the cluster; " <>
+        "this node requires single-node operation. Halting to protect integrity."
+    )
+
+    # System.stop/1 only initiates an asynchronous shutdown and returns, so keep
+    # answering the mailbox normally until the runtime goes down.
+    System.stop(1)
+    {:noreply, state}
+  end
+
+  @impl true
+  def handle_info({:nodedown, _node}, state), do: {:noreply, state}
+
+  @impl true
+  def handle_info(_msg, state), do: {:noreply, state}
+end
diff --git a/mix.exs b/mix.exs
index e8ce13b..31b1e63 100644
--- a/mix.exs
+++ b/mix.exs
@@ -60,6 +60,7 @@ defmodule Hyper.MixProject do
       {:dialyxir, "~> 1.4", only: [:dev], runtime: false},
       {:ex_doc, "~> 0.34", only: :dev, runtime: false},
       {:ecto_sql, "~> 3.13"},
+      {:ecto_sqlite3, "~> 0.17"},
       {:horde, "~> 0.9"},
       {:jason, "~> 1.4"},
       {:libcluster, "~> 3.3"},
diff --git a/mix.lock b/mix.lock
index e6794db..8f4dac6 100644
--- a/mix.lock
+++ b/mix.lock
@@ -1,6 +1,7 @@
 %{
   "acceptor_pool": {:hex, :acceptor_pool, "1.0.1", "d88c2e8a0be9216cf513fbcd3e5a4beb36bee3ff4168e85d6152c6f899359cdb", [:rebar3], [], "hexpm", "f172f3d74513e8edd445c257d596fc84dbdd56d2c6fa287434269648ae5a421e"},
   "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm",
"dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"}, + "cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"}, "chatterbox": {:hex, :ts_chatterbox, "0.15.1", "5cac4d15dd7ad61fc3c4415ce4826fc563d4643dee897a558ec4ea0b1c835c9c", [:rebar3], [{:hpack, "~> 0.3.0", [hex: :hpack_erl, repo: "hexpm", optional: false]}], "hexpm", "4f75b91451338bc0da5f52f3480fa6ef6e3a2aeecfc33686d6b3d0a0948f31aa"}, "credo": {:hex, :credo, "1.7.19", "cc52129665fc7c15143d47838fda0f9cd6dac9ceced7bf4da6f85fcbfe64b12a", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "2d8bc95d5a7bb99dd2613621d4f08c6a3575c3fd4b62e6a2b48a100352a557b8"}, "ctx": {:hex, :ctx, "0.6.0", "8ff88b70e6400c4df90142e7f130625b82086077a45364a78d208ed3ed53c7fe", [:rebar3], [], "hexpm", "a14ed2d1b67723dbebbe423b28d7615eb0bdcba6ff28f2d1f1b0a7e1d4aa5fc2"}, @@ -12,10 +13,12 @@ "earmark_parser": {:hex, :earmark_parser, "1.4.45", "cba8369ab2a1342e419bc2760eec731b17be828941dcf494045d44766227e1d5", [:mix], [], "hexpm", "d3ec045bf122965db20c0bdb420e19ee1415843135327124918473feb4b328e8"}, "ecto": {:hex, :ecto, "3.13.6", "352135b474f91d1ab99a1b502171d207e9db60421c9e3d0ecab4c7ab96b24d14", [:mix], [{:decimal, "~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "8afa059bc16cd2c94739ec0a11e3e5df69d828125119109bef35f20a21a76af2"}, "ecto_sql": {:hex, :ecto_sql, "3.13.5", 
"2f8282b2ad97bf0f0d3217ea0a6fff320ead9e2f8770f810141189d182dc304e", [:mix], [{:db_connection, "~> 2.4.1 or ~> 2.5", [hex: :db_connection, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:myxql, "~> 0.7", [hex: :myxql, repo: "hexpm", optional: true]}, {:postgrex, "~> 0.19 or ~> 1.0", [hex: :postgrex, repo: "hexpm", optional: true]}, {:tds, "~> 2.1.1 or ~> 2.2", [hex: :tds, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.0 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "aa36751f4e6a2b56ae79efb0e088042e010ff4935fc8684e74c23b1f49e25fdc"}, + "ecto_sqlite3": {:hex, :ecto_sqlite3, "0.23.0", "79da75815627582f081f00d418c130c4cf587672b720b54e7a8798c6d46b5415", [:mix], [{:decimal, "~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ecto, "~> 3.13.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:ecto_sql, "~> 3.13.0", [hex: :ecto_sql, repo: "hexpm", optional: false]}, {:exqlite, "~> 0.22", [hex: :exqlite, repo: "hexpm", optional: false]}], "hexpm", "e97041bcec746ed525df7d9ad996fbae3b0660767f99fbe9e9b58d6208729703"}, "elixir_make": {:hex, :elixir_make, "0.10.0", "16577e2583a79bb79237bbff349619ef5d80afffc07eac6e4faf0d00e2ddaf7d", [:mix], [], "hexpm", "dc1f09fb7fa68866b886abd5f0f3c83553b1a19a52359a899e92af1bb3b31982"}, "erlex": {:hex, :erlex, "0.2.9", "7debbbaa9f4f368b8cd648983e0f1d7963028508e9c59e9d4ed504e94ef52a55", [:mix], [], "hexpm", "8cfffc0ec7159e6d73de2ab28a588064de80f88b2798d5cbe4482cbbc200178b"}, "ex_doc": {:hex, :ex_doc, "0.40.3", "4a972ffe64bc07dc605af487e98fc19b72a4185f55ca031b94c0552d6071c1d9", [:mix], [{:earmark_parser, "~> 1.4.44", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.0", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14 or ~> 1.0", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1 or ~> 1.0", [hex: :makeup_erlang, repo: "hexpm", optional: false]}, 
{:makeup_html, ">= 0.1.0", [hex: :makeup_html, repo: "hexpm", optional: true]}], "hexpm", "2756e357742fecd9749b489b85d67c9ce99c465f2e75728d9e6dc8d704b973de"}, "excoveralls": {:hex, :excoveralls, "0.18.5", "e229d0a65982613332ec30f07940038fe451a2e5b29bce2a5022165f0c9b157e", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "523fe8a15603f86d64852aab2abe8ddbd78e68579c8525ae765facc5eae01562"}, + "exqlite": {:hex, :exqlite, "0.37.0", "701e7e02679e8c1bb6da331ea93d83b481c714b0831e82e2f8a73375b3d93a9e", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:db_connection, "~> 2.1", [hex: :db_connection, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.8", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:table, "~> 0.1.0", [hex: :table, repo: "hexpm", optional: true]}], "hexpm", "a44816dd0d234fba68c47a3609af61d306d24ef517a89bfaee4d6a811792d913"}, "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"}, "finch": {:hex, :finch, "0.23.0", "e3f9287ac25a8832f848b144c2b57346aac65b205e2e0629a52adfe6507fd837", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.8", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "80e58d3f936f57e3fdf404f83a3642897ae6d9fb642934e46da4d8fe761b99d5"}, "gproc": {:hex, :gproc, "0.9.1", "f1df0364423539cf0b80e8201c8b1839e229e5f9b3ccb944c5834626998f5b8c", [:rebar3], [], "hexpm", 
"905088e32e72127ed9466f0bac0d8e65704ca5e73ee5a62cb073c3117916d507"},