From 0ee13872b4a4ed4aa4f88c5bf0636157b1e578d1 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 26 Jun 2026 13:26:34 -0400 Subject: [PATCH 1/4] feat(storage)!: resolve repositories to per-keypair Tigris buckets Repositories were served from one shared bucket chrooted by a raw, variable-depth path. Every git request now resolves through a pluggable repofs.Resolver hook keyed on a validated {orgID}/{repoName} path (the .git suffix is stripped). The production resolver (internal/tigrisfs) treats the HTTP Basic-auth pair as a Tigris keypair, builds and caches one storage.Client per keypair, and gives each repository its own bucket (objgit-), created on the push path only. Smart-HTTP now uses ServeMux wildcards (paths are fixed-depth), threads the credential into resolution, and logs every authorization decision; auth.Operation/Decision gain String() and s3fs.S3Client is exported so the resolver can cache the hardened client. BREAKING CHANGE: repository URLs must be {orgID}/{repoName}; repositories moved from one shared bucket to a bucket per repo, so repositories created under the old layout no longer resolve. 
Assisted-by: Claude Opus 4.8 via Claude Code Signed-off-by: Xe Iaso --- cmd/objgitd/example_hook_test.go | 8 +- cmd/objgitd/git_protocol.go | 58 +++++--- cmd/objgitd/git_protocol_test.go | 53 ++++---- cmd/objgitd/head_test.go | 20 +-- cmd/objgitd/hooks_test.go | 14 +- cmd/objgitd/http.go | 161 ++++++++++++++-------- cmd/objgitd/http_test.go | 123 +++++++++++++---- cmd/objgitd/main.go | 18 ++- cmd/objgitd/ssh.go | 48 ++++--- cmd/objgitd/ssh_test.go | 15 ++- internal/auth/auth.go | 26 ++++ internal/repofs/repofs.go | 84 ++++++++++++ internal/repofs/repofs_test.go | 105 +++++++++++++++ internal/s3fs/filesystem.go | 5 + internal/tigrisfs/tigrisfs.go | 205 +++++++++++++++++++++++++++++ internal/tigrisfs/tigrisfs_test.go | 159 ++++++++++++++++++++++ 16 files changed, 930 insertions(+), 172 deletions(-) create mode 100644 internal/repofs/repofs.go create mode 100644 internal/repofs/repofs_test.go create mode 100644 internal/tigrisfs/tigrisfs.go create mode 100644 internal/tigrisfs/tigrisfs_test.go diff --git a/cmd/objgitd/example_hook_test.go b/cmd/objgitd/example_hook_test.go index fe58d87..199368a 100644 --- a/cmd/objgitd/example_hook_test.go +++ b/cmd/objgitd/example_hook_test.go @@ -12,8 +12,8 @@ import ( "time" "github.com/go-git/go-billy/v6/memfs" - "github.com/go-git/go-git/v6/plumbing/transport" "github.com/tigrisdata/objgit/internal/auth" + "github.com/tigrisdata/objgit/internal/repofs" ) // TestExampleHookRuns pushes the repository's own example hook @@ -37,8 +37,8 @@ func TestExampleHookRuns(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, authz: auth.AllowAnonymous{AllowWrite: true}, allowHooks: true, hookTimeout: 30 * time.Second, @@ -51,7 +51,7 @@ func TestExampleHookRuns(t *testing.T) { } go func() { _ = d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/example.git" + remote := "git://" + ln.Addr().String() + 
"/acme/example.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") runGit(t, work, "config", "user.email", "test@example.com") diff --git a/cmd/objgitd/git_protocol.go b/cmd/objgitd/git_protocol.go index f72c8ef..3b2518b 100644 --- a/cmd/objgitd/git_protocol.go +++ b/cmd/objgitd/git_protocol.go @@ -17,6 +17,7 @@ import ( "github.com/go-git/go-git/v6/storage/filesystem" "github.com/tigrisdata/objgit/internal/auth" "github.com/tigrisdata/objgit/internal/metrics" + "github.com/tigrisdata/objgit/internal/repofs" ) // handshakeTimeout bounds how long a client has to send its git-proto-request. @@ -43,28 +44,42 @@ func (d *daemon) authorize(ctx context.Context, req auth.Request) auth.Decision return dec } -// daemon serves the git:// (TCP) protocol out of a billy filesystem. +// daemon serves the git protocols out of billy filesystems resolved per repo. type daemon struct { - fs billy.Filesystem - loader transport.Loader - authz auth.Authorizer + // sysFS holds daemon-level state that is not scoped to a repository (the SSH + // host key); repository storage is resolved per request via resolver. + sysFS billy.Filesystem + resolver repofs.Resolver + authz auth.Authorizer // allowHooks gates running .objgit/hooks/receive-pack after a push. allowHooks bool hookTimeout time.Duration } -// load opens the storer for repoPath and heals a dangling HEAD before returning -// it (see ensureHEAD). It preserves the loader's error verbatim — notably +// storerFor returns the bare-repository storer rooted at fs, or +// transport.ErrRepositoryNotFound when no repository exists there. It reuses +// go-git's own bare-repo detection (a "config" file at the root) by loading the +// repository at the filesystem root. +func storerFor(fs billy.Filesystem) (storage.Storer, error) { + return transport.NewFilesystemLoader(fs, false).Load(&url.URL{Path: "/"}) +} + +// load resolves the storer for ref and heals a dangling HEAD before returning it +// (see ensureHEAD). 
It preserves storerFor's error verbatim — notably // transport.ErrRepositoryNotFound, which callers map to a 404 — and treats a // heal failure as non-fatal so a clone is never broken by a transient HEAD write. -func (d *daemon) load(repoPath string) (storage.Storer, error) { - st, err := d.loader.Load(&url.URL{Path: repoPath}) +func (d *daemon) load(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential) (storage.Storer, error) { + fs, err := d.resolver.Resolve(ctx, ref, cred, false) + if err != nil { + return nil, err + } + st, err := storerFor(fs) if err != nil { return nil, err } if err := ensureHEAD(st); err != nil { - slog.Warn("could not repoint dangling HEAD", "path", repoPath, "err", err) + slog.Warn("could not repoint dangling HEAD", "repo", ref.Path(), "err", err) } return st, nil } @@ -140,29 +155,32 @@ func pickDefaultBranch(st storage.Storer) (plumbing.ReferenceName, error) { return first, nil } -// loadOrInit returns the storer for repoPath, creating an empty bare repository -// on demand. Git's daemon never auto-creates; objgitd does, so a first push to -// a new path just works. -func (d *daemon) loadOrInit(repoPath string) (storage.Storer, error) { - st, err := d.load(repoPath) +// loadOrInit returns the storer for ref, creating an empty bare repository on +// demand. Git's daemon never auto-creates; objgitd does, so a first push to a +// new path just works. 
+func (d *daemon) loadOrInit(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential) (storage.Storer, error) { + fs, err := d.resolver.Resolve(ctx, ref, cred, true) + if err != nil { + return nil, err + } + + st, err := storerFor(fs) if err == nil { + if err := ensureHEAD(st); err != nil { + slog.Warn("could not repoint dangling HEAD", "repo", ref.Path(), "err", err) + } return st, nil } if !errors.Is(err, transport.ErrRepositoryNotFound) { return nil, err } - fs, err := d.fs.Chroot(repoPath) - if err != nil { - return nil, fmt.Errorf("chroot %q: %w", repoPath, err) - } - st = filesystem.NewStorage(fs, cache.NewObjectLRUDefault()) if _, err := git.Init(st, git.WithDefaultBranch(plumbing.NewBranchReferenceName("main"))); err != nil { return nil, fmt.Errorf("init bare repo: %w", err) } metrics.ReposCreated() - slog.Info("created repository", "path", repoPath) + slog.Info("created repository", "repo", ref.Path()) return st, nil } diff --git a/cmd/objgitd/git_protocol_test.go b/cmd/objgitd/git_protocol_test.go index 1e48455..a143d6b 100644 --- a/cmd/objgitd/git_protocol_test.go +++ b/cmd/objgitd/git_protocol_test.go @@ -20,6 +20,7 @@ import ( "github.com/go-git/go-git/v6/plumbing/transport" "github.com/tigrisdata/objgit/internal/auth" "github.com/tigrisdata/objgit/internal/metrics" + "github.com/tigrisdata/objgit/internal/repofs" ) // ServeGitProtocol accepts connections on l until ctx is cancelled or Accept fails. @@ -71,6 +72,12 @@ func (d *daemon) handleGitProtocol(ctx context.Context, conn net.Conn) error { "remote", conn.RemoteAddr().String(), ) + ref, err := repofs.Parse(req.Pathname) + if err != nil { + _, _ = pktline.WriteError(conn, err) + return fmt.Errorf("invalid repo path %q: %w", req.Pathname, err) + } + // ExtraParams carries e.g. "version=2"; transport.ProtocolVersion splits on ":". 
gitProtocol := strings.Join(req.ExtraParams, ":") @@ -83,7 +90,7 @@ func (d *daemon) handleGitProtocol(ctx context.Context, conn net.Conn) error { start := time.Now() if d.authorize(ctx, auth.Request{ - Repo: req.Pathname, + Repo: ref.Path(), Operation: operationFor(req.RequestCommand), Cred: auth.Anonymous{}, Transport: "git", @@ -93,7 +100,7 @@ func (d *daemon) handleGitProtocol(ctx context.Context, conn net.Conn) error { return fmt.Errorf("access denied for %q (%s)", req.Pathname, req.RequestCommand) } - err := d.serveGit(ctx, conn, r, req, gitProtocol) + err = d.serveGit(ctx, conn, r, req, ref, gitProtocol) status := "ok" if err != nil { status = "error" @@ -104,10 +111,10 @@ func (d *daemon) handleGitProtocol(ctx context.Context, conn net.Conn) error { // serveGit dispatches a parsed, authorized git:// request to the matching // go-git transport command. -func (d *daemon) serveGit(ctx context.Context, conn net.Conn, r io.ReadCloser, req packp.GitProtoRequest, gitProtocol string) error { +func (d *daemon) serveGit(ctx context.Context, conn net.Conn, r io.ReadCloser, req packp.GitProtoRequest, ref repofs.RepoRef, gitProtocol string) error { switch req.RequestCommand { case transport.UploadPackService: - st, err := d.load(req.Pathname) + st, err := d.load(ctx, ref, repofs.Credential{}) if err != nil { _, _ = pktline.WriteError(conn, fmt.Errorf("repository %q not found", req.Pathname)) return fmt.Errorf("loading %q: %w", req.Pathname, err) @@ -117,7 +124,7 @@ func (d *daemon) serveGit(ctx context.Context, conn net.Conn, r io.ReadCloser, r }) case transport.UploadArchiveService: - st, err := d.load(req.Pathname) + st, err := d.load(ctx, ref, repofs.Credential{}) if err != nil { _, _ = pktline.WriteError(conn, fmt.Errorf("repository %q not found", req.Pathname)) return fmt.Errorf("loading %q: %w", req.Pathname, err) @@ -125,12 +132,12 @@ func (d *daemon) serveGit(ctx context.Context, conn net.Conn, r io.ReadCloser, r return transport.UploadArchive(ctx, st, r, conn, 
&transport.UploadArchiveRequest{}) case transport.ReceivePackService: - st, err := d.loadOrInit(req.Pathname) + st, err := d.loadOrInit(ctx, ref, repofs.Credential{}) if err != nil { _, _ = pktline.WriteError(conn, fmt.Errorf("cannot open repository %q", req.Pathname)) return fmt.Errorf("opening %q for push: %w", req.Pathname, err) } - return d.receivePack(ctx, st, req.Pathname, r, conn, &transport.ReceivePackRequest{ + return d.receivePack(ctx, st, ref.Path(), r, conn, &transport.ReceivePackRequest{ GitProtocol: gitProtocol, }) @@ -150,9 +157,9 @@ func TestDaemonPushCreatesRepo(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: true}, + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: true}, } ctx, cancel := context.WithCancel(context.Background()) @@ -166,7 +173,7 @@ func TestDaemonPushCreatesRepo(t *testing.T) { srvErr := make(chan error, 1) go func() { srvErr <- d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/test.git" + remote := "git://" + ln.Addr().String() + "/acme/test.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") @@ -177,8 +184,8 @@ func TestDaemonPushCreatesRepo(t *testing.T) { // The repository does not exist yet; the push must create it. runGit(t, work, "push", remote, "main") - if _, err := fs.Stat("/test.git/config"); err != nil { - t.Fatalf("expected bare repo to be created on push, but %q is missing: %v", "/test.git/config", err) + if _, err := fs.Stat("/acme/test/config"); err != nil { + t.Fatalf("expected bare repo to be created on push, but %q is missing: %v", "/acme/test/config", err) } // Round-trip: a clone must recover the pushed commit. 
@@ -207,9 +214,9 @@ func TestDaemonPushDisabled(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: false}, + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: false}, } ctx, cancel := context.WithCancel(context.Background()) @@ -221,7 +228,7 @@ func TestDaemonPushDisabled(t *testing.T) { } go func() { _ = d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/test.git" + remote := "git://" + ln.Addr().String() + "/acme/test.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") @@ -233,7 +240,7 @@ func TestDaemonPushDisabled(t *testing.T) { t.Fatalf("expected push to be rejected when allowPush is false, got success:\n%s", out) } - if _, err := fs.Stat("/test.git/config"); err == nil { + if _, err := fs.Stat("/acme/test/config"); err == nil { t.Fatal("repository must not be created when push is disabled") } } @@ -251,9 +258,9 @@ func TestDaemonPushKeepsPack(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: true}, + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: true}, } ctx, cancel := context.WithCancel(context.Background()) @@ -265,7 +272,7 @@ func TestDaemonPushKeepsPack(t *testing.T) { } go func() { _ = d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/test.git" + remote := "git://" + ln.Addr().String() + "/acme/test.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") @@ -276,7 +283,7 @@ func TestDaemonPushKeepsPack(t *testing.T) { runGit(t, work, "commit", "-m", "initial") // blob + tree + commit runGit(t, work, "push", remote, "main") - assertPackedRepo(t, fs, "/test.git") + assertPackedRepo(t, fs, "/acme/test") } // assertPackedRepo fails unless repoPath holds at least one packfile and no loose diff 
--git a/cmd/objgitd/head_test.go b/cmd/objgitd/head_test.go index 681cea7..fe0aeff 100644 --- a/cmd/objgitd/head_test.go +++ b/cmd/objgitd/head_test.go @@ -13,9 +13,9 @@ import ( "github.com/go-git/go-billy/v6/memfs" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/cache" - "github.com/go-git/go-git/v6/plumbing/transport" "github.com/go-git/go-git/v6/storage/filesystem" "github.com/tigrisdata/objgit/internal/auth" + "github.com/tigrisdata/objgit/internal/repofs" ) // dummyHash is a stand-in object id for branch refs in unit tests; ensureHEAD @@ -148,11 +148,11 @@ func TestSmartHTTPHealsDanglingHEAD(t *testing.T) { } fs := memfs.New() - ts := httptest.NewServer(&daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: true}, - }) + ts := httptest.NewServer((&daemon{ + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: true}, + }).httpHandler()) t.Cleanup(ts.Close) // Push a single "master" branch (no "main"), like a project whose default @@ -164,7 +164,7 @@ func TestSmartHTTPHealsDanglingHEAD(t *testing.T) { writeFile(t, filepath.Join(work, "README.md"), "hello\n") runGit(t, work, "add", ".") runGit(t, work, "commit", "-m", "initial") - if out, err := tryGit(work, "push", ts.URL+"/go.git", "master"); err != nil { + if out, err := tryGit(work, "push", ts.URL+"/acme/go.git", "master"); err != nil { t.Fatalf("push failed: %v\n%s", err, out) } @@ -172,10 +172,10 @@ func TestSmartHTTPHealsDanglingHEAD(t *testing.T) { // would otherwise have already fixed it): point HEAD back at the dangling // refs/heads/main directly in the backing store. The very next load (this // clone) must heal it on the way to serving the advertisement. 
- breakHEAD(t, fs, "/go.git") + breakHEAD(t, fs, "/acme/go") dst := t.TempDir() - out, err := tryGit(dst, "clone", ts.URL+"/go.git", "cloned") + out, err := tryGit(dst, "clone", ts.URL+"/acme/go.git", "cloned") if err != nil { t.Fatalf("clone failed: %v\n%s", err, out) } @@ -195,7 +195,7 @@ func TestSmartHTTPHealsDanglingHEAD(t *testing.T) { } // After a load-healed clone, the advertisement now carries the symref. - if body := getInfoRefs(t, ts.URL+"/go.git"); !strings.Contains(body, "symref=HEAD:refs/heads/master") { + if body := getInfoRefs(t, ts.URL+"/acme/go.git"); !strings.Contains(body, "symref=HEAD:refs/heads/master") { t.Errorf("expected symref=HEAD:refs/heads/master after heal; advertisement:\n%q", body) } } diff --git a/cmd/objgitd/hooks_test.go b/cmd/objgitd/hooks_test.go index 90aa06f..9756573 100644 --- a/cmd/objgitd/hooks_test.go +++ b/cmd/objgitd/hooks_test.go @@ -15,8 +15,8 @@ import ( "github.com/go-git/go-billy/v6/memfs" "github.com/go-git/go-git/v6/plumbing" - "github.com/go-git/go-git/v6/plumbing/transport" "github.com/tigrisdata/objgit/internal/auth" + "github.com/tigrisdata/objgit/internal/repofs" ) func TestDiffRefs(t *testing.T) { @@ -106,8 +106,8 @@ func TestReceivePackHook(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, authz: auth.AllowAnonymous{AllowWrite: true}, allowHooks: true, hookTimeout: 30 * time.Second, @@ -122,7 +122,7 @@ func TestReceivePackHook(t *testing.T) { } go func() { _ = d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/hooked.git" + remote := "git://" + ln.Addr().String() + "/acme/hooked.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") @@ -186,8 +186,8 @@ func TestReceivePackHookAbsent(t *testing.T) { fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, authz: 
auth.AllowAnonymous{AllowWrite: true}, allowHooks: true, hookTimeout: 30 * time.Second, @@ -202,7 +202,7 @@ func TestReceivePackHookAbsent(t *testing.T) { } go func() { _ = d.ServeGitProtocol(ctx, ln) }() - remote := "git://" + ln.Addr().String() + "/plain.git" + remote := "git://" + ln.Addr().String() + "/acme/plain.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") runGit(t, work, "config", "user.email", "test@example.com") diff --git a/cmd/objgitd/http.go b/cmd/objgitd/http.go index ca1ad2c..e5bf428 100644 --- a/cmd/objgitd/http.go +++ b/cmd/objgitd/http.go @@ -7,7 +7,7 @@ import ( "io" "log/slog" "net/http" - "strings" + "path" "time" "github.com/go-git/go-git/v6/plumbing/transport" @@ -15,29 +15,40 @@ import ( "github.com/go-git/go-git/v6/utils/ioutil" "github.com/tigrisdata/objgit/internal/auth" "github.com/tigrisdata/objgit/internal/metrics" + "github.com/tigrisdata/objgit/internal/repofs" ) -// ServeHTTP speaks the git smart-HTTP protocol. It dispatches on the URL suffix -// the way git-http-backend does: repository paths are variable-depth (e.g. -// /foo/bar.git) and precede a fixed endpoint suffix, which http.ServeMux's -// wildcards cannot express. -func (d *daemon) ServeHTTP(w http.ResponseWriter, r *http.Request) { - p := r.URL.Path - switch { - case r.Method == http.MethodGet && strings.HasSuffix(p, "/info/refs"): - d.handleInfoRefs(w, r, strings.TrimSuffix(p, "/info/refs")) - case r.Method == http.MethodPost && strings.HasSuffix(p, "/git-upload-pack"): - d.handleRPC(w, r, transport.UploadPackService, strings.TrimSuffix(p, "/git-upload-pack")) - case r.Method == http.MethodPost && strings.HasSuffix(p, "/git-receive-pack"): - d.handleRPC(w, r, transport.ReceivePackService, strings.TrimSuffix(p, "/git-receive-pack")) - default: - http.NotFound(w, r) +// httpHandler builds the smart-HTTP router. 
Repository paths are now a fixed +// {orgID}/{repoName} depth, so http.ServeMux wildcards express the routes +// directly (the captured repoName still carries a ".git" the ref parser strips). +// Anything that is not exactly two segments before the endpoint suffix never +// matches a pattern and falls through to ServeMux's 404. +func (d *daemon) httpHandler() http.Handler { + mux := http.NewServeMux() + mux.HandleFunc("GET /{orgID}/{repoName}/info/refs", d.handleInfoRefs) + mux.HandleFunc("POST /{orgID}/{repoName}/git-upload-pack", func(w http.ResponseWriter, r *http.Request) { + d.handleRPC(w, r, transport.UploadPackService) + }) + mux.HandleFunc("POST /{orgID}/{repoName}/git-receive-pack", func(w http.ResponseWriter, r *http.Request) { + d.handleRPC(w, r, transport.ReceivePackService) + }) + return mux +} + +// repoRef builds a RepoRef from the {orgID}/{repoName} path wildcards. It writes +// a 400 and returns ok=false when the pair is not a valid repository path. +func repoRef(w http.ResponseWriter, r *http.Request) (repofs.RepoRef, bool) { + ref, err := repofs.Parse(path.Join(r.PathValue("orgID"), r.PathValue("repoName"))) + if err != nil { + http.Error(w, err.Error(), http.StatusBadRequest) + return repofs.RepoRef{}, false } + return ref, true } // handleInfoRefs serves the reference-discovery phase: -// GET /{repo}/info/refs?service=git-(upload|receive)-pack. -func (d *daemon) handleInfoRefs(w http.ResponseWriter, r *http.Request, repoPath string) { +// GET /{orgID}/{repoName}/info/refs?service=git-(upload|receive)-pack. 
+func (d *daemon) handleInfoRefs(w http.ResponseWriter, r *http.Request) { service := r.URL.Query().Get("service") switch service { case transport.UploadPackService, transport.ReceivePackService: @@ -46,14 +57,19 @@ func (d *daemon) handleInfoRefs(w http.ResponseWriter, r *http.Request, repoPath return } - st, ok := d.resolve(w, r, service, repoPath) + ref, ok := repoRef(w, r) + if !ok { + return + } + + st, ok := d.resolve(w, r, service, ref) if !ok { return } slog.Info("serving smart-http advertisement", "service", service, - "path", repoPath, + "repo", ref.Path(), "remote", r.RemoteAddr, ) @@ -81,17 +97,23 @@ func (d *daemon) handleInfoRefs(w http.ResponseWriter, r *http.Request, repoPath }) } if err != nil { - slog.Error("smart-http advertisement failed", "service", service, "path", repoPath, "err", err) + slog.Error("smart-http advertisement failed", "service", service, "repo", ref.Path(), "err", err) } } // handleRPC serves a stateless negotiation round: -// POST /{repo}/git-(upload|receive)-pack. -func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service, repoPath string) { +// POST /{orgID}/{repoName}/git-(upload|receive)-pack. +func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service string) { defer metrics.TrackInFlight("http")() start := time.Now() - st, ok := d.resolve(w, r, service, repoPath) + ref, ok := repoRef(w, r) + if !ok { + metrics.ObserveGitOp("http", service, "error", start) + return + } + + st, ok := d.resolve(w, r, service, ref) if !ok { // resolve has already written the HTTP error; a denied authorization is // recorded by d.authorize in auth_requests_total. 
Count the failed op @@ -113,7 +135,7 @@ func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service, repo slog.Info("serving smart-http rpc", "service", service, - "path", repoPath, + "repo", ref.Path(), "remote", r.RemoteAddr, ) @@ -141,7 +163,7 @@ func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service, repo GitProtocol: gitProtocol, }) case transport.ReceivePackService: - err = d.receivePack(r.Context(), st, repoPath, in, out, &transport.ReceivePackRequest{ + err = d.receivePack(r.Context(), st, ref.Path(), in, out, &transport.ReceivePackRequest{ StatelessRPC: true, GitProtocol: gitProtocol, }) @@ -149,7 +171,7 @@ func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service, repo status := "ok" if err != nil { // The status line is already sent, so this can only be logged. - slog.Error("smart-http rpc failed", "service", service, "path", repoPath, "err", err) + slog.Error("smart-http rpc failed", "service", service, "repo", ref.Path(), "err", err) status = "error" } metrics.ObserveGitOp("http", service, status, start) @@ -157,48 +179,81 @@ func (d *daemon) handleRPC(w http.ResponseWriter, r *http.Request, service, repo // resolve loads the storer for an HTTP request, authorizing via the daemon's // Authorizer before touching the repository. It writes an HTTP error and -// returns ok=false when the request cannot proceed. -func (d *daemon) resolve(w http.ResponseWriter, r *http.Request, service, repoPath string) (storage.Storer, bool) { - switch d.authorize(r.Context(), auth.Request{ - Repo: repoPath, - Operation: operationFor(service), - Cred: credFromRequest(r), +// returns ok=false when the request cannot proceed. The Basic-auth credential +// is threaded into the filesystem resolver so a backend can route per caller. 
+func (d *daemon) resolve(w http.ResponseWriter, r *http.Request, service string, ref repofs.RepoRef) (storage.Storer, bool) { + authCred, fsCred := credFromRequest(r) + op := operationFor(service) + user, _, hasBasicAuth := r.BasicAuth() + decision := d.authorize(r.Context(), auth.Request{ + Repo: ref.Path(), + Operation: op, + Cred: authCred, Transport: "http", - }) { + }) + slog.Debug("authorizing smart-http request", + "repo", ref.Path(), + "service", service, + "operation", op, + "decision", decision, + "basic_auth", hasBasicAuth, + "user", user, + "remote", r.RemoteAddr, + ) + switch decision { case auth.Allow: // authorized; fall through to repo resolution case auth.Unauthenticated: + slog.Info("smart-http request needs authentication", + "repo", ref.Path(), "service", service, "operation", op, + "basic_auth", hasBasicAuth, "remote", r.RemoteAddr, + ) w.Header().Set("WWW-Authenticate", `Basic realm="objgit"`) http.Error(w, "authentication required", http.StatusUnauthorized) return nil, false default: // auth.Deny + // The most common cause is a push (receive-pack -> Write) while the + // authorizer's write gate is closed (e.g. -allow-push unset). 
+ slog.Warn("smart-http request denied by authorizer", + "repo", ref.Path(), "service", service, "operation", op, + "basic_auth", hasBasicAuth, "user", user, "remote", r.RemoteAddr, + ) http.Error(w, "access denied", http.StatusForbidden) return nil, false } if service == transport.ReceivePackService { - st, err := d.loadOrInit(repoPath) + st, err := d.loadOrInit(r.Context(), ref, fsCred) if err != nil { - slog.Error("opening repository for push", "path", repoPath, "err", err) - http.Error(w, "cannot open repository", http.StatusInternalServerError) - return nil, false + return nil, writeResolveError(w, ref, "opening repository for push", err) } return st, true } - st, err := d.load(repoPath) + st, err := d.load(r.Context(), ref, fsCred) if err != nil { - if errors.Is(err, transport.ErrRepositoryNotFound) { - http.Error(w, "repository not found", http.StatusNotFound) - return nil, false - } - slog.Error("loading repository", "path", repoPath, "err", err) - http.Error(w, "cannot open repository", http.StatusInternalServerError) - return nil, false + return nil, writeResolveError(w, ref, "loading repository", err) } return st, true } +// writeResolveError renders a repository-resolution error in HTTP terms: a +// missing credential is a 401 challenge, a missing repository is a 404, and +// anything else is a logged 500. It always returns false (resolution failed). 
+func writeResolveError(w http.ResponseWriter, ref repofs.RepoRef, action string, err error) bool { + switch { + case errors.Is(err, repofs.ErrUnauthenticated): + w.Header().Set("WWW-Authenticate", `Basic realm="objgit"`) + http.Error(w, "authentication required", http.StatusUnauthorized) + case errors.Is(err, transport.ErrRepositoryNotFound): + http.Error(w, "repository not found", http.StatusNotFound) + default: + slog.Error(action, "repo", ref.Path(), "err", err) + http.Error(w, "cannot open repository", http.StatusInternalServerError) + } + return false +} + // flushWriter flushes the underlying http.ResponseWriter after every write so // sideband progress (hook output) reaches the client incrementally instead of // being buffered until the handler returns. @@ -213,12 +268,14 @@ func (fw flushWriter) Write(p []byte) (int, error) { return n, err } -// credFromRequest extracts an auth credential from an HTTP request: HTTP Basic -// if present, otherwise anonymous. It does not validate — the Authorizer owns -// the user store. -func credFromRequest(r *http.Request) auth.Credential { +// credFromRequest extracts the credential from an HTTP request: HTTP Basic if +// present, otherwise anonymous. It returns both the auth.Credential for the +// Authorizer and the repofs.Credential for the filesystem resolver. Neither is +// validated — the Authorizer owns the user store and the Resolver decides what +// to do with the credential. 
+func credFromRequest(r *http.Request) (auth.Credential, repofs.Credential) { if u, p, ok := r.BasicAuth(); ok { - return auth.BasicAuth{Username: u, Password: p} + return auth.BasicAuth{Username: u, Password: p}, repofs.Credential{Username: u, Password: p} } - return auth.Anonymous{} + return auth.Anonymous{}, repofs.Credential{} } diff --git a/cmd/objgitd/http_test.go b/cmd/objgitd/http_test.go index d2e91c8..5a0efdb 100644 --- a/cmd/objgitd/http_test.go +++ b/cmd/objgitd/http_test.go @@ -1,18 +1,21 @@ package main import ( + "context" "log/slog" + "net/http" "net/http/httptest" "os/exec" "path/filepath" "strings" + "sync" "testing" "time" "github.com/go-git/go-billy/v6" "github.com/go-git/go-billy/v6/memfs" - "github.com/go-git/go-git/v6/plumbing/transport" "github.com/tigrisdata/objgit/internal/auth" + "github.com/tigrisdata/objgit/internal/repofs" ) // TestSmartHTTP drives a real git client against the smart-HTTP handler over an @@ -51,7 +54,7 @@ func TestSmartHTTP(t *testing.T) { } { t.Run(tt.name, func(t *testing.T) { ts, fs := newHTTPServer(t, tt.allowPush) - remote := ts.URL + "/test.git" + remote := ts.URL + "/acme/test.git" var srcHead string if tt.doPush { @@ -69,7 +72,7 @@ func TestSmartHTTP(t *testing.T) { } // The bare repo must exist on disk iff a push was expected to land. 
- _, statErr := fs.Stat("/test.git/config") + _, statErr := fs.Stat("/acme/test/config") pushLanded := tt.doPush && !tt.wantPushErr if pushLanded && statErr != nil { t.Fatalf("expected repo to be created on push, but config missing: %v", statErr) @@ -106,11 +109,11 @@ func newHTTPServer(t *testing.T, allowPush bool) (*httptest.Server, billy.Filesy t.Helper() fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: allowPush}, + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: allowPush}, } - ts := httptest.NewServer(d) + ts := httptest.NewServer(d.httpHandler()) t.Cleanup(ts.Close) return ts, fs } @@ -125,29 +128,29 @@ func TestSmartHTTPAnonymousReadWhilePushDisabled(t *testing.T) { // Seed a repo via a push-enabled server over a shared filesystem. fs := memfs.New() - seed := httptest.NewServer(&daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: true}, - }) + seed := httptest.NewServer((&daemon{ + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: true}, + }).httpHandler()) t.Cleanup(seed.Close) work := seedRepo(t) srcHead := strings.TrimSpace(runGit(t, work, "rev-parse", "HEAD")) - if out, err := tryGit(work, "push", seed.URL+"/test.git", "main"); err != nil { + if out, err := tryGit(work, "push", seed.URL+"/acme/test.git", "main"); err != nil { t.Fatalf("seed push failed: %v\n%s", err, out) } // Serve the same filesystem with push disabled and clone from it. 
- ro := httptest.NewServer(&daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), - authz: auth.AllowAnonymous{AllowWrite: false}, - }) + ro := httptest.NewServer((&daemon{ + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: false}, + }).httpHandler()) t.Cleanup(ro.Close) dst := t.TempDir() - if out, err := tryGit(dst, "clone", ro.URL+"/test.git", "cloned"); err != nil { + if out, err := tryGit(dst, "clone", ro.URL+"/acme/test.git", "cloned"); err != nil { t.Fatalf("anonymous clone should succeed with push disabled: %v\n%s", err, out) } gotHead := strings.TrimSpace(runGit(t, filepath.Join(dst, "cloned"), "rev-parse", "HEAD")) @@ -172,13 +175,13 @@ func TestSmartHTTPHookStreams(t *testing.T) { defer slog.SetDefault(prev) fs := memfs.New() - ts := httptest.NewServer(&daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), + ts := httptest.NewServer((&daemon{ + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, authz: auth.AllowAnonymous{AllowWrite: true}, allowHooks: true, hookTimeout: 30 * time.Second, - }) + }).httpHandler()) t.Cleanup(ts.Close) work := t.TempDir() @@ -195,7 +198,7 @@ func TestSmartHTTPHookStreams(t *testing.T) { runGit(t, work, "add", ".") runGit(t, work, "commit", "-m", "with hook") - out, err := tryGit(work, "push", ts.URL+"/hooked.git", "main") + out, err := tryGit(work, "push", ts.URL+"/acme/hooked.git", "main") if err != nil { t.Fatalf("push failed: %v\n%s", err, out) } @@ -224,3 +227,75 @@ func seedRepo(t *testing.T) string { runGit(t, work, "commit", "--allow-empty", "-m", "initial") return work } + +// recordingResolver wraps a Resolver and records the last credential it saw, so +// a test can assert the HTTP Basic-auth credential reached filesystem resolution. 
+type recordingResolver struct { + inner repofs.Resolver + mu sync.Mutex + lastCred repofs.Credential +} + +func (r *recordingResolver) Resolve(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential, create bool) (billy.Filesystem, error) { + r.mu.Lock() + r.lastCred = cred + r.mu.Unlock() + return r.inner.Resolve(ctx, ref, cred, create) +} + +func (r *recordingResolver) credential() repofs.Credential { + r.mu.Lock() + defer r.mu.Unlock() + return r.lastCred +} + +// TestHTTPRejectsNonOrgRepoPath verifies the {orgID}/{repoName} shape is enforced +// by the router: a single-segment path matches no pattern and is a 404. +func TestHTTPRejectsNonOrgRepoPath(t *testing.T) { + fs := memfs.New() + d := &daemon{ + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, + authz: auth.AllowAnonymous{AllowWrite: true}, + } + ts := httptest.NewServer(d.httpHandler()) + t.Cleanup(ts.Close) + + resp, err := http.Get(ts.URL + "/single.git/info/refs?service=git-upload-pack") + if err != nil { + t.Fatalf("GET: %v", err) + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusNotFound { + t.Errorf("single-segment path: status = %d, want %d", resp.StatusCode, http.StatusNotFound) + } +} + +// TestHTTPCredentialReachesResolver verifies the HTTP Basic-auth username and +// password are threaded into the filesystem resolver. 
+func TestHTTPCredentialReachesResolver(t *testing.T) { + fs := memfs.New() + rec := &recordingResolver{inner: repofs.BucketResolver{Base: fs}} + d := &daemon{ + sysFS: fs, + resolver: rec, + authz: auth.AllowAnonymous{AllowWrite: true}, + } + ts := httptest.NewServer(d.httpHandler()) + t.Cleanup(ts.Close) + + req, err := http.NewRequest(http.MethodGet, ts.URL+"/acme/widgets.git/info/refs?service=git-upload-pack", nil) + if err != nil { + t.Fatalf("NewRequest: %v", err) + } + req.SetBasicAuth("alice", "s3cret") + resp, err := http.DefaultClient.Do(req) + if err != nil { + t.Fatalf("Do: %v", err) + } + defer resp.Body.Close() + + if got, want := rec.credential(), (repofs.Credential{Username: "alice", Password: "s3cret"}); got != want { + t.Errorf("resolver credential = %+v, want %+v", got, want) + } +} diff --git a/cmd/objgitd/main.go b/cmd/objgitd/main.go index 5db8fe5..f146420 100644 --- a/cmd/objgitd/main.go +++ b/cmd/objgitd/main.go @@ -18,13 +18,13 @@ import ( "github.com/facebookgo/flagenv" "github.com/gliderlabs/ssh" - "github.com/go-git/go-git/v6/plumbing/transport" "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/tigrisdata/objgit" "github.com/tigrisdata/objgit/internal" "github.com/tigrisdata/objgit/internal/auth" "github.com/tigrisdata/objgit/internal/metrics" "github.com/tigrisdata/objgit/internal/s3fs" + "github.com/tigrisdata/objgit/internal/tigrisfs" "github.com/tigrisdata/storage-go" "golang.org/x/sync/errgroup" @@ -35,7 +35,7 @@ var ( httpBind = flag.String("http-bind", ":8080", "TCP address to listen on for the git smart-HTTP protocol; empty disables it") sshBind = flag.String("ssh-bind", "", "TCP address to listen on for the git-over-SSH protocol; empty disables it") metricsBind = flag.String("metrics-bind", ":9090", "TCP address to serve the Prometheus /metrics endpoint; empty disables it") - bucket = flag.String("bucket", "", "Tigris bucket that holds the git repositories") + bucket = flag.String("bucket", "", "Tigris bucket 
for daemon system state (the SSH host key); repositories live in per-keypair buckets, not here") allowPush = flag.Bool("allow-push", false, "allow unauthenticated git-receive-pack (push) requests") slogLevel = flag.String("slog-level", "INFO", "log level (DEBUG, INFO, WARN, ERROR)") @@ -132,9 +132,17 @@ func main() { os.Exit(1) } + // Repositories live in per-keypair Tigris buckets resolved at request time + // (one bucket per repo, created on first push). The -bucket fsys above backs + // only daemon system state (the SSH host key). The per-keypair model can't + // share the single-bucket listing/pack caches, so repo-side caching is off. + if cache != nil || *packCacheBytes != 0 { + slog.Info("per-keypair resolver active: repo-side S3 caching is disabled (caches apply to the system bucket only)") + } + d := &daemon{ - fs: fsys, - loader: transport.NewFilesystemLoader(fsys, false), + sysFS: fsys, + resolver: tigrisfs.New(), authz: auth.AllowAnonymous{AllowWrite: *allowPush}, allowHooks: *allowHooks, hookTimeout: *hookTimeout, @@ -197,7 +205,7 @@ func main() { slog.Error("can't listen", "http_bind", *httpBind, "err", err) os.Exit(1) } - srv := &http.Server{Handler: d} + srv := &http.Server{Handler: d.httpHandler()} g.Go(func() error { if err := srv.Serve(ln); err != nil && !errors.Is(err, http.ErrServerClosed) { return err diff --git a/cmd/objgitd/ssh.go b/cmd/objgitd/ssh.go index 1edce57..8283d18 100644 --- a/cmd/objgitd/ssh.go +++ b/cmd/objgitd/ssh.go @@ -10,7 +10,6 @@ import ( "log/slog" "os" "path/filepath" - "strings" "time" ssh "github.com/gliderlabs/ssh" @@ -20,6 +19,7 @@ import ( gossh "golang.org/x/crypto/ssh" "github.com/tigrisdata/objgit/internal/auth" "github.com/tigrisdata/objgit/internal/metrics" + "github.com/tigrisdata/objgit/internal/repofs" ) const hostKeyPath = ".objgit/ssh_host_ed25519_key" @@ -100,7 +100,7 @@ func gitServiceFor(command string) (string, bool) { // newSSHServer builds the git-over-SSH server. 
It accepts every public key at // connect time and defers real authorization to handleSSH via daemon.authz. func newSSHServer(d *daemon, addr string) (*ssh.Server, error) { - signer, err := loadOrCreateHostKey(d.fs) + signer, err := loadOrCreateHostKey(d.sysFS) if err != nil { return nil, fmt.Errorf("ssh host key: %w", err) } @@ -136,7 +136,12 @@ func (d *daemon) handleSSH(s ssh.Session) { } // ssh://host/foo.git sends "/foo.git"; scp-style host:foo.git sends "foo.git". - repoPath := strings.TrimPrefix(cmd[1], "/") + ref, err := repofs.Parse(cmd[1]) + if err != nil { + fmt.Fprintf(s.Stderr(), "objgitd: %v\n", err) + _ = s.Exit(1) + return + } var cred auth.Credential = auth.Anonymous{} if key := s.PublicKey(); key != nil { @@ -147,7 +152,7 @@ func (d *daemon) handleSSH(s ssh.Session) { start := time.Now() if d.authorize(s.Context(), auth.Request{ - Repo: repoPath, + Repo: ref.Path(), Operation: operationFor(service), Cred: cred, Transport: "ssh", @@ -160,12 +165,12 @@ func (d *daemon) handleSSH(s ssh.Session) { slog.Info("serving ssh request", "service", service, - "path", repoPath, + "repo", ref.Path(), "remote", s.RemoteAddr().String(), ) status := "ok" - if err := d.serveSSH(s, service, repoPath); err != nil { + if err := d.serveSSH(s, service, ref); err != nil { status = "error" } metrics.ObserveGitOp("ssh", service, status, start) @@ -176,7 +181,7 @@ func (d *daemon) handleSSH(s ssh.Session) { // repository cannot be opened it also writes a client-facing message and sets a // non-zero exit status, matching git's behavior. A mid-transfer error is logged // (the exit status is left to the session default, as before). -func (d *daemon) serveSSH(s ssh.Session, service, repoPath string) error { +func (d *daemon) serveSSH(s ssh.Session, service string, ref repofs.RepoRef) error { // SSH is a persistent stream like git://: the transport commands call Close // between negotiation rounds, which would tear down the channel, so wrap the // session in no-op closers. 
@@ -184,44 +189,47 @@ func (d *daemon) serveSSH(s ssh.Session, service, repoPath string) error { w := ioutil.WriteNopCloser(s) ctx := s.Context() + // SSH carries no Basic-auth credential into the filesystem resolver. + var cred repofs.Credential + // Protocol v2 negotiation (GIT_PROTOCOL via s.Environ()) is intentionally not // forwarded yet; v0/v1 is sufficient. See plan. switch service { case transport.UploadPackService: - st, err := d.load(repoPath) + st, err := d.load(ctx, ref, cred) if err != nil { - fmt.Fprintf(s.Stderr(), "objgitd: repository %q not found\n", repoPath) + fmt.Fprintf(s.Stderr(), "objgitd: repository %q not found\n", ref.Path()) _ = s.Exit(1) - return fmt.Errorf("loading %q: %w", repoPath, err) + return fmt.Errorf("loading %q: %w", ref.Path(), err) } if err := transport.UploadPack(ctx, st, r, w, &transport.UploadPackRequest{}); err != nil { - slog.Error("ssh upload-pack failed", "path", repoPath, "err", err) + slog.Error("ssh upload-pack failed", "repo", ref.Path(), "err", err) return err } case transport.UploadArchiveService: - st, err := d.load(repoPath) + st, err := d.load(ctx, ref, cred) if err != nil { - fmt.Fprintf(s.Stderr(), "objgitd: repository %q not found\n", repoPath) + fmt.Fprintf(s.Stderr(), "objgitd: repository %q not found\n", ref.Path()) _ = s.Exit(1) - return fmt.Errorf("loading %q: %w", repoPath, err) + return fmt.Errorf("loading %q: %w", ref.Path(), err) } if err := transport.UploadArchive(ctx, st, r, w, &transport.UploadArchiveRequest{}); err != nil { - slog.Error("ssh upload-archive failed", "path", repoPath, "err", err) + slog.Error("ssh upload-archive failed", "repo", ref.Path(), "err", err) return err } case transport.ReceivePackService: - st, err := d.loadOrInit(repoPath) + st, err := d.loadOrInit(ctx, ref, cred) if err != nil { - fmt.Fprintf(s.Stderr(), "objgitd: cannot open repository %q\n", repoPath) + fmt.Fprintf(s.Stderr(), "objgitd: cannot open repository %q\n", ref.Path()) _ = s.Exit(1) - return 
fmt.Errorf("opening %q for push: %w", repoPath, err) + return fmt.Errorf("opening %q for push: %w", ref.Path(), err) } // d.receivePack stores the pack whole (Scanner-bounded PackfileWriter, see // writePack) and runs push hooks afterward. - if err := d.receivePack(ctx, st, repoPath, r, w, &transport.ReceivePackRequest{}); err != nil { - slog.Error("ssh receive-pack failed", "path", repoPath, "err", err) + if err := d.receivePack(ctx, st, ref.Path(), r, w, &transport.ReceivePackRequest{}); err != nil { + slog.Error("ssh receive-pack failed", "repo", ref.Path(), "err", err) return err } } diff --git a/cmd/objgitd/ssh_test.go b/cmd/objgitd/ssh_test.go index b6ed5c5..5bb27e0 100644 --- a/cmd/objgitd/ssh_test.go +++ b/cmd/objgitd/ssh_test.go @@ -17,6 +17,7 @@ import ( "github.com/go-git/go-billy/v6/memfs" "github.com/go-git/go-git/v6/plumbing/transport" "github.com/tigrisdata/objgit/internal/auth" + "github.com/tigrisdata/objgit/internal/repofs" ) func TestGitServiceFor(t *testing.T) { @@ -128,8 +129,8 @@ func startSSHServer(t *testing.T, allowPush, allowHooks bool) (string, billy.Fil t.Helper() fs := memfs.New() d := &daemon{ - fs: fs, - loader: transport.NewFilesystemLoader(fs, false), + sysFS: fs, + resolver: repofs.BucketResolver{Base: fs}, authz: auth.AllowAnonymous{AllowWrite: allowPush}, allowHooks: allowHooks, hookTimeout: 30 * time.Second, @@ -211,10 +212,10 @@ func TestSSH(t *testing.T) { t.Run(tt.name, func(t *testing.T) { addr, fs := startSSHServer(t, tt.allowPush, false) env := gitSSHEnv(t) - remote := "ssh://git@" + addr + "/test.git" + remote := "ssh://git@" + addr + "/acme/test.git" // Confirm no repo exists before any push. - _, preStatErr := fs.Stat("/test.git/config") + _, preStatErr := fs.Stat("/acme/test/config") if preStatErr == nil { t.Fatal("test.git must not exist before any push") } @@ -235,7 +236,7 @@ func TestSSH(t *testing.T) { } // The bare repo must exist iff a push was expected to land. 
- _, statErr := fs.Stat("/test.git/config") + _, statErr := fs.Stat("/acme/test/config") pushLanded := tt.doPush && !tt.wantPushErr if pushLanded && statErr != nil { t.Fatalf("expected repo to be created on push, but config missing: %v", statErr) @@ -246,7 +247,7 @@ func TestSSH(t *testing.T) { if pushLanded { // SSH shares the Scanner-bounded PackfileWriter path: the push // must land as a packfile, not loose objects. - assertPackedRepo(t, fs, "/test.git") + assertPackedRepo(t, fs, "/acme/test") } dst := t.TempDir() @@ -284,7 +285,7 @@ func TestSSHHookFires(t *testing.T) { addr, _ := startSSHServer(t, true, true) env := gitSSHEnv(t) - remote := "ssh://git@" + addr + "/hooked.git" + remote := "ssh://git@" + addr + "/acme/hooked.git" work := t.TempDir() runGit(t, work, "init", "-b", "main") diff --git a/internal/auth/auth.go b/internal/auth/auth.go index 465e985..dc23abf 100644 --- a/internal/auth/auth.go +++ b/internal/auth/auth.go @@ -15,6 +15,18 @@ const ( Write ) +// String renders the operation for logs and metrics. +func (o Operation) String() string { + switch o { + case Read: + return "read" + case Write: + return "write" + default: + return "unknown" + } +} + // Credential is what the client presented. Exactly one concrete type per // scheme; a transport constructs the variant it can produce, or Anonymous. type Credential interface{ isCredential() } @@ -52,6 +64,20 @@ const ( Unauthenticated ) +// String renders the decision for logs and metrics. +func (d Decision) String() string { + switch d { + case Deny: + return "deny" + case Allow: + return "allow" + case Unauthenticated: + return "unauthenticated" + default: + return "unknown" + } +} + // Authorizer decides whether a request may proceed. This is the seam a real // authn/authz layer plugs into later. 
type Authorizer interface { diff --git a/internal/repofs/repofs.go b/internal/repofs/repofs.go new file mode 100644 index 0000000..54781ff --- /dev/null +++ b/internal/repofs/repofs.go @@ -0,0 +1,84 @@ +// Package repofs maps a transport repository path to the billy.Filesystem that +// holds it. It is the seam a real backend implements to route an org to its own +// bucket/credentials; the default implementation chroots one bucket filesystem. +// +// Like internal/auth, this package is transport-neutral: it imports only the +// standard library and go-billy, never a concrete transport. +package repofs + +import ( + "context" + "errors" + "path" + "strings" + + "github.com/go-git/go-billy/v6" +) + +// ErrInvalidPath is returned by Parse when a repository path is not of the form +// {orgID}/{repoName}. +var ErrInvalidPath = errors.New("repository path must be of the form {orgID}/{repoName}") + +// ErrUnauthenticated is returned by a Resolver when the request lacks the +// credential it needs to resolve a repository. Transports surface it as an +// authentication challenge (HTTP 401) rather than a 404 or 500. +var ErrUnauthenticated = errors.New("repofs: authentication required") + +// RepoRef identifies a repository. OrgID is an opaque reference a later API call +// will validate; for now it is accepted as-is. Name has any trailing ".git" +// stripped, so org/repo.git and org/repo denote the same repository. +type RepoRef struct { + OrgID string + Name string +} + +// Path is the canonical storage and identity path, "orgID/name". +func (r RepoRef) Path() string { return path.Join(r.OrgID, r.Name) } + +// Parse converts a raw transport path into a RepoRef. It trims surrounding +// slashes, requires exactly two non-empty segments, and strips a trailing +// ".git" from the name. OrgID is not otherwise validated. 
+func Parse(raw string) (RepoRef, error) {
+	org, name, ok := strings.Cut(strings.Trim(raw, "/"), "/")
+	name = strings.TrimSuffix(name, ".git")
+	// Path() joins with path.Join, which cleans "." and "..": "acme/.." (or
+	// "acme/...git") would otherwise resolve to "." and escape the repo root.
+	switch {
+	case !ok, strings.Contains(name, "/"), org == "", org == ".", org == "..", name == "", name == ".", name == "..":
+		return RepoRef{}, ErrInvalidPath
+	}
+	return RepoRef{OrgID: org, Name: name}, nil
+}
+
+// Credential carries the HTTP Basic-auth username and password a caller
+// presented (the zero value means none was presented). It is unvalidated; a
+// Resolver decides what, if anything, to do with it.
+type Credential struct {
+	Username string
+	Password string
+}
+
+// Resolver maps a RepoRef — plus the caller's credential — to the
+// billy.Filesystem rooted at that repository. The returned filesystem is the
+// repository root: go-git's storage layer is built directly on top of it.
+//
+// create distinguishes the write path from the read path: it is true on a push
+// (loadOrInit), allowing a backend to provision storage (e.g. create a bucket)
+// on demand, and false on a read (load), where a missing repository must surface
+// as transport.ErrRepositoryNotFound rather than being created.
+type Resolver interface {
+	Resolve(ctx context.Context, ref RepoRef, cred Credential, create bool) (billy.Filesystem, error)
+}
+
+// BucketResolver is the default Resolver: it chroots a single base filesystem
+// (the whole bucket) to ref.Path(), ignoring the credential. This preserves the
+// original single-bucket behavior.
+type BucketResolver struct {
+	Base billy.Filesystem
+}
+
+// Resolve chroots the base filesystem to the repository's "orgID/name" path.
+// Chroot is creation-free, so create is ignored.
+func (b BucketResolver) Resolve(_ context.Context, ref RepoRef, _ Credential, _ bool) (billy.Filesystem, error) { + return b.Base.Chroot(ref.Path()) +} diff --git a/internal/repofs/repofs_test.go b/internal/repofs/repofs_test.go new file mode 100644 index 0000000..6b79bac --- /dev/null +++ b/internal/repofs/repofs_test.go @@ -0,0 +1,105 @@ +package repofs + +import ( + "context" + "errors" + "testing" + + "github.com/go-git/go-billy/v6/memfs" +) + +func TestParse(t *testing.T) { + tests := []struct { + name string + input string + want RepoRef + wantErr error + }{ + { + name: "org and repo", + input: "acme/widgets", + want: RepoRef{OrgID: "acme", Name: "widgets"}, + }, + { + name: "strips .git suffix", + input: "acme/widgets.git", + want: RepoRef{OrgID: "acme", Name: "widgets"}, + }, + { + name: "leading slash", + input: "/acme/widgets.git", + want: RepoRef{OrgID: "acme", Name: "widgets"}, + }, + { + name: "trailing slash", + input: "acme/widgets/", + want: RepoRef{OrgID: "acme", Name: "widgets"}, + }, + { + name: "single segment", + input: "widgets.git", + wantErr: ErrInvalidPath, + }, + { + name: "three segments", + input: "acme/team/widgets.git", + wantErr: ErrInvalidPath, + }, + { + name: "empty", + input: "", + wantErr: ErrInvalidPath, + }, + { + name: "empty org", + input: "/widgets.git", + wantErr: ErrInvalidPath, + }, + { + name: "name is only .git", + input: "acme/.git", + wantErr: ErrInvalidPath, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := Parse(tt.input) + if !errors.Is(err, tt.wantErr) { + t.Fatalf("Parse(%q) error = %v, want %v", tt.input, err, tt.wantErr) + } + if tt.wantErr != nil { + return + } + if got != tt.want { + t.Errorf("Parse(%q) = %+v, want %+v", tt.input, got, tt.want) + } + }) + } +} + +func TestRepoRefPath(t *testing.T) { + got := RepoRef{OrgID: "acme", Name: "widgets"}.Path() + if want := "acme/widgets"; got != want { + t.Errorf("Path() = %q, want %q", got, want) + } +} + +// 
TestBucketResolverChroots verifies the default resolver roots the returned +// filesystem at ref.Path(): a file written through it lands under "orgID/name". +func TestBucketResolverChroots(t *testing.T) { + base := memfs.New() + r := BucketResolver{Base: base} + + fs, err := r.Resolve(context.Background(), RepoRef{OrgID: "acme", Name: "widgets"}, Credential{}, false) + if err != nil { + t.Fatalf("Resolve: %v", err) + } + + if _, err := fs.Create("HEAD"); err != nil { + t.Fatalf("Create through resolved fs: %v", err) + } + if _, err := base.Stat("acme/widgets/HEAD"); err != nil { + t.Errorf("expected file at acme/widgets/HEAD on base fs: %v", err) + } +} diff --git a/internal/s3fs/filesystem.go b/internal/s3fs/filesystem.go index aac8bed..eaa191b 100644 --- a/internal/s3fs/filesystem.go +++ b/internal/s3fs/filesystem.go @@ -31,6 +31,11 @@ type s3Client interface { CompleteMultipartUpload(context.Context, *s3.CompleteMultipartUploadInput, ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) } +// S3Client is the exported alias for the object-operation client that NewS3FS +// and Harden accept. Callers that need to hold the value Harden returns (e.g. a +// per-keypair cache) can name this type without referencing AWS SDK types. +type S3Client = s3Client + // unixMetaConfig holds the session defaults used when the optional Unix-metadata // feature is enabled. A nil *unixMetaConfig means the feature is off and the // filesystem behaves as if no POSIX attributes exist. diff --git a/internal/tigrisfs/tigrisfs.go b/internal/tigrisfs/tigrisfs.go new file mode 100644 index 0000000..0d2336b --- /dev/null +++ b/internal/tigrisfs/tigrisfs.go @@ -0,0 +1,205 @@ +// Package tigrisfs is the production repofs.Resolver: it treats the caller's +// credential as a Tigris keypair (username = access key ID, password = secret +// access key), builds one storage.Client per keypair, and gives every +// repository its own Tigris bucket — created on the push (write) path. 
+// +// All S3 calls go through github.com/tigrisdata/storage-go's *storage.Client +// (which embeds *s3.Client); no bare AWS client is ever constructed. +package tigrisfs + +import ( + "context" + "crypto/sha256" + "errors" + "fmt" + "math/big" + "strings" + "sync" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + smithy "github.com/aws/smithy-go" + "github.com/go-git/go-billy/v6" + "github.com/go-git/go-git/v6/plumbing/transport" + "github.com/tigrisdata/objgit/internal/repofs" + "github.com/tigrisdata/objgit/internal/s3fs" + storage "github.com/tigrisdata/storage-go" +) + +// ErrNoCredential is returned when a request carries no Tigris keypair. It is +// repofs.ErrUnauthenticated so transports map it to a 401 challenge without +// importing this package. +var ErrNoCredential = repofs.ErrUnauthenticated + +// client is what the resolver needs from a per-keypair Tigris client: the object +// operations s3fs consumes (via the embedded s3fs.S3Client) plus the bucket +// lifecycle calls. *storage.Client satisfies it. +type client interface { + s3fs.S3Client + HeadBucket(ctx context.Context, in *s3.HeadBucketInput, opts ...func(*s3.Options)) (*s3.HeadBucketOutput, error) + CreateBucket(ctx context.Context, in *s3.CreateBucketInput, opts ...func(*s3.Options)) (*s3.CreateBucketOutput, error) +} + +// cachedClient holds the two views of one keypair's client: the raw client for +// bucket lifecycle calls, and the hardened wrapper handed to s3fs for object +// I/O. The hardened wrapper owns its own HTTP connection pool, so it is built +// once per keypair and reused — rebuilding it per request would defeat +// keep-alive reuse. +type cachedClient struct { + raw client + hardened s3fs.S3Client +} + +// Resolver implements repofs.Resolver against Tigris with one bucket per repo. +type Resolver struct { + // newClient builds a client from a keypair. 
Overridable in tests; defaults to + // storage.New(ctx, storage.WithAccessKeypair(id, secret)). + newClient func(ctx context.Context, cred repofs.Credential) (client, error) + fsOpts []s3fs.Option + + mu sync.Mutex + clients map[string]*cachedClient // keyed by access key ID (cred.Username) +} + +// Option configures a Resolver. +type Option func(*Resolver) + +// WithFSOptions passes s3fs.Options to every per-bucket S3FS the resolver builds. +func WithFSOptions(opts ...s3fs.Option) Option { + return func(r *Resolver) { r.fsOpts = append(r.fsOpts, opts...) } +} + +// New returns a Resolver that talks to Tigris using each caller's keypair. +func New(opts ...Option) *Resolver { + r := &Resolver{ + newClient: defaultNewClient, + clients: make(map[string]*cachedClient), + } + for _, o := range opts { + o(r) + } + return r +} + +func defaultNewClient(ctx context.Context, cred repofs.Credential) (client, error) { + c, err := storage.New(ctx, storage.WithAccessKeypair(cred.Username, cred.Password)) + if err != nil { + return nil, fmt.Errorf("tigrisfs: building storage client: %w", err) + } + return c, nil +} + +// Resolve builds the billy.Filesystem for ref backed by its own Tigris bucket. +// On the write path (create) it provisions the bucket; on the read path it +// returns transport.ErrRepositoryNotFound when the bucket is absent. 
+func (r *Resolver) Resolve(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential, create bool) (billy.Filesystem, error) {
+	if cred.Username == "" || cred.Password == "" {
+		return nil, ErrNoCredential
+	}
+
+	cc, err := r.client(ctx, cred)
+	if err != nil {
+		return nil, err
+	}
+
+	bucket := bucketName(ref)
+	if create {
+		if err := ensureBucket(ctx, cc.raw, bucket); err != nil {
+			return nil, fmt.Errorf("tigrisfs: ensuring bucket %q: %w", bucket, err)
+		}
+	} else {
+		ok, err := bucketExists(ctx, cc.raw, bucket)
+		if err != nil {
+			return nil, fmt.Errorf("tigrisfs: checking bucket %q: %w", bucket, err)
+		}
+		if !ok {
+			return nil, transport.ErrRepositoryNotFound
+		}
+	}
+
+	return s3fs.NewS3FS(cc.hardened, bucket, r.fsOpts...)
+}
+
+// client returns the cached client for a keypair, building and caching it on
+// first use. The cache is keyed by a digest of the whole keypair, not the ID alone.
+func (r *Resolver) client(ctx context.Context, cred repofs.Credential) (*cachedClient, error) {
+	// A hit must prove knowledge of the secret; keyed by access key ID alone, any
+	// password would reuse the owner's client. NOTE: unbounded map; LRU it if large.
+	key := fmt.Sprintf("%x", sha256.Sum256([]byte(cred.Username+"\x00"+cred.Password)))
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if cc, ok := r.clients[key]; ok {
+		return cc, nil
+	}
+
+	raw, err := r.newClient(ctx, cred)
+	if err != nil {
+		return nil, err
+	}
+	cc := &cachedClient{raw: raw, hardened: s3fs.Harden(raw)}
+	r.clients[key] = cc
+	return cc, nil
+}
+
+// ensureBucket creates the bucket, treating an already-owned/already-existing
+// bucket as success so repeated pushes are idempotent.
+func ensureBucket(ctx context.Context, c client, bucket string) error { + _, err := c.CreateBucket(ctx, &s3.CreateBucketInput{Bucket: aws.String(bucket)}) + if err == nil { + return nil + } + if _, ok := errors.AsType[*types.BucketAlreadyOwnedByYou](err); ok { + return nil + } + if _, ok := errors.AsType[*types.BucketAlreadyExists](err); ok { + return nil + } + return err +} + +// bucketExists reports whether the bucket is present, mapping a not-found +// response to (false, nil) and any other error through. +func bucketExists(ctx context.Context, c client, bucket string) (bool, error) { + _, err := c.HeadBucket(ctx, &s3.HeadBucketInput{Bucket: aws.String(bucket)}) + if err == nil { + return true, nil + } + if isNotFound(err) { + return false, nil + } + return false, err +} + +// isNotFound recognizes the several shapes a "bucket does not exist" error can +// take from the S3 API: the typed NotFound/NoSuchBucket errors, or a generic +// smithy API error whose code says so. +func isNotFound(err error) bool { + if _, ok := errors.AsType[*types.NotFound](err); ok { + return true + } + if _, ok := errors.AsType[*types.NoSuchBucket](err); ok { + return true + } + if apiErr, ok := errors.AsType[smithy.APIError](err); ok { + switch apiErr.ErrorCode() { + case "NotFound", "NoSuchBucket", "404": + return true + } + } + return false +} + +// bucketName derives a deterministic, DNS-valid Tigris bucket name from a repo +// ref: "objgit-" + a base36 (0-9a-z) digest of "orgID/name". The digest is +// left-padded so truncation is stable, giving a fixed 39-character name well +// within the 63-character limit. 
+func bucketName(ref repofs.RepoRef) string { + const digestLen = 32 + sum := sha256.Sum256([]byte(ref.Path())) + b36 := new(big.Int).SetBytes(sum[:]).Text(36) + if len(b36) < digestLen { + b36 = strings.Repeat("0", digestLen-len(b36)) + b36 + } + return "objgit-" + b36[:digestLen] +} diff --git a/internal/tigrisfs/tigrisfs_test.go b/internal/tigrisfs/tigrisfs_test.go new file mode 100644 index 0000000..f427583 --- /dev/null +++ b/internal/tigrisfs/tigrisfs_test.go @@ -0,0 +1,159 @@ +package tigrisfs + +import ( + "context" + "errors" + "strings" + "sync" + "testing" + + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/go-git/go-git/v6/plumbing/transport" + "github.com/tigrisdata/objgit/internal/repofs" + "github.com/tigrisdata/objgit/internal/s3fs" +) + +// fakeClient satisfies the tigrisfs client interface: the embedded s3fs.S3Client +// supplies the (unused) object methods, and the bucket methods are observable. +type fakeClient struct { + s3fs.S3Client // nil; object methods are never called in these tests + mu sync.Mutex + createCalls int + headCalls int + exists bool +} + +func (f *fakeClient) CreateBucket(_ context.Context, _ *s3.CreateBucketInput, _ ...func(*s3.Options)) (*s3.CreateBucketOutput, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.createCalls++ + f.exists = true + return &s3.CreateBucketOutput{}, nil +} + +func (f *fakeClient) HeadBucket(_ context.Context, _ *s3.HeadBucketInput, _ ...func(*s3.Options)) (*s3.HeadBucketOutput, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.headCalls++ + if f.exists { + return &s3.HeadBucketOutput{}, nil + } + return nil, &types.NotFound{} +} + +func TestBucketName(t *testing.T) { + tests := []struct { + name string + ref repofs.RepoRef + }{ + {"simple", repofs.RepoRef{OrgID: "acme", Name: "widgets"}}, + {"other org", repofs.RepoRef{OrgID: "globex", Name: "widgets"}}, + {"other repo", repofs.RepoRef{OrgID: "acme", Name: "gadgets"}}, + } + + seen := 
map[string]string{} + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := bucketName(tt.ref) + + if !strings.HasPrefix(got, "objgit-") { + t.Errorf("bucketName(%v) = %q, want objgit- prefix", tt.ref, got) + } + if len(got) < 3 || len(got) > 63 { + t.Errorf("bucketName(%v) length = %d, want within [3,63]", tt.ref, len(got)) + } + for _, c := range got { + if !(c >= 'a' && c <= 'z' || c >= '0' && c <= '9' || c == '-') { + t.Errorf("bucketName(%v) = %q has invalid bucket char %q", tt.ref, got, c) + } + } + // Deterministic and collision-free across distinct refs. + if again := bucketName(tt.ref); again != got { + t.Errorf("bucketName not deterministic: %q != %q", got, again) + } + if other, ok := seen[got]; ok { + t.Errorf("bucketName collision: %v and %s both map to %q", tt.ref, other, got) + } + seen[got] = tt.ref.OrgID + "/" + tt.ref.Name + }) + } +} + +func TestResolveRequiresCredential(t *testing.T) { + r := New() + for _, cred := range []repofs.Credential{ + {}, + {Username: "key"}, + {Password: "secret"}, + } { + _, err := r.Resolve(context.Background(), repofs.RepoRef{OrgID: "acme", Name: "widgets"}, cred, true) + if !errors.Is(err, repofs.ErrUnauthenticated) { + t.Errorf("Resolve(cred=%+v) err = %v, want repofs.ErrUnauthenticated", cred, err) + } + } +} + +func TestResolveCreateGating(t *testing.T) { + cred := repofs.Credential{Username: "key", Password: "secret"} + ref := repofs.RepoRef{OrgID: "acme", Name: "widgets"} + + t.Run("push creates bucket", func(t *testing.T) { + fake := &fakeClient{} + r := newWithClient(fake) + + if _, err := r.Resolve(context.Background(), ref, cred, true); err != nil { + t.Fatalf("Resolve(create=true): %v", err) + } + if fake.createCalls != 1 { + t.Errorf("CreateBucket calls = %d, want 1", fake.createCalls) + } + if fake.headCalls != 0 { + t.Errorf("HeadBucket calls = %d, want 0 on the write path", fake.headCalls) + } + }) + + t.Run("read of missing bucket is not-found", func(t *testing.T) { + fake := 
&fakeClient{} + r := newWithClient(fake) + + _, err := r.Resolve(context.Background(), ref, cred, false) + if !errors.Is(err, transport.ErrRepositoryNotFound) { + t.Fatalf("Resolve(create=false) err = %v, want ErrRepositoryNotFound", err) + } + if fake.headCalls != 1 { + t.Errorf("HeadBucket calls = %d, want 1", fake.headCalls) + } + if fake.createCalls != 0 { + t.Errorf("CreateBucket calls = %d, want 0 on the read path", fake.createCalls) + } + }) + + t.Run("read of existing bucket succeeds and client is cached", func(t *testing.T) { + fake := &fakeClient{} + var builds int + r := New() + r.newClient = func(context.Context, repofs.Credential) (client, error) { + builds++ + return fake, nil + } + + // Push to create the bucket, then read it back. + if _, err := r.Resolve(context.Background(), ref, cred, true); err != nil { + t.Fatalf("Resolve(create=true): %v", err) + } + if _, err := r.Resolve(context.Background(), ref, cred, false); err != nil { + t.Fatalf("Resolve(create=false) after create: %v", err) + } + if builds != 1 { + t.Errorf("newClient builds = %d, want 1 (client cached per keypair)", builds) + } + }) +} + +// newWithClient returns a Resolver whose newClient always yields the given fake. +func newWithClient(c client) *Resolver { + r := New() + r.newClient = func(context.Context, repofs.Credential) (client, error) { return c, nil } + return r +} From bf81a1dc0d5c6ae958bdb79e4103ca334a66cdc5 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 26 Jun 2026 13:26:41 -0400 Subject: [PATCH 2/4] fix(s3fs): opt out of aws-sdk-go-v2 default request checksums aws-sdk-go-v2 (s3 >= v1.73) defaults RequestChecksumCalculation to "when supported", sending PutObject bodies with a trailing CRC32 via aws-chunked content encoding. Some S3-compatible endpoints mishandle that framing and store an empty or corrupt object. Force the legacy "when required" behavior in Harden so bodies are sent plain. 
Assisted-by: Claude Opus 4.8 via Claude Code Signed-off-by: Xe Iaso --- internal/s3fs/resilient.go | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/internal/s3fs/resilient.go b/internal/s3fs/resilient.go index 6227870..cc41eff 100644 --- a/internal/s3fs/resilient.go +++ b/internal/s3fs/resilient.go @@ -5,6 +5,7 @@ import ( "net/http" "time" + "github.com/aws/aws-sdk-go-v2/aws" awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http" "github.com/aws/aws-sdk-go-v2/service/s3" ) @@ -58,11 +59,23 @@ type resilientClient struct { // client that fails fast on stale keep-alive connections rather than hanging // forever. Pass the result to NewS3FS and NewListingCache. See the package // constants above for the rationale. +// +// It also opts the request/response checksum workflow back to "when required". +// aws-sdk-go-v2 (s3 >= v1.73) defaults to "when supported", which adds a CRC32 +// trailing checksum and sends the body with Content-Encoding: aws-chunked. Some +// S3-compatible endpoints mishandle that framing and store an empty or corrupt +// object even though PutObject returns 200 — which surfaces here as go-git +// reading a just-written ref back as zero bytes ("ref file is empty"). Forcing +// the legacy behavior sends a plain body that every S3 implementation accepts. 
func Harden(c s3Client) s3Client { hc := newHardenedHTTPClient() return resilientClient{ s3Client: c, - opt: func(o *s3.Options) { o.HTTPClient = hc }, + opt: func(o *s3.Options) { + o.HTTPClient = hc + o.RequestChecksumCalculation = aws.RequestChecksumCalculationWhenRequired + o.ResponseChecksumValidation = aws.ResponseChecksumValidationWhenRequired + }, } } From 5bb834b6b08b037676d07925bb33679594e91c84 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 26 Jun 2026 13:26:54 -0400 Subject: [PATCH 3/4] fix(s3fs): create directory markers as directories, not empty files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MkdirAll wrote a zero-byte marker at the exact key (e.g. "refs/heads"), which lists as a regular file. go-git's reference walker then read it as a ref and failed the receive-pack advertisement with "ref file is empty" — so a first push to a fresh per-repo bucket never completed. Write the marker with a trailing separator ("refs/heads/") so it lists as a CommonPrefix (directory); listChildren already skips the trailing-slash self-marker. Also key the marker through fs3.key and short-circuit the bucket root. Adds regression coverage that git.Init at the bucket root advertises references cleanly, and that MkdirAll yields a directory entry. Assisted-by: Claude Opus 4.8 via Claude Code Signed-off-by: Xe Iaso --- internal/s3fs/dir.go | 21 +++- internal/s3fs/gitinit_test.go | 206 ++++++++++++++++++++++++++++++++++ 2 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 internal/s3fs/gitinit_test.go diff --git a/internal/s3fs/dir.go b/internal/s3fs/dir.go index 45417ad..3d00804 100644 --- a/internal/s3fs/dir.go +++ b/internal/s3fs/dir.go @@ -150,15 +150,28 @@ func (fs3 *S3FS) ReadDir(dir string) ([]fs.DirEntry, error) { // perm are used for all directories that MkdirAll creates. If path is/ // already a directory, MkdirAll does nothing and returns nil. 
func (fs3 *S3FS) MkdirAll(filename string, perm os.FileMode) error { + key := fs3.key(filename) + if key == "" || key == "." { + // The bucket root always exists; nothing to create. + return nil + } + + // S3 has no directories. Write a zero-byte marker whose key ends in the + // separator so listings surface it as a CommonPrefix (a directory). Without + // the trailing slash the marker lists as an empty regular file, which makes + // go-git's reference walker read e.g. "refs/heads" as a ref and fail with + // "ref file is empty"; listChildren already skips the trailing-slash + // self-marker (full == prefix). + markerKey := key + fs3.separator start := time.Now() _, err := fs3.client.PutObject(context.TODO(), &s3.PutObjectInput{ - Bucket: new(fs3.bucket), - Key: new(filename), - Body: bytes.NewBuffer(nil), + Bucket: &fs3.bucket, + Key: &markerKey, + Body: bytes.NewReader(nil), }) observeS3("PutObject", start, err) if err == nil && fs3.cache != nil { - prefix, _ := splitKey(fs3.key(filename)) + prefix, _ := splitKey(key) fs3.cache.invalidate(prefix) } diff --git a/internal/s3fs/gitinit_test.go b/internal/s3fs/gitinit_test.go new file mode 100644 index 0000000..2ebfc31 --- /dev/null +++ b/internal/s3fs/gitinit_test.go @@ -0,0 +1,206 @@ +package s3fs + +import ( + "bytes" + "context" + "io" + "sort" + "strings" + "sync" + "testing" + "time" + + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/service/s3" + "github.com/aws/aws-sdk-go-v2/service/s3/types" + "github.com/aws/smithy-go" + git "github.com/go-git/go-git/v6" + "github.com/go-git/go-git/v6/plumbing" + "github.com/go-git/go-git/v6/plumbing/cache" + "github.com/go-git/go-git/v6/storage/filesystem" +) + +// byteStub is an in-memory s3Client that actually stores object bytes, so we can +// verify a write/read round-trip (unlike stubClient, which tracks only sizes). 
+type byteStub struct { + mu sync.Mutex + objs map[string][]byte +} + +func newByteStub() *byteStub { return &byteStub{objs: map[string][]byte{}} } + +func (s *byteStub) get(k string) ([]byte, bool) { + s.mu.Lock() + defer s.mu.Unlock() + b, ok := s.objs[k] + return b, ok +} + +func (s *byteStub) HeadObject(_ context.Context, in *s3.HeadObjectInput, _ ...func(*s3.Options)) (*s3.HeadObjectOutput, error) { + if b, ok := s.get(aws.ToString(in.Key)); ok { + return &s3.HeadObjectOutput{ContentLength: aws.Int64(int64(len(b))), LastModified: aws.Time(time.Unix(0, 0))}, nil + } + return nil, &smithy.GenericAPIError{Code: "NotFound"} +} + +func (s *byteStub) GetObject(_ context.Context, in *s3.GetObjectInput, _ ...func(*s3.Options)) (*s3.GetObjectOutput, error) { + b, ok := s.get(aws.ToString(in.Key)) + if !ok { + return nil, &smithy.GenericAPIError{Code: "NoSuchKey"} + } + return &s3.GetObjectOutput{ + Body: io.NopCloser(bytes.NewReader(b)), + ContentLength: aws.Int64(int64(len(b))), + LastModified: aws.Time(time.Unix(0, 0)), + }, nil +} + +func (s *byteStub) PutObject(_ context.Context, in *s3.PutObjectInput, _ ...func(*s3.Options)) (*s3.PutObjectOutput, error) { + var buf []byte + if in.Body != nil { + b, err := io.ReadAll(in.Body) + if err != nil { + return nil, err + } + buf = b + } + s.mu.Lock() + s.objs[aws.ToString(in.Key)] = buf + s.mu.Unlock() + return &s3.PutObjectOutput{}, nil +} + +func (s *byteStub) ListObjectsV2(_ context.Context, in *s3.ListObjectsV2Input, _ ...func(*s3.Options)) (*s3.ListObjectsV2Output, error) { + prefix := aws.ToString(in.Prefix) + delim := aws.ToString(in.Delimiter) + s.mu.Lock() + ks := make([]string, 0, len(s.objs)) + for k := range s.objs { + ks = append(ks, k) + } + s.mu.Unlock() + sort.Strings(ks) + + seen := map[string]bool{} + out := &s3.ListObjectsV2Output{} + for _, k := range ks { + if !strings.HasPrefix(k, prefix) { + continue + } + rest := k[len(prefix):] + if delim != "" { + if i := strings.Index(rest, delim); i >= 0 { + cp 
:= prefix + rest[:i+1] + if !seen[cp] { + seen[cp] = true + out.CommonPrefixes = append(out.CommonPrefixes, types.CommonPrefix{Prefix: aws.String(cp)}) + } + continue + } + } + out.Contents = append(out.Contents, types.Object{Key: aws.String(k), Size: aws.Int64(0)}) + } + return out, nil +} + +func (s *byteStub) DeleteObject(_ context.Context, in *s3.DeleteObjectInput, _ ...func(*s3.Options)) (*s3.DeleteObjectOutput, error) { + s.mu.Lock() + delete(s.objs, aws.ToString(in.Key)) + s.mu.Unlock() + return &s3.DeleteObjectOutput{}, nil +} + +func (s *byteStub) RenameObject(_ context.Context, in *s3.CopyObjectInput, _ ...func(*s3.Options)) (*s3.CopyObjectOutput, error) { + src := strings.TrimPrefix(aws.ToString(in.CopySource), aws.ToString(in.Bucket)+"/") + if b, ok := s.get(src); ok { + s.mu.Lock() + s.objs[aws.ToString(in.Key)] = b + delete(s.objs, src) + s.mu.Unlock() + } + return &s3.CopyObjectOutput{}, nil +} + +func (s *byteStub) CreateMultipartUpload(context.Context, *s3.CreateMultipartUploadInput, ...func(*s3.Options)) (*s3.CreateMultipartUploadOutput, error) { + panic("unexpected multipart upload in init test") +} +func (s *byteStub) UploadPart(context.Context, *s3.UploadPartInput, ...func(*s3.Options)) (*s3.UploadPartOutput, error) { + panic("unexpected multipart upload in init test") +} +func (s *byteStub) CompleteMultipartUpload(context.Context, *s3.CompleteMultipartUploadInput, ...func(*s3.Options)) (*s3.CompleteMultipartUploadOutput, error) { + panic("unexpected multipart upload in init test") +} + +// TestInitAtBucketRootAdvertisesRefs reproduces the per-keypair (root="") flow: +// init a bare repo directly at the bucket root, then read HEAD and iterate all +// references the way the receive-pack advertisement does. 
+// +// Iterating references is the part that regressed: git.Init's MkdirAll creates +// refs/heads and refs/tags directory markers, and if those land as zero-byte +// objects without a trailing slash they list as empty *files*, so go-git reads +// them as references and fails with "ref file is empty". The marker must be a +// directory (trailing-slash key) instead. +func TestInitAtBucketRootAdvertisesRefs(t *testing.T) { + stub := newByteStub() + fsys, err := NewS3FS(stub, "repo-bucket") + if err != nil { + t.Fatalf("NewS3FS: %v", err) + } + + st := filesystem.NewStorage(fsys, cache.NewObjectLRUDefault()) + if _, err := git.Init(st, git.WithDefaultBranch(plumbing.NewBranchReferenceName("main"))); err != nil { + t.Fatalf("git.Init: %v", err) + } + + head, ok := stub.get("HEAD") + t.Logf("HEAD object present=%v bytes=%q", ok, string(head)) + + ref, err := st.Reference(plumbing.HEAD) + if err != nil { + t.Fatalf("read HEAD reference: %v", err) + } + if got, want := ref.Target().String(), "refs/heads/main"; got != want { + t.Errorf("HEAD target = %q, want %q", got, want) + } + + // The advertisement walks every reference; the directory markers must not be + // read as empty ref files. + it, err := st.IterReferences() + if err != nil { + t.Fatalf("IterReferences: %v", err) + } + defer it.Close() + if err := it.ForEach(func(*plumbing.Reference) error { return nil }); err != nil { + t.Fatalf("iterating references (advertisement) failed: %v", err) + } +} + +// TestMkdirAllCreatesDirectoryMarker asserts a created directory lists as a +// directory entry, not a regular file — the property go-git's ref walker needs. 
+func TestMkdirAllCreatesDirectoryMarker(t *testing.T) { + stub := newByteStub() + fsys, err := NewS3FS(stub, "repo-bucket") + if err != nil { + t.Fatalf("NewS3FS: %v", err) + } + if err := fsys.MkdirAll("refs/heads", 0o755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + entries, err := fsys.ReadDir("refs") + if err != nil { + t.Fatalf("ReadDir(refs): %v", err) + } + var found bool + for _, e := range entries { + if e.Name() == "heads" { + found = true + if !e.IsDir() { + t.Errorf(`ReadDir(refs): "heads" is a file, want a directory`) + } + } + } + if !found { + t.Errorf(`ReadDir(refs) did not list "heads"; got %v`, entries) + } +} From e90cae851dfab01537015dc0bdecf3c513390ea3 Mon Sep 17 00:00:00 2001 From: Xe Iaso Date: Fri, 26 Jun 2026 13:27:02 -0400 Subject: [PATCH 4/4] docs: add per-repo storage design plans Captures the design for per-repo {orgID}/{repoName} filesystem dispatch and the per-keypair bucket-per-repo Tigris resolver. Assisted-by: Claude Opus 4.8 via Claude Code Signed-off-by: Xe Iaso --- docs/plans/per-repo-bucket.md | 176 +++++++++++++++++++++++ docs/plans/per-repo-fs-dispatch.md | 215 +++++++++++++++++++++++++++++ 2 files changed, 391 insertions(+) create mode 100644 docs/plans/per-repo-bucket.md create mode 100644 docs/plans/per-repo-fs-dispatch.md diff --git a/docs/plans/per-repo-bucket.md b/docs/plans/per-repo-bucket.md new file mode 100644 index 0000000..3ffddc6 --- /dev/null +++ b/docs/plans/per-repo-bucket.md @@ -0,0 +1,176 @@ +# Per-keypair Tigris resolver: a bucket per repo + +## Context + +The per-repo `repofs.Resolver` hook is already in place: every git request resolves +to a `billy.Filesystem` via `Resolve(ctx, ref, cred)`, and the HTTP Basic-auth +pair already arrives as `repofs.Credential{Username, Password}`. The default +`BucketResolver` chroots one shared bucket. 
+ +We now want a real backend: treat the Basic-auth credential as a **Tigris +keypair** (username = access key ID, password = secret access key), build a +`storage.Client` per keypair, and give **each repository its own Tigris bucket**, +created on first push. This replaces the single-shared-bucket model as the +production default. + +Decisions (confirmed): + +- **Credential = keypair**: `username` → access key ID, `password` → secret. +- **Bucket name**: `objgit-{base36(sha256(orgID/repoName))[:N]}` — deterministic, + DNS-valid (lowercase alnum), collision-resistant. +- **Create on push only**: the resolver creates the bucket only on the write + path; reads of a missing bucket are a 404. +- **Replace default**: `main.go` wires the Tigris resolver. `BucketResolver` + stays in `repofs` for tests (memfs), just not wired in production. +- **All S3 ops go through `github.com/tigrisdata/storage-go`** (`*storage.Client`, + which embeds `*s3.Client`); never construct a bare AWS `s3.Client`. + +## 1. Gate creation on the Resolver interface (`internal/repofs`) + +`Resolve` needs to know read vs. write so it only creates buckets on push. Add a +`create bool`: + +```go +type Resolver interface { + Resolve(ctx context.Context, ref RepoRef, cred Credential, create bool) (billy.Filesystem, error) +} +``` + +- `BucketResolver.Resolve` ignores `create` (chroot is creation-free). +- `daemon.load` (read) passes `create=false`; `daemon.loadOrInit` (push) passes + `create=true` (`cmd/objgitd/git_protocol.go`). +- Update the `recordingResolver` test stub in `cmd/objgitd/http_test.go`. + +Add a `bucketName()` helper — but put it in the Tigris package (below), since the +`objgit-` prefix and hashing are storage policy, not neutral identity. + +## 2. New package: `internal/tigrisfs` + +The concrete, Tigris-backed `repofs.Resolver`. Depends on `storage-go`, `s3fs`, +and (for the not-found sentinel) go-git `transport`.
Keeping it out of `repofs` +preserves `repofs`'s transport/storage neutrality. + +```go +package tigrisfs + +// Resolver implements repofs.Resolver against Tigris, one bucket per repo. +type Resolver struct { + // newClient builds a storage.Client from a keypair. Defaults to + // storage.New(ctx, storage.WithAccessKeypair(id, secret)); overridable for tests. + newClient func(ctx context.Context, cred repofs.Credential) (*storage.Client, error) + fsOpts []s3fs.Option // listing/pack cache opts applied per-bucket S3FS + + mu sync.Mutex + clients map[string]*cachedClient // keyed by the full keypair (digest of ID + secret), never the ID alone +} + +type cachedClient struct { + raw *storage.Client // bucket ops (CreateBucket/HeadBucket) go here + hardened s3fs.S3Client // object I/O handed to s3fs (see note on Harden) +} +``` + +`Resolve`: + +1. Reject empty `cred.Username`/`cred.Password` (auth required → surfaces as 401 + at the HTTP layer; see error mapping). +2. Look up / build the cached client for the keypair — keyed on ID and secret together, so a request with a known ID but a wrong secret never reuses an authenticated client (build via `newClient`, + then `hardened = s3fs.Harden(raw)`). Cache under a mutex. +3. `bucket := bucketName(ref)`. +4. If `create`: `ensureBucket(ctx, raw, bucket)` — `CreateBucket`; treat + `*types.BucketAlreadyOwnedByYou` / `*types.BucketAlreadyExists` (via + `errors.As`) as success. + Else: `HeadBucket`; on `*types.NotFound`/`*types.NoSuchBucket` return + `transport.ErrRepositoryNotFound`. +5. Return `s3fs.NewS3FS(hardened, bucket, r.fsOpts...)` (root `""` — the bucket + _is_ the repo). + +```go +func bucketName(ref repofs.RepoRef) string { + sum := sha256.Sum256([]byte(ref.Path())) + b36 := new(big.Int).SetBytes(sum[:]).Text(36) // 0-9a-z + // left-pad to a fixed width so truncation is deterministic, then take N. + return "objgit-" + leftPad(b36, 50, '0')[:32] // "objgit-" + 32 = 39 chars, < 63 +} +``` + +**Harden note:** `s3fs.Harden` returns the 9-method object-only `s3Client` +wrapper — it does **not** expose `CreateBucket`/`HeadBucket`.
So the resolver +keeps the raw `*storage.Client` for the (rare) bucket calls and hands the +hardened wrapper to `s3fs` for the hot object path. Both are storage-go clients; +no bare AWS client is created. + +**s3fs export:** `s3fs.NewS3FS` currently takes an unexported `s3Client` +interface. An external package can still satisfy it (method set is exported), but +to name the field type in `cachedClient` cleanly, export the interface as +`s3fs.S3Client` (alias/rename of the existing `s3Client`). Small, mechanical +change in `internal/s3fs/filesystem.go`. + +## 3. `main.go` wiring (replace default) + +- Wire in the keypair-mode resolver instead of `BucketResolver`: + `resolver: tigrisfs.New(tigrisfs.WithFSOptions(fsOpts...))`. +- The default `newClient` does `storage.New(ctx, storage.WithAccessKeypair(...))` + (endpoint defaults to the global Tigris endpoint; add a `-tigris-endpoint` + flag later if needed). +- `sysFS` (SSH host key) still uses the ambient `-bucket` `fsys` built today; + repos no longer use it. The existing `-bucket` flag becomes "system bucket" + (host key only). Note this in flag help. +- Per-keypair `ListingCache`/`PackCache`: the caches `main` builds today are + bound to one bucket and no longer fit a bucket-per-repo world. For this pass, + pass no per-bucket caches (or a bounded per-(keyID,bucket) cache later); + **log that repo-side caching is disabled** so it isn't mistaken for working. + +## 4. Error mapping + +- Empty/invalid credential → resolver returns a sentinel (`tigrisfs.ErrNoCredential`); + HTTP `resolve` maps it to `401 WWW-Authenticate: Basic` (reuse the existing + `auth.Unauthenticated` rendering path, or special-case the error). +- Missing bucket on read → `transport.ErrRepositoryNotFound` → existing 404 path. +- A bad keypair surfaces as an S3 `AccessDenied` mid-call; log and return 500 + (acceptable for now — real authz is a later seam). + +## 5.
Caching & concurrency + +- One `cachedClient` per keypair (cache key derived from both the access key ID and the secret, not the ID alone), guarded by a mutex (or `sync.Map`). + Building a `storage.Client` loads AWS config (network-free) and is the main + cost we're avoiding per request. +- Bound the map (simple max or LRU, e.g. 1024 keypairs) as a follow-up; note the + unbounded-growth risk in a comment for now. + +## 6. Tests + +- `internal/tigrisfs/tigrisfs_test.go` (unit, no network): + - `bucketName` is deterministic, `objgit-`-prefixed, ≤ 63 chars, lowercase + alnum, and differs for different `orgID/repoName`. + - Empty credential → `ErrNoCredential` (checked before `newClient`). + - `create` gating: with a fake `newClient` returning a client whose bucket ops + are observable, assert `CreateBucket` is called iff `create==true` and + `HeadBucket` otherwise. (Use a small fake satisfying the bucket-op + object + method set; inject via `newClient`.) +- Integration test gated by real creds (skip when + `TIGRIS_STORAGE_ACCESS_KEY_ID`/`_SECRET_ACCESS_KEY` unset — see the + `tigris-storage` skill's `skipIfNoCreds` pattern): push to + `acme/itest-<random>.git`, assert the bucket is created and a clone round-trips; + clean up the bucket after. +- `internal/repofs` and `cmd/objgitd` existing tests keep using `BucketResolver` + (memfs); update them only for the new `create` parameter. + +## Verification + +```text +go build ./... +go test ./internal/repofs/... ./internal/tigrisfs/... ./cmd/objgitd/...
+``` + +End-to-end against real Tigris (credentials in the AWS/Tigris env): + +```text +./objgitd -bucket $SYS_BUCKET -http-bind :8080 -allow-push +# username = Tigris access key ID, password = secret access key +git push http://$KEYID:$SECRET@localhost:8080/acme/demo.git main # first push creates bucket objgit-<hash> +# verify the bucket exists: +tigris bucket list | grep objgit- +git clone http://$KEYID:$SECRET@localhost:8080/acme/demo.git # clone reuses cached client + bucket +git clone http://localhost:8080/acme/demo.git # no creds -> 401 +``` diff --git a/docs/plans/per-repo-fs-dispatch.md b/docs/plans/per-repo-fs-dispatch.md new file mode 100644 index 0000000..2a9fd0b --- /dev/null +++ b/docs/plans/per-repo-fs-dispatch.md @@ -0,0 +1,215 @@ +# Per-repo filesystem resolution + `{orgID}/{repoName}` paths (HTTP focus) + +## Context + +Today the `daemon` holds a single static `fs billy.Filesystem` (the whole bucket) +and a single `loader transport.Loader`, both built once in `main.go`. Every +transport passes a **raw, unvalidated, variable-depth** path straight into +`auth.Request.Repo` and into `load`/`loadOrInit`, which `Chroot`s the one bucket +fs by that path. + +We want two coupled changes: + +1. **Restrict repo paths to `{orgID}/{repoName}`** — `orgID` is an opaque + reference a later API call will validate; for now it's accepted as-is. Paths + that aren't exactly two segments are rejected. The `.git` suffix is stripped + from the repo name (`org/repo.git` and `org/repo` resolve to the same repo; + storage key `org/repo/`). +2. **Discover the billy filesystem per-repo via a pluggable hook**, and **pass + the HTTP Basic-auth username/password into that hook** so a real backend can + route an org to its own bucket/credentials based on who's calling. The + default hook preserves today's behavior (chroot the one bucket fs, ignoring + the credential). + +**Scope:** this pass targets the **HTTP** transport. SSH is explicitly out of +scope.
The shared resolution layer is transport-agnostic, so git:// and SSH get +only the mechanical edits needed to keep compiling (they pass an empty +credential); their auth semantics are unchanged. + +## New package: `internal/repofs` + +Transport-neutral, mirroring how `internal/auth` is structured. Imports only +`context`, `errors`, `path`, `strings`, and `go-billy/v6`. + +```go +package repofs + +var ErrInvalidPath = errors.New("repository path must be of the form {orgID}/{repoName}") + +// RepoRef identifies a repository. OrgID is opaque (validated later); Name has +// any trailing ".git" stripped. +type RepoRef struct { + OrgID string + Name string +} + +// Path is the canonical storage/identity path "orgID/name". +func (r RepoRef) Path() string { return path.Join(r.OrgID, r.Name) } + +// Parse trims surrounding slashes, requires exactly two non-empty segments, +// and strips a trailing ".git" from the name. OrgID is not otherwise validated. +func Parse(raw string) (RepoRef, error) + +// Credential carries the HTTP Basic-auth username/password (zero value = none). +// Unvalidated; the Resolver decides what to do with it. +type Credential struct { + Username string + Password string +} + +// Resolver maps a RepoRef (plus the caller's credential) to the +// billy.Filesystem rooted at that repository. This is the hook a real backend +// implements to route an org to its bucket. +type Resolver interface { + Resolve(ctx context.Context, ref RepoRef, cred Credential) (billy.Filesystem, error) +} + +// BucketResolver is the default Resolver: chroot one base filesystem (the whole +// bucket) to ref.Path(), ignoring the credential. Preserves current behavior. +type BucketResolver struct{ Base billy.Filesystem } +func (b BucketResolver) Resolve(_ context.Context, ref RepoRef, _ Credential) (billy.Filesystem, error) { + return b.Base.Chroot(ref.Path()) +} +``` + +`Parse` is the single validation path. 
Add unit tests for valid input, +missing/extra segments, empty segments, trailing slash, and `.git` stripping. + +## `daemon` changes (`cmd/objgitd/git_protocol.go`) + +Replace the `fs` and `loader` fields: + +```go +type daemon struct { + sysFS billy.Filesystem // bucket-level storage (SSH host key); NOT repo-scoped + resolver repofs.Resolver + authz auth.Authorizer + allowHooks bool + hookTimeout time.Duration +} +``` + +Rewrite resolution to go through the hook (threading the credential), building +the storer per resolved fs. Reuse go-git's bare-repo detection +(`FilesystemLoader.load` returns `ErrRepositoryNotFound` when no `config` exists +at the chroot root): + +```go +// storerFor returns the bare-repo storer rooted at fs, or +// transport.ErrRepositoryNotFound when none exists there. +func storerFor(fs billy.Filesystem) (storage.Storer, error) { + return transport.NewFilesystemLoader(fs, false).Load(&url.URL{Path: "/"}) +} + +func (d *daemon) load(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential) (storage.Storer, error) { + fs, err := d.resolver.Resolve(ctx, ref, cred) + if err != nil { return nil, err } + st, err := storerFor(fs) + if err != nil { return nil, err } + if err := ensureHEAD(st); err != nil { slog.Warn("...", "repo", ref.Path(), "err", err) } + return st, nil +} + +func (d *daemon) loadOrInit(ctx context.Context, ref repofs.RepoRef, cred repofs.Credential) (storage.Storer, error) { + fs, err := d.resolver.Resolve(ctx, ref, cred) + if err != nil { return nil, err } + st, err := storerFor(fs) + if err == nil { ensureHEAD(st); return st, nil } + if !errors.Is(err, transport.ErrRepositoryNotFound) { return nil, err } + st = filesystem.NewStorage(fs, cache.NewObjectLRUDefault()) + if _, err := git.Init(st, git.WithDefaultBranch(plumbing.NewBranchReferenceName("main"))); err != nil { + return nil, fmt.Errorf("init bare repo: %w", err) + } + metrics.ReposCreated() + slog.Info("created repository", "repo", ref.Path()) + return st, nil 
+} +``` + +The old `d.fs.Chroot(repoPath)` step is gone — `Resolve` returns the repo-root +fs directly, so resolution happens once per request. + +## HTTP transport (`cmd/objgitd/http.go` + `main.go`) — primary work + +Replace the suffix-dispatch `ServeHTTP` with an `http.ServeMux` (built by a new +`d.httpHandler()` method, wired in `main.go` as the server `Handler`). With a +fixed two-segment path the wildcards the old code couldn't use now work: + +- `GET /{orgID}/{repoName}/info/refs` +- `POST /{orgID}/{repoName}/git-upload-pack` +- `POST /{orgID}/{repoName}/git-receive-pack` + +Handlers read `r.PathValue("orgID")`/`r.PathValue("repoName")`, build the ref via +`repofs.Parse(path.Join(orgID, repoName))`, and 400 on `ErrInvalidPath`. +ServeMux 404s anything that isn't exactly two segments before the suffix, so the +shape is enforced for free. + +`resolve` extracts the Basic-auth credential and threads it through: + +```go +func credFromRequest(r *http.Request) (auth.Credential, repofs.Credential) { + if u, p, ok := r.BasicAuth(); ok { + return auth.BasicAuth{Username: u, Password: p}, repofs.Credential{Username: u, Password: p} + } + return auth.Anonymous{}, repofs.Credential{} +} +``` + +(or keep the existing `auth` credential helper and build the `repofs.Credential` +inline). `resolve`, `handleInfoRefs`, `handleRPC`, and `d.receivePack` change +their `repoPath string` parameter to a `repofs.RepoRef`; `resolve` passes the +`repofs.Credential` to `load`/`loadOrInit`. Logging/hook context uses +`ref.Path()`. Remove the variable-depth comment block and the now-unused +`strings` import if it drops out. + +## git:// and SSH — mechanical only (out of scope) + +`git_protocol.go handle` and `ssh.go handleSSH` must adapt to the new +`load`/`loadOrInit` signatures: parse their raw path with `repofs.Parse` +(rendering `ErrInvalidPath` in their own dialect — pktline error / stderr+exit) +and pass an empty `repofs.Credential{}`. 
`ssh.go`'s host-key load switches from +`d.fs` to `d.sysFS`. No further redesign of these transports. + +## `main.go` changes + +- Keep building the base bucket fs (`fsys`) as today. +- `d := &daemon{ sysFS: fsys, resolver: repofs.BucketResolver{Base: fsys}, authz: ..., allowHooks: ..., hookTimeout: ... }` — drop the `loader` field. +- HTTP server `Handler: d.httpHandler()` instead of `Handler: d`. +- Drop the `transport.NewFilesystemLoader` call; remove the `transport` import + from `main.go` if it becomes unused. + +## Behavioral note / migration + +Stripping `.git` and requiring an org changes the storage key from `repo.git/` +to `org/repo/`. Repos created under the old layout won't resolve under the new +scheme. Acceptable for the current stage; no migration is in scope. + +## Tests + +- New `internal/repofs/repofs_test.go` — table-driven `Parse` cases (and a tiny + `BucketResolver.Resolve` check that it chroots to `ref.Path()`). +- Update `cmd/objgitd/http_test.go` (and the shared helpers in + `git_protocol_test.go` it reuses): remotes gain an org segment (`/test.git` + → `/acme/test.git`), and storage-key assertions drop `.git` + (`/test.git/config` → `/acme/test/config`; `assertPackedRepo(t, fs, +"/acme/test")`). The git:// tests in `git_protocol_test.go` need the same path + updates to keep passing. +- Optionally add an HTTP test that a single-segment path returns 404 and that a + Basic-auth credential reaches a stub resolver. + +## Verification + +```text +go build ./... +go test ./internal/repofs/... +go test -run TestSmartHTTP ./cmd/objgitd/... # requires git on PATH +go test ./cmd/objgitd/... +``` + +End-to-end against a real bucket: + +```text +./objgitd -bucket $BUCKET -http-bind :8080 -allow-push +git push http://user:pass@localhost:8080/acme/demo.git main # first push creates acme/demo/; user/pass reach the resolver +git clone http://localhost:8080/demo.git # single segment -> 404 +```