From fb8a8ef41a4003d7055c66cf830bb05a2741cfbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=9Awi=C4=99s?= <10480513+pswies@users.noreply.github.com> Date: Fri, 3 Jul 2026 15:32:42 +0200 Subject: [PATCH 1/3] Fix indexing of numeric annotation values of 2^63 or more Saving an entity with a numeric annotation of 2^63 or larger killed the event follower: SQLite stores integers as signed 64-bit, and Go's database/sql refuses to bind a uint64 with the high bit set, so the insert failed with "uint64 values with high bit set are not supported" and the follower stopped permanently. Every arkiv_query then timed out ("context cancelled") until restart, and re-broke on the same entity. Store the value's 8 bytes reinterpreted as int64 instead (two's complement, lossless). Values below 2^63 keep the exact same stored form, so existing databases need no migration and wedged nodes recover by simply resuming. Equality and IN lookups stay exact for all values. Known trade-off, left as follow-up: values of 2^63+ sort as negative in SQL, so range comparisons order that extreme band incorrectly. Co-Authored-By: Claude Fable 5 --- bitmap_cache.go | 8 ++-- query/evaluate.go | 16 ++++---- sqlitestore_test.go | 89 ++++++++++++++++++++++++++++++++++++++++++ store/evals.sql.go | 16 ++++---- store/models.go | 2 +- store/numeric_value.go | 39 ++++++++++++++++++ store/queries.sql.go | 6 +-- store/sqlc.yaml | 6 ++- 8 files changed, 157 insertions(+), 25 deletions(-) create mode 100644 store/numeric_value.go diff --git a/bitmap_cache.go b/bitmap_cache.go index 13ed061..c0ee59c 100644 --- a/bitmap_cache.go +++ b/bitmap_cache.go @@ -80,7 +80,7 @@ func (c *bitmapCache) AddToNumericBitmap(ctx context.Context, name string, value k := nameValue[uint64]{name: name, value: value} bitmap, ok := c.numericBitmaps[k] if !ok { - bitmap, err = c.st.GetNumericAttributeValueBitmap(ctx, store.GetNumericAttributeValueBitmapParams{Name: name, Value: value}) + bitmap, err = c.st.GetNumericAttributeValueBitmap(ctx, store.GetNumericAttributeValueBitmapParams{Name: name, Value: store.NumericValueToSQL(value)}) if err != nil && err != sql.ErrNoRows { return fmt.Errorf("failed to get numeric attribute %q value %q bitmap: %w", name, value, err) @@ -103,7 +103,7 @@ func (c *bitmapCache) RemoveFromNumericBitmap(ctx context.Context, name string, k := nameValue[uint64]{name: name, value: value} bitmap, ok := c.numericBitmaps[k] if !ok { - bitmap, err = c.st.GetNumericAttributeValueBitmap(ctx, store.GetNumericAttributeValueBitmapParams{Name: name, Value: value}) + bitmap, err = c.st.GetNumericAttributeValueBitmap(ctx, store.GetNumericAttributeValueBitmapParams{Name: name, Value: store.NumericValueToSQL(value)}) if err != nil && err != sql.ErrNoRows { return fmt.Errorf("failed to get numeric attribute %q value %q bitmap: %w", name, value, err) @@ -172,14 +172,14 @@ func (c *bitmapCache) Flush(ctx context.Context) (err error) { for k, bitmap := range c.numericBitmaps { if bitmap.IsEmpty() { - err = c.st.DeleteNumericAttributeValueBitmap(ctx, store.DeleteNumericAttributeValueBitmapParams{Name: k.name, Value: k.value}) + err = c.st.DeleteNumericAttributeValueBitmap(ctx, store.DeleteNumericAttributeValueBitmapParams{Name: k.name, Value: store.NumericValueToSQL(k.value)}) if err != nil { return fmt.Errorf("failed to delete numeric attribute %q value %q bitmap: %w", k.name, k.value, err) } continue } - err = c.st.UpsertNumericAttributeValueBitmap(ctx, store.UpsertNumericAttributeValueBitmapParams{Name: k.name, Value: k.value, Bitmap: bitmap}) + err = c.st.UpsertNumericAttributeValueBitmap(ctx, store.UpsertNumericAttributeValueBitmapParams{Name: k.name, Value: store.NumericValueToSQL(k.value), Bitmap: bitmap}) if err != nil { return fmt.Errorf("failed to upsert numeric attribute %q value %q bitmap: %w", k.name, k.value, err) } diff --git a/query/evaluate.go b/query/evaluate.go index c9cb3df..005305b 100644 --- a/query/evaluate.go +++ b/query/evaluate.go @@ -150,7 +150,7 @@ func (e *LessThan) Evaluate( } else { bitmaps, err = q.EvaluateNumericAttributeValueLowerThan(ctx, store.EvaluateNumericAttributeValueLowerThanParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err != nil { return nil, err @@ -184,7 +184,7 @@ func (e *LessOrEqualThan) Evaluate( } else { bitmaps, err = q.EvaluateNumericAttributeValueLessOrEqualThan(ctx, store.EvaluateNumericAttributeValueLessOrEqualThanParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err != nil { return nil, err @@ -219,7 +219,7 @@ func (e *GreaterThan) Evaluate( } else { bitmaps, err = q.EvaluateNumericAttributeValueGreaterThan(ctx, store.EvaluateNumericAttributeValueGreaterThanParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err != nil { return nil, err @@ -254,7 +254,7 @@ func (e *GreaterOrEqualThan) Evaluate( } else { bitmaps, err = q.EvaluateNumericAttributeValueGreaterOrEqualThan(ctx, store.EvaluateNumericAttributeValueGreaterOrEqualThanParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err != nil { return nil, err @@ -318,7 +318,7 @@ func (e *Equality) Evaluate( var bitmaps []*store.Bitmap bitmaps, err = q.EvaluateNumericAttributeValueNotEqual(ctx, store.EvaluateNumericAttributeValueNotEqualParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err != nil { return nil, err @@ -333,7 +333,7 @@ func (e *Equality) Evaluate( } else { bitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ Name: e.Var, - Value: *e.Value.Number, + Value: store.NumericValueToSQL(*e.Value.Number), }) if err == sql.ErrNoRows { @@ -389,7 +389,7 @@ func (e *Inclusion) Evaluate( if e.IsNot { bitmaps, err = q.EvaluateNumericAttributeValueNotInclusion(ctx, store.EvaluateNumericAttributeValueNotInclusionParams{ Name: e.Var, - Values: e.Values.Numbers, + Values: store.NumericValuesToSQL(e.Values.Numbers), }) if err != nil { return nil, err @@ -397,7 +397,7 @@ func (e *Inclusion) Evaluate( } else { bitmaps, err = q.EvaluateNumericAttributeValueInclusion(ctx, store.EvaluateNumericAttributeValueInclusionParams{ Name: e.Var, - Values: e.Values.Numbers, + Values: store.NumericValuesToSQL(e.Values.Numbers), }) if err != nil { return nil, err diff --git a/sqlitestore_test.go b/sqlitestore_test.go index bf0c8f0..e7ff197 100644 --- a/sqlitestore_test.go +++ b/sqlitestore_test.go @@ -4,6 +4,7 @@ import ( "context" "errors" "log/slog" + "math" "os" "path/filepath" "strings" @@ -1101,4 +1102,92 @@ var _ = Describe("SQLiteStore", func() { Expect(err).NotTo(HaveOccurred()) }) }) + + Describe("FollowEvents with numeric attribute values of 2^63 or more", func() { + It("should index the entity and find it again by equality", func() { + // Regression test: values with the top bit set used to fail at the + // database layer ("uint64 values with high bit set are not + // supported"), which permanently stopped the event follower. + iterator := pusher.NewPushIterator() + + key := common.HexToHash("0x3333333333333333333333333333333333333333333333333333333333333333") + owner := common.HexToAddress("0x1234567890123456789012345678901234567890") + + batch := events.BlockBatch{ + Blocks: []events.Block{ + { + Number: 100, + Operations: []events.Operation{ + { + TxIndex: 0, + OpIndex: 0, + Create: &events.OPCreate{ + Key: key, + ContentType: "application/json", + BTL: 1000, + Owner: owner, + Content: []byte(`{"name": "huge numbers"}`), + StringAttributes: map[string]string{ + "type": "huge", + }, + NumericAttributes: map[string]uint64{ + "big": math.MaxUint64, + "alsoBig": 1 << 63, + "small": 7, + }, + }, + }, + }, + }, + }, + } + + go func() { + defer GinkgoRecover() + iterator.Push(ctx, batch) + iterator.Close() + }() + + err := sqlStore.FollowEvents(ctx, arkivevents.BatchIterator(iterator.Iterator())) + Expect(err).NotTo(HaveOccurred()) + + lastBlock, err := sqlStore.GetLastBlock(ctx) + Expect(err).NotTo(HaveOccurred()) + Expect(lastBlock).To(Equal(uint64(100))) + + err = sqlStore.ReadTransaction(ctx, func(q *store.Queries) error { + for name, value := range map[string]uint64{ + "big": math.MaxUint64, + "alsoBig": 1 << 63, + "small": 7, + } { + bitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ + Name: name, + Value: store.NumericValueToSQL(value), + }) + Expect(err).NotTo(HaveOccurred(), "equality lookup for %q", name) + Expect(bitmap).NotTo(BeNil()) + + ids := bitmap.ToArray() + Expect(ids).To(HaveLen(1)) + + payloads, err := q.RetrievePayloads(ctx, ids) + Expect(err).NotTo(HaveOccurred()) + Expect(payloads).To(HaveLen(1)) + Expect(payloads[0].NumericAttributes.Values[name]).To(Equal(value)) + } + + // Inclusion (IN) lookups must round-trip huge values too. + bitmaps, err := q.EvaluateNumericAttributeValueInclusion(ctx, store.EvaluateNumericAttributeValueInclusionParams{ + Name: "big", + Values: store.NumericValuesToSQL([]uint64{math.MaxUint64, 42}), + }) + Expect(err).NotTo(HaveOccurred()) + Expect(bitmaps).To(HaveLen(1)) + + return nil + }) + Expect(err).NotTo(HaveOccurred()) + }) + }) }) diff --git a/store/evals.sql.go b/store/evals.sql.go index 2d10e04..df743b8 100644 --- a/store/evals.sql.go +++ b/store/evals.sql.go @@ -45,7 +45,7 @@ WHERE name = ?1 AND value = ?2 type EvaluateNumericAttributeValueEqualParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueEqual(ctx context.Context, arg EvaluateNumericAttributeValueEqualParams) (*Bitmap, error) { @@ -62,7 +62,7 @@ WHERE name = ?1 AND value >= ?2 type EvaluateNumericAttributeValueGreaterOrEqualThanParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueGreaterOrEqualThan(ctx context.Context, arg EvaluateNumericAttributeValueGreaterOrEqualThanParams) ([]*Bitmap, error) { @@ -95,7 +95,7 @@ WHERE name = ?1 AND value > ?2 type EvaluateNumericAttributeValueGreaterThanParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueGreaterThan(ctx context.Context, arg EvaluateNumericAttributeValueGreaterThanParams) ([]*Bitmap, error) { @@ -128,7 +128,7 @@ WHERE name = ?1 AND value IN (/*SLICE:values*/?) type EvaluateNumericAttributeValueInclusionParams struct { Name string - Values []uint64 + Values []int64 } func (q *Queries) EvaluateNumericAttributeValueInclusion(ctx context.Context, arg EvaluateNumericAttributeValueInclusionParams) ([]*Bitmap, error) { @@ -172,7 +172,7 @@ WHERE name = ?1 AND value <= ?2 type EvaluateNumericAttributeValueLessOrEqualThanParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueLessOrEqualThan(ctx context.Context, arg EvaluateNumericAttributeValueLessOrEqualThanParams) ([]*Bitmap, error) { @@ -205,7 +205,7 @@ WHERE name = ?1 AND value < ?2 type EvaluateNumericAttributeValueLowerThanParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueLowerThan(ctx context.Context, arg EvaluateNumericAttributeValueLowerThanParams) ([]*Bitmap, error) { @@ -238,7 +238,7 @@ WHERE name = ?1 AND value != ?2 type EvaluateNumericAttributeValueNotEqualParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) EvaluateNumericAttributeValueNotEqual(ctx context.Context, arg EvaluateNumericAttributeValueNotEqualParams) ([]*Bitmap, error) { @@ -271,7 +271,7 @@ WHERE name = ?1 AND value NOT IN (/*SLICE:values*/?) type EvaluateNumericAttributeValueNotInclusionParams struct { Name string - Values []uint64 + Values []int64 } func (q *Queries) EvaluateNumericAttributeValueNotInclusion(ctx context.Context, arg EvaluateNumericAttributeValueNotInclusionParams) ([]*Bitmap, error) { diff --git a/store/models.go b/store/models.go index c6b351f..20e3b8d 100644 --- a/store/models.go +++ b/store/models.go @@ -11,7 +11,7 @@ type LastBlock struct { type NumericAttributesValuesBitmap struct { Name string - Value uint64 + Value int64 Bitmap *Bitmap } diff --git a/store/numeric_value.go b/store/numeric_value.go new file mode 100644 index 0000000..715a96c --- /dev/null +++ b/store/numeric_value.go @@ -0,0 +1,39 @@ +package store + +// NumericValueToSQL converts a numeric attribute value (uint64) to the +// form stored in SQLite. +// +// SQLite integers are signed 64-bit, and Go's database/sql layer refuses +// to bind a uint64 with the high bit set (values of 2^63 or more). Before +// this conversion existed, indexing an entity with such a numeric +// attribute failed with: +// +// sql: converting argument $2 type: uint64 values with high bit set are not supported +// +// which permanently stopped the event follower and with it all +// arkiv_query serving. +// +// We store the value's raw 8 bytes reinterpreted as an int64 (two's +// complement). The mapping is one-to-one, so equality and IN lookups stay +// exact for every possible value, and values below 2^63 — which is all +// realistic data — are stored as the same number as before, keeping +// existing databases valid with no migration. +// +// Known trade-off: values of 2^63 or more become negative in SQL order, +// so range comparisons (<, <=, >, >=) place them below small values +// instead of above. Correcting the ordering for that extreme range needs +// an order-preserving re-encoding plus a data migration of existing rows; +// that is intentionally left as a follow-up. +func NumericValueToSQL(v uint64) int64 { + return int64(v) +} + +// NumericValuesToSQL converts a slice of numeric attribute values with +// NumericValueToSQL. +func NumericValuesToSQL(vs []uint64) []int64 { + out := make([]int64, len(vs)) + for i, v := range vs { + out[i] = int64(v) + } + return out +} diff --git a/store/queries.sql.go b/store/queries.sql.go index 0788eb6..6f06d2b 100644 --- a/store/queries.sql.go +++ b/store/queries.sql.go @@ -16,7 +16,7 @@ WHERE name = ? AND value = ? type DeleteNumericAttributeValueBitmapParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) DeleteNumericAttributeValueBitmap(ctx context.Context, arg DeleteNumericAttributeValueBitmapParams) error { @@ -67,7 +67,7 @@ WHERE name = ? AND value = ? type GetNumericAttributeValueBitmapParams struct { Name string - Value uint64 + Value int64 } func (q *Queries) GetNumericAttributeValueBitmap(ctx context.Context, arg GetNumericAttributeValueBitmapParams) (*Bitmap, error) { @@ -142,7 +142,7 @@ ON CONFLICT (name, value) DO UPDATE SET bitmap = excluded.bitmap type UpsertNumericAttributeValueBitmapParams struct { Name string - Value uint64 + Value int64 Bitmap *Bitmap } diff --git a/store/sqlc.yaml b/store/sqlc.yaml index 47aa2ae..b061ebc 100644 --- a/store/sqlc.yaml +++ b/store/sqlc.yaml @@ -24,8 +24,12 @@ sql: go_type: type: "NumericAttributes" pointer: true + # The value column holds uint64 numeric attribute values as their + # int64 bit pattern: SQLite integers are signed 64-bit, and Go's + # database/sql refuses to bind uint64 values >= 2^63. Convert at + # the call sites with NumericValueToSQL / NumericValuesToSQL. - column: "numeric_attributes_values_bitmaps.value" - go_type: "uint64" + go_type: "int64" - column: "string_attributes_values_bitmaps.bitmap" go_type: type: "Bitmap" From 9a4d58088255d8866934f05c0b1f538e4fcf14c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=9Awi=C4=99s?= <10480513+pswies@users.noreply.github.com> Date: Fri, 3 Jul 2026 15:42:02 +0200 Subject: [PATCH 2/3] Add range-query tests pinning behavior around the int64 change Answers the review question "doesn't this break range queries for large numbers?": - For values below 2^63 (all data that could exist before the fix, since larger values crashed the indexer on write) the stored number is unchanged, so <, <=, >, >= behave exactly as before. Pinned by a spec. - For values of 2^63+ the documented trade-off applies: they sort as if negative, so ranges misplace them. A second spec pins that current behavior explicitly so the follow-up (order-preserving encoding + migration) must consciously flip it. Co-Authored-By: Claude Fable 5 --- sqlitestore_test.go | 120 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) diff --git a/sqlitestore_test.go b/sqlitestore_test.go index e7ff197..c6c3aa6 100644 --- a/sqlitestore_test.go +++ b/sqlitestore_test.go @@ -1190,4 +1190,124 @@ var _ = Describe("SQLiteStore", func() { Expect(err).NotTo(HaveOccurred()) }) }) + + Describe("range queries after the int64 storage change", func() { + // Helper: run a numeric range eval and return the set of matching + // payload keys (by their "name" string attribute for readability). + type rangeCase struct { + op string + bound uint64 + } + + var runRange func(q *store.Queries, attr string, c rangeCase) []uint64 + + BeforeEach(func() { + runRange = func(q *store.Queries, attr string, c rangeCase) []uint64 { + var ( + bitmaps []*store.Bitmap + err error + ) + params := struct { + Name string + Value int64 + }{attr, store.NumericValueToSQL(c.bound)} + + switch c.op { + case "<": + bitmaps, err = q.EvaluateNumericAttributeValueLowerThan(ctx, store.EvaluateNumericAttributeValueLowerThanParams(params)) + case "<=": + bitmaps, err = q.EvaluateNumericAttributeValueLessOrEqualThan(ctx, store.EvaluateNumericAttributeValueLessOrEqualThanParams(params)) + case ">": + bitmaps, err = q.EvaluateNumericAttributeValueGreaterThan(ctx, store.EvaluateNumericAttributeValueGreaterThanParams(params)) + case ">=": + bitmaps, err = q.EvaluateNumericAttributeValueGreaterOrEqualThan(ctx, store.EvaluateNumericAttributeValueGreaterOrEqualThanParams(params)) + } + Expect(err).NotTo(HaveOccurred()) + + combined := store.NewBitmap() + for _, bm := range bitmaps { + combined.Or(bm.Bitmap) + } + return combined.ToArray() + } + }) + + follow := func(numericAttrsPerKey map[common.Hash]map[string]uint64) { + iterator := pusher.NewPushIterator() + ops := []events.Operation{} + txIndex := uint64(0) + for key, attrs := range numericAttrsPerKey { + ops = append(ops, events.Operation{ + TxIndex: txIndex, + OpIndex: 0, + Create: &events.OPCreate{ + Key: key, + ContentType: "application/json", + BTL: 1000, + Owner: common.HexToAddress("0x1234567890123456789012345678901234567890"), + Content: []byte(`{}`), + StringAttributes: map[string]string{"kind": "range-test"}, + NumericAttributes: attrs, + }, + }) + txIndex++ + } + batch := events.BlockBatch{Blocks: []events.Block{{Number: 100, Operations: ops}}} + + go func() { + defer GinkgoRecover() + iterator.Push(ctx, batch) + iterator.Close() + }() + Expect(sqlStore.FollowEvents(ctx, arkivevents.BatchIterator(iterator.Iterator()))).To(Succeed()) + } + + It("keeps range queries exact when all values are below 2^63 (all pre-existing data)", func() { + // Values below 2^63 are stored as the very same number as before + // this change, so <, <=, >, >= must behave identically. + follow(map[common.Hash]map[string]uint64{ + common.HexToHash("0x01"): {"priority": 5}, + common.HexToHash("0x02"): {"priority": 100}, + common.HexToHash("0x03"): {"priority": 7000}, + }) + + err := sqlStore.ReadTransaction(ctx, func(q *store.Queries) error { + Expect(runRange(q, "priority", rangeCase{"<", 50})).To(HaveLen(1)) // {5} + Expect(runRange(q, "priority", rangeCase{"<=", 100})).To(HaveLen(2)) // {5, 100} + Expect(runRange(q, "priority", rangeCase{">", 50})).To(HaveLen(2)) // {100, 7000} + Expect(runRange(q, "priority", rangeCase{">=", 7000})).To(HaveLen(1)) // {7000} + Expect(runRange(q, "priority", rangeCase{">", 7000})).To(BeEmpty()) // {} + return nil + }) + Expect(err).NotTo(HaveOccurred()) + }) + + It("documents the known trade-off: values of 2^63+ sort as if negative in range queries", func() { + // These assertions pin the DOCUMENTED Fix-A limitation, they do not + // bless it as correct. A value >= 2^63 is stored as a negative + // int64, so range comparisons place it BELOW small values instead + // of above. Equality and IN lookups on the same value are exact + // (covered by the spec above). + // + // When the order-preserving re-encoding + data migration lands + // (the planned follow-up), the huge entity must switch sides and + // these assertions must be flipped. + follow(map[common.Hash]map[string]uint64{ + common.HexToHash("0x0a"): {"size": 100}, + common.HexToHash("0x0b"): {"size": 1 << 63}, // huge + }) + + err := sqlStore.ReadTransaction(ctx, func(q *store.Queries) error { + // Numerically, size > 50 should match BOTH entities. Today the + // huge one is missed because it is stored negative. + Expect(runRange(q, "size", rangeCase{">", 50})).To(HaveLen(1)) + + // Numerically, size < 50 should match NOTHING. Today the huge + // entity is wrongly included. + Expect(runRange(q, "size", rangeCase{"<", 50})).To(HaveLen(1)) + return nil + }) + Expect(err).NotTo(HaveOccurred()) + }) + }) }) From fb0836ef28bd6e7e2d81a32e3ae75fbd08d8c264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20=C5=9Awi=C4=99s?= <10480513+pswies@users.noreply.github.com> Date: Fri, 3 Jul 2026 15:50:41 +0200 Subject: [PATCH 3/3] Use order-preserving encoding so range queries are correct for all values Addresses the review concern that values of 2^63+ stored as plain int64 bit patterns sort as negative and give wrong range results. Store value XOR 2^63 (as int64) instead: the mapping is one-to-one and strictly increasing over the whole uint64 range, so SQL's signed ordering now equals numeric ordering everywhere - range queries are correct for every value, not just those below 2^63. Migration 000002 re-encodes existing rows once on startup via a table rebuild (an in-place UPDATE could transiently collide under the (name, value) primary key). The shift is split into two in-range steps because the literal 9223372036854775808 does not fit in int64 and SQLite would read it as floating point, silently storing imprecise REALs - caught by the new migration test. Tests: cross-boundary range ordering (100 / 2^63 / 2^64-1), plus a migration test that builds a version-1 database with raw values and verifies queries still answer correctly after upgrade. Co-Authored-By: Claude Fable 5 --- sqlitestore_test.go | 106 ++++++++++++++---- store/numeric_value.go | 38 ++++--- ...2_order_preserving_numeric_values.down.sql | 21 ++++ ...002_order_preserving_numeric_values.up.sql | 36 ++++++ store/sqlc.yaml | 10 +- 5 files changed, 167 insertions(+), 44 deletions(-) create mode 100644 store/schema/000002_order_preserving_numeric_values.down.sql create mode 100644 store/schema/000002_order_preserving_numeric_values.up.sql diff --git a/sqlitestore_test.go b/sqlitestore_test.go index c6c3aa6..1807007 100644 --- a/sqlitestore_test.go +++ b/sqlitestore_test.go @@ -2,6 +2,7 @@ package sqlitebitmapstore_test import ( "context" + "database/sql" "errors" "log/slog" "math" @@ -164,7 +165,7 @@ var _ = Describe("SQLiteStore", func() { // Query by numeric attribute: version = 1 version1Bitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ Name: "version", - Value: 1, + Value: store.NumericValueToSQL(1), }) Expect(err).NotTo(HaveOccurred()) Expect(version1Bitmap).NotTo(BeNil()) @@ -180,7 +181,7 @@ var _ = Describe("SQLiteStore", func() { // Query by numeric attribute: version > 1 versionGT1Bitmaps, err := q.EvaluateNumericAttributeValueGreaterThan(ctx, store.EvaluateNumericAttributeValueGreaterThanParams{ Name: "version", - Value: 1, + Value: store.NumericValueToSQL(1), }) Expect(err).NotTo(HaveOccurred()) Expect(versionGT1Bitmaps).To(HaveLen(1)) @@ -202,7 +203,7 @@ var _ = Describe("SQLiteStore", func() { // Query by numeric attribute: priority >= 10 priorityGTE10Bitmaps, err := q.EvaluateNumericAttributeValueGreaterOrEqualThan(ctx, store.EvaluateNumericAttributeValueGreaterOrEqualThanParams{ Name: "priority", - Value: 10, + Value: store.NumericValueToSQL(10), }) Expect(err).NotTo(HaveOccurred()) Expect(priorityGTE10Bitmaps).To(HaveLen(2)) @@ -604,14 +605,14 @@ var _ = Describe("SQLiteStore", func() { // Verify old expiration bitmap is removed oldExpBitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ Name: "$expiration", - Value: 600, + Value: store.NumericValueToSQL(600), }) Expect(err).To(HaveOccurred()) // Verify new expiration bitmap exists newExpBitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ Name: "$expiration", - Value: 1600, + Value: store.NumericValueToSQL(1600), }) Expect(err).NotTo(HaveOccurred()) Expect(newExpBitmap.ToArray()).To(HaveLen(1)) @@ -1282,29 +1283,90 @@ var _ = Describe("SQLiteStore", func() { Expect(err).NotTo(HaveOccurred()) }) - It("documents the known trade-off: values of 2^63+ sort as if negative in range queries", func() { - // These assertions pin the DOCUMENTED Fix-A limitation, they do not - // bless it as correct. A value >= 2^63 is stored as a negative - // int64, so range comparisons place it BELOW small values instead - // of above. Equality and IN lookups on the same value are exact - // (covered by the spec above). - // - // When the order-preserving re-encoding + data migration lands - // (the planned follow-up), the huge entity must switch sides and - // these assertions must be flipped. + It("orders values of 2^63 and above correctly in range queries", func() { + // The stored form is value XOR 2^63 (as int64), which is strictly + // increasing over the whole uint64 range, so SQL's signed + // comparison must agree with numeric comparison even across the + // 2^63 boundary. follow(map[common.Hash]map[string]uint64{ common.HexToHash("0x0a"): {"size": 100}, - common.HexToHash("0x0b"): {"size": 1 << 63}, // huge + common.HexToHash("0x0b"): {"size": 1 << 63}, // just past the boundary + common.HexToHash("0x0c"): {"size": math.MaxUint64}, // largest possible }) err := sqlStore.ReadTransaction(ctx, func(q *store.Queries) error { - // Numerically, size > 50 should match BOTH entities. Today the - // huge one is missed because it is stored negative. - Expect(runRange(q, "size", rangeCase{">", 50})).To(HaveLen(1)) + Expect(runRange(q, "size", rangeCase{">", 50})).To(HaveLen(3)) // all of them + Expect(runRange(q, "size", rangeCase{"<", 50})).To(BeEmpty()) // none + Expect(runRange(q, "size", rangeCase{">", 100})).To(HaveLen(2)) // the two huge ones + Expect(runRange(q, "size", rangeCase{">=", 1 << 63})).To(HaveLen(2)) // both at/above 2^63 + Expect(runRange(q, "size", rangeCase{">", 1 << 63})).To(HaveLen(1)) // only MaxUint64 + Expect(runRange(q, "size", rangeCase{"<", 1 << 63})).To(HaveLen(1)) // only 100 + Expect(runRange(q, "size", rangeCase{">=", math.MaxUint64})).To(HaveLen(1)) // only MaxUint64 + Expect(runRange(q, "size", rangeCase{"<=", math.MaxUint64})).To(HaveLen(3)) // everything + return nil + }) + Expect(err).NotTo(HaveOccurred()) + }) + }) + + Describe("migration 000002 on a database written before the encoding change", func() { + It("re-encodes existing rows so old data keeps answering queries correctly", func() { + // Build a database the way released versions wrote it: schema at + // migration version 1, numeric values stored raw. Opening the + // store must migrate it and both equality and range queries must + // keep working on the old rows. + dbPath := filepath.Join(tmpDir, "legacy.db") + + raw, err := sql.Open("sqlite3", "file:"+dbPath+"?mode=rwc&_journal_mode=WAL") + Expect(err).NotTo(HaveOccurred()) + + initSQL, err := store.Migrations.ReadFile("schema/000001_init.up.sql") + Expect(err).NotTo(HaveOccurred()) + _, err = raw.Exec(string(initSQL)) + Expect(err).NotTo(HaveOccurred()) + + // golang-migrate's version table, pinned at version 1. + _, err = raw.Exec(`CREATE TABLE IF NOT EXISTS schema_migrations (version uint64, dirty bool); + DELETE FROM schema_migrations; INSERT INTO schema_migrations (version, dirty) VALUES (1, 0);`) + Expect(err).NotTo(HaveOccurred()) - // Numerically, size < 50 should match NOTHING. Today the huge - // entity is wrongly included. - Expect(runRange(q, "size", rangeCase{"<", 50})).To(HaveLen(1)) + // A row exactly as the pre-fix code stored it: raw value 100. + legacyBitmap := store.NewBitmap() + legacyBitmap.Add(7) + blob, err := legacyBitmap.Value() + Expect(err).NotTo(HaveOccurred()) + _, err = raw.Exec(`INSERT INTO numeric_attributes_values_bitmaps (name, value, bitmap) VALUES (?, ?, ?)`, + "legacy", int64(100), blob) + Expect(err).NotTo(HaveOccurred()) + Expect(raw.Close()).To(Succeed()) + + // Opening the store runs migration 000002. + legacyStore, err := sqlitebitmapstore.NewSQLiteStore(logger, dbPath, 2) + Expect(err).NotTo(HaveOccurred()) + defer legacyStore.Close() + + err = legacyStore.ReadTransaction(ctx, func(q *store.Queries) error { + bitmap, err := q.EvaluateNumericAttributeValueEqual(ctx, store.EvaluateNumericAttributeValueEqualParams{ + Name: "legacy", + Value: store.NumericValueToSQL(100), + }) + Expect(err).NotTo(HaveOccurred()) + Expect(bitmap).NotTo(BeNil()) + Expect(bitmap.ToArray()).To(Equal([]uint64{7})) + + greater, err := q.EvaluateNumericAttributeValueGreaterThan(ctx, store.EvaluateNumericAttributeValueGreaterThanParams{ + Name: "legacy", + Value: store.NumericValueToSQL(50), + }) + Expect(err).NotTo(HaveOccurred()) + Expect(greater).To(HaveLen(1)) + + lower, err := q.EvaluateNumericAttributeValueLowerThan(ctx, store.EvaluateNumericAttributeValueLowerThanParams{ + Name: "legacy", + Value: store.NumericValueToSQL(50), + }) + Expect(err).NotTo(HaveOccurred()) + Expect(lower).To(BeEmpty()) return nil }) Expect(err).NotTo(HaveOccurred()) diff --git a/store/numeric_value.go b/store/numeric_value.go index 715a96c..24c376a 100644 --- a/store/numeric_value.go +++ b/store/numeric_value.go @@ -3,29 +3,31 @@ package store // NumericValueToSQL converts a numeric attribute value (uint64) to the // form stored in SQLite. // -// SQLite integers are signed 64-bit, and Go's database/sql layer refuses -// to bind a uint64 with the high bit set (values of 2^63 or more). Before -// this conversion existed, indexing an entity with such a numeric -// attribute failed with: +// Two problems are solved at once: +// +// 1. SQLite integers are signed 64-bit, and Go's database/sql layer +// refuses to bind a uint64 with the high bit set (values of 2^63 or +// more). Before this conversion existed, indexing an entity with such +// a numeric attribute failed with // // sql: converting argument $2 type: uint64 values with high bit set are not supported // -// which permanently stopped the event follower and with it all -// arkiv_query serving. +// which permanently stopped the event follower and with it all +// arkiv_query serving. +// +// 2. Range queries (<, <=, >, >=) compare the stored column with SQL's +// signed ordering, which must therefore agree with unsigned numeric +// ordering across the full uint64 range. // -// We store the value's raw 8 bytes reinterpreted as an int64 (two's -// complement). The mapping is one-to-one, so equality and IN lookups stay -// exact for every possible value, and values below 2^63 — which is all -// realistic data — are stored as the same number as before, keeping -// existing databases valid with no migration. +// Flipping the top bit and reinterpreting the result as int64 solves +// both: the mapping is one-to-one (equality and IN stay exact) and +// strictly increasing — 0 maps to the most negative int64, 2^64-1 to the +// most positive, so signed SQL order equals numeric order everywhere. // -// Known trade-off: values of 2^63 or more become negative in SQL order, -// so range comparisons (<, <=, >, >=) place them below small values -// instead of above. Correcting the ordering for that extreme range needs -// an order-preserving re-encoding plus a data migration of existing rows; -// that is intentionally left as a follow-up. +// Existing databases written before this encoding hold the raw value in +// the column; migration 000002 re-encodes them once on startup. func NumericValueToSQL(v uint64) int64 { - return int64(v) + return int64(v ^ (1 << 63)) } // NumericValuesToSQL converts a slice of numeric attribute values with @@ -33,7 +35,7 @@ func NumericValueToSQL(v uint64) int64 { func NumericValuesToSQL(vs []uint64) []int64 { out := make([]int64, len(vs)) for i, v := range vs { - out[i] = int64(v) + out[i] = NumericValueToSQL(v) } return out } diff --git a/store/schema/000002_order_preserving_numeric_values.down.sql b/store/schema/000002_order_preserving_numeric_values.down.sql new file mode 100644 index 0000000..f61961a --- /dev/null +++ b/store/schema/000002_order_preserving_numeric_values.down.sql @@ -0,0 +1,21 @@ +-- The encoding transform (XOR with 2^63) is its own inverse, so undoing +-- the migration applies the exact same rebuild as the up migration. +-- See the up migration for why the shift is split into two in-range steps. +CREATE TABLE numeric_attributes_values_bitmaps_new ( + name TEXT NOT NULL, + value INTEGER NOT NULL, + bitmap BLOB, + PRIMARY KEY (name, value) +); + +INSERT INTO numeric_attributes_values_bitmaps_new (name, value, bitmap) +SELECT name, + CASE WHEN value >= 0 THEN (value - 9223372036854775807) - 1 + ELSE (value + 9223372036854775807) + 1 + END, + bitmap +FROM numeric_attributes_values_bitmaps; + +DROP TABLE numeric_attributes_values_bitmaps; + +ALTER TABLE numeric_attributes_values_bitmaps_new RENAME TO numeric_attributes_values_bitmaps; diff --git a/store/schema/000002_order_preserving_numeric_values.up.sql b/store/schema/000002_order_preserving_numeric_values.up.sql new file mode 100644 index 0000000..5fc0f61 --- /dev/null +++ b/store/schema/000002_order_preserving_numeric_values.up.sql @@ -0,0 +1,36 @@ +-- Re-encode numeric attribute values so that SQL's signed ordering of the +-- stored column matches unsigned numeric ordering across the full uint64 +-- range: stored = value XOR 2^63, reinterpreted as signed 64-bit. +-- +-- In signed arithmetic the transform is +-- new = old - 2^63 for old >= 0 +-- new = old + 2^63 for old < 0 +-- which is exact for every int64 (no overflow) and is its own inverse. +-- +-- The shift is written as two steps of 9223372036854775807 (int64 max) +-- and 1: the one-step literal 9223372036854775808 does not fit in a +-- 64-bit integer, so SQLite would read it as a floating-point number and +-- silently store imprecise REAL values. Every intermediate below stays +-- inside the int64 range, keeping the arithmetic exact and integer-typed. +-- +-- The table is rebuilt rather than updated in place: the transform swaps +-- the two halves of the number line, so an in-place UPDATE could collide +-- with a not-yet-updated row under the (name, value) primary key. +CREATE TABLE numeric_attributes_values_bitmaps_new ( + name TEXT NOT NULL, + value INTEGER NOT NULL, + bitmap BLOB, + PRIMARY KEY (name, value) +); + +INSERT INTO numeric_attributes_values_bitmaps_new (name, value, bitmap) +SELECT name, + CASE WHEN value >= 0 THEN (value - 9223372036854775807) - 1 + ELSE (value + 9223372036854775807) + 1 + END, + bitmap +FROM numeric_attributes_values_bitmaps; + +DROP TABLE numeric_attributes_values_bitmaps; + +ALTER TABLE numeric_attributes_values_bitmaps_new RENAME TO numeric_attributes_values_bitmaps; diff --git a/store/sqlc.yaml b/store/sqlc.yaml index b061ebc..8d78291 100644 --- a/store/sqlc.yaml +++ b/store/sqlc.yaml @@ -24,10 +24,12 @@ sql: go_type: type: "NumericAttributes" pointer: true - # The value column holds uint64 numeric attribute values as their - # int64 bit pattern: SQLite integers are signed 64-bit, and Go's - # database/sql refuses to bind uint64 values >= 2^63. Convert at - # the call sites with NumericValueToSQL / NumericValuesToSQL. + # The value column holds uint64 numeric attribute values in an + # order-preserving signed encoding (value XOR 2^63 as int64): + # SQLite integers are signed 64-bit and Go's database/sql refuses + # to bind uint64 values >= 2^63, and signed SQL ordering must match + # unsigned numeric ordering for range queries. Convert at the call + # sites with NumericValueToSQL / NumericValuesToSQL. - column: "numeric_attributes_values_bitmaps.value" go_type: "int64" - column: "string_attributes_values_bitmaps.bitmap"