From 6f93002aca2a1d63b87dbac2196de17a0b40b4ce Mon Sep 17 00:00:00 2001 From: Junwang Zhao Date: Fri, 19 Jun 2026 20:45:48 +0800 Subject: [PATCH] fix: allow historical sort orders with dropped fields This fix is aligned with: https://github.com/apache/iceberg/pull/16521 --- src/iceberg/json_serde.cc | 44 ++++++++++++++++------- src/iceberg/test/metadata_serde_test.cc | 48 +++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 13 deletions(-) diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 1f2b8f45c..8bad99263 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -273,8 +273,14 @@ Result> SortFieldFromJson(const nlohmann::json& json) null_order); } -Result> SortOrderFromJson( - const nlohmann::json& json, const std::shared_ptr& current_schema) { +namespace { + +struct ParsedSortOrder { + int32_t order_id; + std::vector fields; +}; + +Result ParseSortOrder(const nlohmann::json& json) { ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue(json, kOrderId)); ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue(json, kFields)); @@ -283,19 +289,30 @@ Result> SortOrderFromJson( ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json)); sort_fields.push_back(std::move(*sort_field)); } - return SortOrder::Make(*current_schema, order_id, std::move(sort_fields)); + return ParsedSortOrder{.order_id = order_id, .fields = std::move(sort_fields)}; } -Result> SortOrderFromJson(const nlohmann::json& json) { - ICEBERG_ASSIGN_OR_RAISE(auto order_id, GetJsonValue(json, kOrderId)); - ICEBERG_ASSIGN_OR_RAISE(auto fields, GetJsonValue(json, kFields)); +} // namespace - std::vector sort_fields; - for (const auto& field_json : fields) { - ICEBERG_ASSIGN_OR_RAISE(auto sort_field, SortFieldFromJson(field_json)); - sort_fields.push_back(std::move(*sort_field)); +Result> SortOrderFromJson( + const nlohmann::json& json, const std::shared_ptr& current_schema, + int32_t default_sort_order_id) { + ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json)); + if (parsed.order_id == default_sort_order_id) { + return SortOrder::Make(*current_schema, parsed.order_id, std::move(parsed.fields)); } - return SortOrder::Make(order_id, std::move(sort_fields)); + return SortOrder::Make(parsed.order_id, std::move(parsed.fields)); +} + +Result> SortOrderFromJson( + const nlohmann::json& json, const std::shared_ptr& current_schema) { + ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json)); + return SortOrder::Make(*current_schema, parsed.order_id, std::move(parsed.fields)); +} + +Result> SortOrderFromJson(const nlohmann::json& json) { + ICEBERG_ASSIGN_OR_RAISE(auto parsed, ParseSortOrder(json)); + return SortOrder::Make(parsed.order_id, std::move(parsed.fields)); } nlohmann::json ToJson(const SchemaField& field) { @@ -1107,8 +1124,9 @@ Status ParseSortOrders(const nlohmann::json& json, int8_t format_version, ICEBERG_ASSIGN_OR_RAISE(auto sort_order_array, GetJsonValue(json, kSortOrders)); for (const auto& sort_order_json : sort_order_array) { - ICEBERG_ASSIGN_OR_RAISE(auto sort_order, - SortOrderFromJson(sort_order_json, current_schema)); + ICEBERG_ASSIGN_OR_RAISE( + auto sort_order, + SortOrderFromJson(sort_order_json, current_schema, default_sort_order_id)); sort_orders.push_back(std::move(sort_order)); } } else { diff --git a/src/iceberg/test/metadata_serde_test.cc b/src/iceberg/test/metadata_serde_test.cc index 48f88f2bc..f367aca37 100644 --- a/src/iceberg/test/metadata_serde_test.cc +++ b/src/iceberg/test/metadata_serde_test.cc @@ -486,6 +486,54 @@ TEST(MetadataSerdeTest, DeserializeV2MissingSortOrder) { "sort-orders must exist"); } +TEST(MetadataSerdeTest, DeserializeHistoricalSortOrderWithDroppedField) { + nlohmann::json metadata_json = R"({ + "format-version": 2, + "table-uuid": "test-uuid-1234", + "location": "s3://bucket/test", + "last-sequence-number": 0, + "last-updated-ms": 0, + "last-column-id": 2, + "schemas": [ + { + "type": "struct", + "schema-id": 1, + "fields": [ + {"id": 1, "name": "id", "type": "int", "required": true} + ] + } + ], + "current-schema-id": 1, + "partition-specs": [{"spec-id": 0, "fields": []}], + "default-spec-id": 0, + "last-partition-id": 999, + "sort-orders": [ + {"order-id": 1, "fields": [ + {"transform": "identity", "source-id": 1, "direction": "asc", "null-order": "nulls-first"}, + {"transform": "identity", "source-id": 2, "direction": "asc", "null-order": "nulls-first"} + ]}, + {"order-id": 2, "fields": [ + {"transform": "identity", "source-id": 1, "direction": "asc", "null-order": "nulls-first"} + ]} + ], + "default-sort-order-id": 2, + "properties": {}, + "current-snapshot-id": null, + "refs": {}, + "snapshots": [], + "statistics": [], + "partition-statistics": [], + "snapshot-log": [], + "metadata-log": [] + })"_json; + + auto metadata = TableMetadataFromJson(metadata_json); + ASSERT_THAT(metadata, IsOk()); + ASSERT_EQ(metadata.value()->sort_orders.size(), 2); + EXPECT_EQ(metadata.value()->sort_orders[0]->order_id(), 1); + EXPECT_EQ(metadata.value()->sort_orders[1]->order_id(), 2); +} + TEST(MetadataSerdeTest, EncryptionKeysRoundTrip) { nlohmann::json metadata_json = R"({ "format-version": 2,