From 246cab6f3f80c38c32308655024a3be770913148 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Mon, 15 Jun 2026 10:49:03 +0000 Subject: [PATCH 1/4] feat(logging): add Logger interface and process-global default (2/6) Second block: the backend-agnostic logging API and the swappable default logger. - Logger: pure-virtual sink (ShouldLog/Log/SetLevel/level/Flush/IsNoop), with ShouldLog() as the single authority for runtime filtering and a documented no-reentrancy / thread-safety / noexcept contract. Mirrors MetricsReporter. - LogMessage owns its formatted text (moved in) so a sink may retain records; reserves an owning attributes vector for future structured logging without an ABI break. logger.h is backend-agnostic -- it never includes the build config header nor references the spdlog feature macro, so consumers see one stable API. - Process-global default logger: GetDefaultLogger / SetDefaultLogger / SetDefaultLevel, plus a lock-free thread-local generation cache on the hot path (no lock or refcount churn in steady state; slot mutex only on swap/refresh). NoopLogger is the placeholder default here; CerrLogger and the spdlog backend arrive in the following blocks. Builds the source into both CMake and Meson and installs logger.h alongside log_level.h; logger_test covers the default-logger API, level lowering taking effect immediately, and concurrent swap/read safety. Co-authored-by: Isaac --- src/iceberg/CMakeLists.txt | 1 + src/iceberg/logging/logger.cc | 146 ++++++++++++++++ src/iceberg/logging/logger.h | 213 ++++++++++++++++++++++++ src/iceberg/logging/meson.build | 2 +- src/iceberg/meson.build | 1 + src/iceberg/test/CMakeLists.txt | 2 +- src/iceberg/test/logger_test.cc | 149 +++++++++++++++++ src/iceberg/test/logging_test_helpers.h | 77 +++++++++ src/iceberg/test/meson.build | 2 +- 9 files changed, 590 insertions(+), 3 deletions(-) create mode 100644 src/iceberg/logging/logger.cc create mode 100644 src/iceberg/logging/logger.h create mode 100644 src/iceberg/test/logger_test.cc create mode 100644 src/iceberg/test/logging_test_helpers.h diff --git a/src/iceberg/CMakeLists.txt b/src/iceberg/CMakeLists.txt index 18615c202..93a3a6004 100644 --- a/src/iceberg/CMakeLists.txt +++ b/src/iceberg/CMakeLists.txt @@ -47,6 +47,7 @@ set(ICEBERG_SOURCES inheritable_metadata.cc json_serde.cc location_provider.cc + logging/logger.cc manifest/manifest_adapter.cc manifest/manifest_entry.cc manifest/manifest_filter_manager.cc diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc new file mode 100644 index 000000000..cd864bc42 --- /dev/null +++ b/src/iceberg/logging/logger.cc @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include +#include + +namespace iceberg { + +namespace { + +/// \brief Logger that drops every record. +class NoopLogger final : public Logger { + public: + bool ShouldLog(LogLevel /*level*/) const override { return false; } + void Log(LogMessage&& /*message*/) noexcept override {} + void SetLevel(LogLevel /*level*/) override {} + LogLevel level() const override { return LogLevel::kOff; } + bool IsNoop() const override { return true; } +}; + +/// \brief Construct the process default logger for this build configuration. +/// +/// This block ships only the interface and the no-op logger; the concrete +/// std::cerr and spdlog sinks (and the build-config selection between them) +/// arrive in later blocks, which update this factory. +std::shared_ptr MakeDefaultLogger() { return std::make_shared(); } + +/// \brief The process-global default-logger slot. +struct DefaultSlot { + std::mutex mtx; + std::shared_ptr logger; + // Seeded to 1 so a fresh thread (tls_gen == 0) always refreshes on first use. + std::atomic gen{1}; + + DefaultSlot() : logger(MakeDefaultLogger()) {} +}; + +/// \brief Immortal (leaked, hence reachable -> LSan-clean) accessor for the slot. +DefaultSlot& Slot() { + static auto* slot = new DefaultSlot(); + return *slot; +} + +} // namespace + +std::shared_ptr Logger::Noop() { + // Intentionally leaked: reachable via the function-local static (LSan-clean) + // and never destroyed, so logging during static teardown stays safe. + static auto* instance = new std::shared_ptr(std::make_shared()); + return *instance; +} + +std::shared_ptr GetDefaultLogger() { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + return slot.logger; +} + +void SetDefaultLogger(std::shared_ptr logger) { + if (!logger) { + logger = Logger::Noop(); + } + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger = std::move(logger); + // Publish the swap; the mutex provides the happens-before, gen is a detector. + slot.gen.fetch_add(1, std::memory_order_relaxed); +} + +void SetDefaultLevel(LogLevel level) { + DefaultSlot& slot = Slot(); + std::lock_guard lock(slot.mtx); + slot.logger->SetLevel(level); +} + +namespace detail { + +const std::shared_ptr& CurrentLogger() noexcept { + static thread_local std::shared_ptr tls; + static thread_local uint64_t tls_gen = 0; + // Sentinel whose destructor marks the cache dead at thread exit. It is + // declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once + // dead, a log from any later-destroyed thread_local destructor must not touch + // the (about-to-be / already) destroyed tls slot. + static thread_local struct AliveFlag { + bool value = true; + ~AliveFlag() { value = false; } + } alive; + if (!alive.value) { + // Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger + // (leaked, never destroyed) so logging during teardown stays safe. + static const std::shared_ptr kFallback = Logger::Noop(); + return kFallback; + } + DefaultSlot& slot = Slot(); + uint64_t current = slot.gen.load(std::memory_order_relaxed); + if (current != tls_gen) { + std::lock_guard lock(slot.mtx); + tls = slot.logger; + tls_gen = current; + } + return tls; +} + +void Emit(Logger& logger, LogLevel level, const std::source_location& location, + std::string&& message) { + logger.Log(LogMessage{.level = level, + .message = std::move(message), + .location = location, + .attributes = {}}); +} + +void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept { + // Fixed short literal (<= 15 bytes, fits SSO on libstdc++/libc++/MSVC -> no heap + // allocation), no std::format, no retry. Cannot throw or recurse. + logger.Log(LogMessage{.level = level, + .message = std::string(""), + .location = location, + .attributes = {}}); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h new file mode 100644 index 000000000..58f5691af --- /dev/null +++ b/src/iceberg/logging/logger.h @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +/// \file iceberg/logging/logger.h +/// \brief Pluggable logging interface, the process-global default logger, and +/// the logging macros. +/// +/// This header is backend-agnostic: it never includes the build-generated +/// backend configuration header and never references the spdlog feature macro, +/// so consumers see one stable API regardless of how the backend was configured. + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iceberg/iceberg_export.h" +#include "iceberg/logging/log_level.h" +#include "iceberg/result.h" + +namespace iceberg { + +/// \brief A structured key/value attribute attached to a log record. +/// +/// Both key and value are owned so a sink may retain the record safely. +/// Unused in v1; reserved so structured logging can be added without an ABI +/// break to LogMessage. +struct LogAttribute { + std::string key; + std::string value; +}; + +/// \brief A single log record handed to a Logger. +/// +/// The formatted message is owned (moved in by the logging macros), so a sink +/// may safely retain the record beyond the Log() call. The member set must not +/// depend on the build's logging backend (the spdlog backend never appears here). +struct LogMessage { + LogLevel level = LogLevel::kOff; + std::string message; + std::source_location location = std::source_location::current(); + std::vector attributes; +}; + +/// \brief Well-known Logger::Initialize() property keys. +/// +/// `level` is honored by the base Logger::Initialize (parsed via +/// LogLevelFromString). `pattern` is honored by the formatting sinks +/// (CerrLogger, SpdLogger). +inline constexpr std::string_view kLevelProperty = "level"; +inline constexpr std::string_view kPatternProperty = "pattern"; + +/// \brief Pluggable logging sink. +/// +/// ShouldLog() is the single authority for runtime filtering -- the macros call +/// it on every (compile-time-enabled) statement, so level changes by any path +/// take effect immediately. Implementations must be thread-safe and must not +/// throw. They must also obey: +/// - No reentrancy: Log()/Flush() must not call the logging macros or +/// GetDefaultLogger() (UB -- deadlock with mutex-based sinks). +/// - level() is an accessor consistent with ShouldLog (used by SetDefaultLevel +/// and introspection); ShouldLog may implement finer logic than a level compare. +class ICEBERG_EXPORT Logger { + public: + virtual ~Logger() = default; + + /// \brief Property-based setup, called by Loggers::Load() before first use. + /// + /// The base implementation applies the "level" property (parsed via + /// LogLevelFromString); an unrecognized value is an InvalidArgument error. + /// Formatting sinks override this to also apply "pattern" and then delegate + /// to this base for "level". + virtual Status Initialize( + const std::unordered_map& properties) { + if (auto it = properties.find(std::string(kLevelProperty)); it != properties.end()) { + auto parsed = LogLevelFromString(it->second); + if (!parsed) return std::unexpected(parsed.error()); + SetLevel(*parsed); + } + return {}; + } + + /// \brief Cheap check whether a record at \p level would be emitted. + virtual bool ShouldLog(LogLevel level) const = 0; + + /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. + virtual void Log(LogMessage&& message) noexcept = 0; + + /// \brief Set the minimum level this logger emits. + virtual void SetLevel(LogLevel level) = 0; + + /// \brief Return the minimum level this logger emits. + virtual LogLevel level() const = 0; + + /// \brief Flush any buffered output. Must not throw; best-effort on the fatal path. + virtual void Flush() noexcept {} + + /// \brief Return true if this logger is a no-op. + virtual bool IsNoop() const { return false; } + + /// \brief Return a shared, immortal no-op logger singleton. + static std::shared_ptr Noop(); +}; + +/// \brief Return the process-global default logger (never null). +/// +/// Off the hot path -- acquires the slot lock and returns an owning copy. The +/// logging macros use the cheaper internal hot-path accessor instead. +ICEBERG_EXPORT std::shared_ptr GetDefaultLogger(); + +/// \brief Install a new process-global default logger. +/// +/// A null argument installs the no-op logger. Thread-safe; intended for +/// occasional (configuration-time) use rather than the hot path. +ICEBERG_EXPORT void SetDefaultLogger(std::shared_ptr logger); + +/// \brief Set the minimum level of the current default logger. +/// +/// Convenience for `GetDefaultLogger()->SetLevel(level)`. Filtering is always +/// decided by the logger's own ShouldLog(), so changing a logger's level by any +/// means (this, SetLevel on a held handle, or Initialize) takes effect immediately. +ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); + +// --------------------------------------------------------------------------- +// Using the API directly (the LOG_* macros that wrap this are added later in +// the stack). Example: a custom sink, installed as the process default. +// +// class MySink : public Logger { +// public: +// bool ShouldLog(LogLevel level) const override { return level >= level_; } +// void Log(LogMessage&& m) noexcept override { write_line(m.message); } +// void SetLevel(LogLevel level) override { level_ = level; } +// LogLevel level() const override { return level_; } +// private: +// std::atomic level_{LogLevel::kInfo}; +// }; +// +// SetDefaultLogger(std::make_shared()); // install process-wide +// SetDefaultLevel(LogLevel::kDebug); // adjust the threshold +// +// auto logger = GetDefaultLogger(); // borrow the current default +// if (logger->ShouldLog(LogLevel::kInfo)) { +// logger->Log(LogMessage{.level = LogLevel::kInfo, .message = "scan ready"}); +// } +// +// // Or configure from catalog-style properties (applies the "level" key): +// auto sink = std::make_shared(); +// auto status = sink->Initialize({{std::string(kLevelProperty), "warn"}}); // -> kWarn +// --------------------------------------------------------------------------- + +namespace detail { + +/// \brief Hot-path accessor for the default logger. +/// +/// Returns a reference to a thread-local cached shared_ptr that is refreshed +/// only when the default logger has changed (no lock / no refcount churn in +/// steady state). The reference is valid for the duration of the calling +/// statement. +ICEBERG_EXPORT const std::shared_ptr& CurrentLogger() noexcept; + +/// \brief Build a LogMessage from the already-formatted text and dispatch it. +/// +/// Declared ICEBERG_EXPORT because the logging macros expand into this call in +/// consumer translation units. +ICEBERG_EXPORT void Emit(Logger& logger, LogLevel level, + const std::source_location& location, std::string&& message); + +/// \brief Emit a fixed fallback record when formatting threw. +/// +/// noexcept, allocation-light (small/SSO literal), performs no std::format, and +/// does not recurse -- so the macro's "logging never throws" guarantee holds +/// even when a format argument throws. +ICEBERG_EXPORT void EmitFormatError(Logger& logger, LogLevel level, + const std::source_location& location) noexcept; + +/// \brief Runtime (non-literal) format-string helper. +/// +/// std::format requires a compile-time format string; this routes a runtime +/// string through std::vformat. Args are bound as named lvalues and the +/// arg-store is held in a named variable so it outlives the vformat call +/// (C++23 make_format_args rejects rvalues -- P2905 / LWG3631). +template +std::string VFormat(std::string_view fmt, Args&&... args) { + auto store = std::make_format_args(args...); + return std::vformat(fmt, store); +} + +} // namespace detail + +} // namespace iceberg diff --git a/src/iceberg/logging/meson.build b/src/iceberg/logging/meson.build index 3c286a196..3f7af4fec 100644 --- a/src/iceberg/logging/meson.build +++ b/src/iceberg/logging/meson.build @@ -15,4 +15,4 @@ # specific language governing permissions and limitations # under the License. -install_headers(['log_level.h'], subdir: 'iceberg/logging') +install_headers(['log_level.h', 'logger.h'], subdir: 'iceberg/logging') diff --git a/src/iceberg/meson.build b/src/iceberg/meson.build index 7a6ba4f8b..a0a0be610 100644 --- a/src/iceberg/meson.build +++ b/src/iceberg/meson.build @@ -72,6 +72,7 @@ iceberg_sources = files( 'inspect/snapshots_table.cc', 'json_serde.cc', 'location_provider.cc', + 'logging/logger.cc', 'manifest/manifest_adapter.cc', 'manifest/manifest_entry.cc', 'manifest/manifest_filter_manager.cc', diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt index 0a7db5ed2..16575c161 100644 --- a/src/iceberg/test/CMakeLists.txt +++ b/src/iceberg/test/CMakeLists.txt @@ -102,7 +102,7 @@ add_iceberg_test(table_test table_test.cc table_update_test.cc) -add_iceberg_test(logging_test SOURCES log_level_test.cc) +add_iceberg_test(logging_test SOURCES log_level_test.cc logger_test.cc) add_iceberg_test(expression_test SOURCES diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc new file mode 100644 index 000000000..ed1c88aa5 --- /dev/null +++ b/src/iceberg/test/logger_test.cc @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "iceberg/logging/logger.h" + +#include +#include +#include +#include + +#include + +#include "iceberg/logging/log_level.h" +#include "iceberg/test/logging_test_helpers.h" + +namespace iceberg { + +TEST(LoggerTest, NoopIsSharedImmortalAndSilent) { + auto noop = Logger::Noop(); + ASSERT_NE(noop, nullptr); + EXPECT_TRUE(noop->IsNoop()); + EXPECT_FALSE(noop->ShouldLog(LogLevel::kFatal)); + EXPECT_EQ(noop->level(), LogLevel::kOff); + // Same singleton instance every call. + EXPECT_EQ(noop.get(), Logger::Noop().get()); +} + +TEST(LoggerTest, DefaultLoggerIsNeverNull) { EXPECT_NE(GetDefaultLogger(), nullptr); } + +TEST(LoggerTest, SetAndGetDefaultLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + EXPECT_EQ(GetDefaultLogger().get(), capturing.get()); + EXPECT_EQ(detail::CurrentLogger().get(), capturing.get()); +} + +TEST(LoggerTest, SetNullFallsBackToNoop) { + ScopedDefaultLogger guard(std::make_shared()); + SetDefaultLogger(nullptr); + EXPECT_TRUE(GetDefaultLogger()->IsNoop()); +} + +TEST(LoggerTest, CurrentLoggerTracksSwaps) { + auto first = std::make_shared(); + auto second = std::make_shared(); + ScopedDefaultLogger guard(first); + EXPECT_EQ(detail::CurrentLogger().get(), first.get()); + SetDefaultLogger(second); + // Generation bump must invalidate the thread-local cache. + EXPECT_EQ(detail::CurrentLogger().get(), second.get()); +} + +TEST(LoggerTest, SetDefaultLevelUpdatesLogger) { + auto capturing = std::make_shared(); + ScopedDefaultLogger guard(capturing); + SetDefaultLevel(LogLevel::kError); + EXPECT_EQ(capturing->level(), LogLevel::kError); +} + +// Filtering is decided by the logger's own ShouldLog (no separate cached gate), +// so lowering a logger's level out-of-band (not via SetDefaultLevel) takes effect +// immediately -- this is the regression guard for the dropped g_effective_level gate. +TEST(LoggerTest, OutOfBandLevelLoweringTakesEffect) { + auto capturing = std::make_shared(); + capturing->SetLevel(LogLevel::kError); + ScopedDefaultLogger guard(capturing); + EXPECT_FALSE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); + capturing->SetLevel(LogLevel::kTrace); // lowered directly on the handle + EXPECT_TRUE(detail::CurrentLogger()->ShouldLog(LogLevel::kInfo)); +} + +TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { + // Stress CurrentLogger()/GetDefaultLogger() against SetDefaultLogger() swaps. + // Run under TSan in CI; here it asserts no crash and a valid logger throughout. + auto a = std::make_shared(); + auto b = std::make_shared(); + ScopedDefaultLogger guard(a); + std::atomic stop{false}; + std::atomic saw_null{false}; + std::vector readers; + for (int i = 0; i < 6; ++i) { + readers.emplace_back([&stop, &saw_null] { + // ASSERT_* doesn't propagate from non-main threads; record via a flag. + while (!stop.load(std::memory_order_relaxed)) { + const auto& l = detail::CurrentLogger(); + if (!l) saw_null.store(true, std::memory_order_relaxed); + (void)l->ShouldLog(LogLevel::kError); + (void)GetDefaultLogger(); + } + }); + } + for (int i = 0; i < 2000; ++i) SetDefaultLogger((i & 1) ? a : b); + stop.store(true, std::memory_order_relaxed); + for (auto& t : readers) t.join(); + EXPECT_FALSE(saw_null.load()); // CurrentLogger() is never null across swaps +} + +TEST(LoggerTest, InitializeAppliesLevelProperty) { + CapturingLogger logger; + auto status = logger.Initialize({{std::string(kLevelProperty), std::string("error")}}); + ASSERT_TRUE(status.has_value()); + EXPECT_EQ(logger.level(), LogLevel::kError); +} + +TEST(LoggerTest, InitializeRejectsInvalidLevel) { + CapturingLogger logger; + auto status = + logger.Initialize({{std::string(kLevelProperty), std::string("not-a-level")}}); + ASSERT_FALSE(status.has_value()); + EXPECT_EQ(status.error().kind, ErrorKind::kInvalidArgument); +} + +// Logging during thread teardown (from a thread_local destructor) must not crash: +// CurrentLogger() serves the live cache or falls back to an immortal logger. Run +// under ASan in CI for full signal. +TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { + std::thread([] { + struct Probe { + ~Probe() { + const auto& logger = detail::CurrentLogger(); + if (logger) { + detail::Emit(*logger, LogLevel::kInfo, std::source_location::current(), + "from thread_local dtor"); + } + } + }; + static thread_local Probe probe; + (void)probe; + }).join(); + SUCCEED(); +} + +} // namespace iceberg diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h new file mode 100644 index 000000000..00a57cf82 --- /dev/null +++ b/src/iceberg/test/logging_test_helpers.h @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "iceberg/logging/logger.h" + +namespace iceberg { + +/// \brief Test sink that records every emitted LogMessage under a mutex. +class CapturingLogger : public Logger { + public: + bool ShouldLog(LogLevel level) const override { return level >= level_; } + + void Log(LogMessage&& message) noexcept override { + std::lock_guard lock(mutex_); + records_.push_back(std::move(message)); + } + + void SetLevel(LogLevel level) override { level_ = level; } + LogLevel level() const override { return level_; } + + std::vector records() const { + std::lock_guard lock(mutex_); + return records_; + } + + std::size_t count() const { + std::lock_guard lock(mutex_); + return records_.size(); + } + + private: + mutable std::mutex mutex_; + LogLevel level_ = LogLevel::kTrace; + std::vector records_; +}; + +/// \brief RAII guard that restores the process default logger on scope exit, so +/// tests that swap the global default don't leak state into other tests. +class ScopedDefaultLogger { + public: + explicit ScopedDefaultLogger(std::shared_ptr logger) + : previous_(GetDefaultLogger()) { + SetDefaultLogger(std::move(logger)); + } + ~ScopedDefaultLogger() { SetDefaultLogger(previous_); } + + ScopedDefaultLogger(const ScopedDefaultLogger&) = delete; + ScopedDefaultLogger& operator=(const ScopedDefaultLogger&) = delete; + + private: + std::shared_ptr previous_; +}; + +} // namespace iceberg diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build index a76a15553..bd19c20f2 100644 --- a/src/iceberg/test/meson.build +++ b/src/iceberg/test/meson.build @@ -61,7 +61,7 @@ iceberg_tests = { 'table_update_test.cc', ), }, - 'logging_test': {'sources': files('log_level_test.cc')}, + 'logging_test': {'sources': files('log_level_test.cc', 'logger_test.cc')}, 'expression_test': { 'sources': files( 'aggregate_test.cc', From 796ff0d4cf29c341537f908a333f1169dc30ca19 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Fri, 19 Jun 2026 17:35:09 +0000 Subject: [PATCH 2/4] fix(logging): address review feedback on the Logger interface Resolves @wgtmac's review comments on PR #723: - Mark ShouldLog()/SetLevel()/level() noexcept so a custom logger cannot throw from the hot filter path; propagated to NoopLogger and CapturingLogger. - Make CurrentLogger() safe during thread teardown without UB: the per-thread cache is reached via a trivially-destructible thread_local pointer to a never-freed heap slot (kept LSan-reachable by a registry), so logging from a thread_local destructor in any order is safe. Replaces the AliveFlag guard. - Add ICEBERG_EXPORT to LogAttribute/LogMessage, matching Error/ScanReport. - Trim the logger.h \file brief (the macros it referenced arrive in PR #725). - Tests: use the IsError matcher and std::ignore; strengthen the teardown test to log from a thread_local destroyed after the cache. Co-authored-by: Isaac --- src/iceberg/logging/logger.cc | 67 ++++++++++++++++--------- src/iceberg/logging/logger.h | 19 ++++--- src/iceberg/test/logger_test.cc | 20 +++++--- src/iceberg/test/logging_test_helpers.h | 6 +-- 4 files changed, 69 insertions(+), 43 deletions(-) diff --git a/src/iceberg/logging/logger.cc b/src/iceberg/logging/logger.cc index cd864bc42..9c8c2798e 100644 --- a/src/iceberg/logging/logger.cc +++ b/src/iceberg/logging/logger.cc @@ -24,6 +24,7 @@ #include #include #include +#include namespace iceberg { @@ -32,10 +33,10 @@ namespace { /// \brief Logger that drops every record. class NoopLogger final : public Logger { public: - bool ShouldLog(LogLevel /*level*/) const override { return false; } + bool ShouldLog(LogLevel /*level*/) const noexcept override { return false; } void Log(LogMessage&& /*message*/) noexcept override {} - void SetLevel(LogLevel /*level*/) override {} - LogLevel level() const override { return LogLevel::kOff; } + void SetLevel(LogLevel /*level*/) noexcept override {} + LogLevel level() const noexcept override { return LogLevel::kOff; } bool IsNoop() const override { return true; } }; @@ -62,6 +63,34 @@ DefaultSlot& Slot() { return *slot; } +/// \brief A thread's cached view of the default logger and the generation it was +/// cached at. Heap-allocated and intentionally never freed (see CurrentLogger). +struct ThreadCache { + std::shared_ptr logger; + uint64_t gen = 0; // 0 != Slot().gen (seeded to 1) -> first use always refreshes +}; + +/// \brief Keeps every leaked ThreadCache reachable so LeakSanitizer does not flag +/// the intentional per-thread leak. Immortal (leaked) itself, like the slot. +struct CacheRegistry { + std::mutex mtx; + std::vector caches; +}; +CacheRegistry& Registry() { + static auto* registry = new CacheRegistry(); + return *registry; +} + +/// \brief Allocate a per-thread cache that is never destroyed and register it so +/// it stays reachable for LSan. +ThreadCache* NewThreadCache() { + auto* cache = new ThreadCache(); // intentionally leaked; see CurrentLogger + CacheRegistry& registry = Registry(); + std::lock_guard lock(registry.mtx); + registry.caches.push_back(cache); + return cache; +} + } // namespace std::shared_ptr Logger::Noop() { @@ -97,30 +126,22 @@ void SetDefaultLevel(LogLevel level) { namespace detail { const std::shared_ptr& CurrentLogger() noexcept { - static thread_local std::shared_ptr tls; - static thread_local uint64_t tls_gen = 0; - // Sentinel whose destructor marks the cache dead at thread exit. It is - // declared after tls/tls_gen, so it is destroyed FIRST (reverse order); once - // dead, a log from any later-destroyed thread_local destructor must not touch - // the (about-to-be / already) destroyed tls slot. - static thread_local struct AliveFlag { - bool value = true; - ~AliveFlag() { value = false; } - } alive; - if (!alive.value) { - // Thread teardown: the TLS cache is unsafe. Fall back to an immortal logger - // (leaked, never destroyed) so logging during teardown stays safe. - static const std::shared_ptr kFallback = Logger::Noop(); - return kFallback; - } + // The per-thread cache is reached through a trivially-destructible thread_local + // raw pointer to a heap object that is intentionally NEVER destroyed. This makes + // the accessor safe to call during thread teardown -- including from another + // thread_local's destructor, in any destruction order: the pointer has no + // destructor (so it stays readable throughout teardown), and the cached + // shared_ptr outlives every thread_local, so there is no use-after-destruction. + // The intentional per-thread leak is kept LSan-reachable via the registry. + static thread_local ThreadCache* cache = NewThreadCache(); DefaultSlot& slot = Slot(); uint64_t current = slot.gen.load(std::memory_order_relaxed); - if (current != tls_gen) { + if (current != cache->gen) { std::lock_guard lock(slot.mtx); - tls = slot.logger; - tls_gen = current; + cache->logger = slot.logger; + cache->gen = current; } - return tls; + return cache->logger; } void Emit(Logger& logger, LogLevel level, const std::source_location& location, diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 58f5691af..693bbc52a 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -20,8 +20,7 @@ #pragma once /// \file iceberg/logging/logger.h -/// \brief Pluggable logging interface, the process-global default logger, and -/// the logging macros. +/// \brief Pluggable logging interface and the process-global default logger. /// /// This header is backend-agnostic: it never includes the build-generated /// backend configuration header and never references the spdlog feature macro, @@ -48,7 +47,7 @@ namespace iceberg { /// Both key and value are owned so a sink may retain the record safely. /// Unused in v1; reserved so structured logging can be added without an ABI /// break to LogMessage. -struct LogAttribute { +struct ICEBERG_EXPORT LogAttribute { std::string key; std::string value; }; @@ -58,7 +57,7 @@ struct LogAttribute { /// The formatted message is owned (moved in by the logging macros), so a sink /// may safely retain the record beyond the Log() call. The member set must not /// depend on the build's logging backend (the spdlog backend never appears here). -struct LogMessage { +struct ICEBERG_EXPORT LogMessage { LogLevel level = LogLevel::kOff; std::string message; std::source_location location = std::source_location::current(); @@ -104,16 +103,16 @@ class ICEBERG_EXPORT Logger { } /// \brief Cheap check whether a record at \p level would be emitted. - virtual bool ShouldLog(LogLevel level) const = 0; + virtual bool ShouldLog(LogLevel level) const noexcept = 0; /// \brief Emit one (already-formatted) record, taking ownership. Must not throw. virtual void Log(LogMessage&& message) noexcept = 0; /// \brief Set the minimum level this logger emits. - virtual void SetLevel(LogLevel level) = 0; + virtual void SetLevel(LogLevel level) noexcept = 0; /// \brief Return the minimum level this logger emits. - virtual LogLevel level() const = 0; + virtual LogLevel level() const noexcept = 0; /// \brief Flush any buffered output. Must not throw; best-effort on the fatal path. virtual void Flush() noexcept {} @@ -150,10 +149,10 @@ ICEBERG_EXPORT void SetDefaultLevel(LogLevel level); // // class MySink : public Logger { // public: -// bool ShouldLog(LogLevel level) const override { return level >= level_; } +// bool ShouldLog(LogLevel level) const noexcept override { return level >= level_; } // void Log(LogMessage&& m) noexcept override { write_line(m.message); } -// void SetLevel(LogLevel level) override { level_ = level; } -// LogLevel level() const override { return level_; } +// void SetLevel(LogLevel level) noexcept override { level_ = level; } +// LogLevel level() const noexcept override { return level_; } // private: // std::atomic level_{LogLevel::kInfo}; // }; diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index ed1c88aa5..0c53714d1 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include "iceberg/logging/log_level.h" #include "iceberg/test/logging_test_helpers.h" +#include "iceberg/test/matchers.h" namespace iceberg { @@ -100,8 +102,8 @@ TEST(LoggerTest, ConcurrentSwapAndReadIsSafe) { while (!stop.load(std::memory_order_relaxed)) { const auto& l = detail::CurrentLogger(); if (!l) saw_null.store(true, std::memory_order_relaxed); - (void)l->ShouldLog(LogLevel::kError); - (void)GetDefaultLogger(); + std::ignore = l->ShouldLog(LogLevel::kError); + std::ignore = GetDefaultLogger(); } }); } @@ -123,12 +125,15 @@ TEST(LoggerTest, InitializeRejectsInvalidLevel) { auto status = logger.Initialize({{std::string(kLevelProperty), std::string("not-a-level")}}); ASSERT_FALSE(status.has_value()); - EXPECT_EQ(status.error().kind, ErrorKind::kInvalidArgument); + EXPECT_THAT(status, IsError(ErrorKind::kInvalidArgument)); } -// Logging during thread teardown (from a thread_local destructor) must not crash: -// CurrentLogger() serves the live cache or falls back to an immortal logger. Run -// under ASan in CI for full signal. +// Logging during thread teardown (from a thread_local destructor) must not crash. +// CurrentLogger()'s per-thread cache is reached via a trivially-destructible +// pointer to a never-freed heap object, so it is safe even when the logging +// statement runs from a thread_local that is destroyed AFTER the cache -- the +// hard case. Probe is constructed before CurrentLogger() is first touched on the +// thread, so it is destroyed last. Run under ASan in CI for full signal. TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { std::thread([] { struct Probe { @@ -141,7 +146,8 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { } }; static thread_local Probe probe; - (void)probe; + (void)probe; // construct Probe first ... + (void)detail::CurrentLogger(); // ... then the logger cache (destroyed first) }).join(); SUCCEED(); } diff --git a/src/iceberg/test/logging_test_helpers.h b/src/iceberg/test/logging_test_helpers.h index 00a57cf82..8859523c5 100644 --- a/src/iceberg/test/logging_test_helpers.h +++ b/src/iceberg/test/logging_test_helpers.h @@ -31,15 +31,15 @@ namespace iceberg { /// \brief Test sink that records every emitted LogMessage under a mutex. class CapturingLogger : public Logger { public: - bool ShouldLog(LogLevel level) const override { return level >= level_; } + bool ShouldLog(LogLevel level) const noexcept override { return level >= level_; } void Log(LogMessage&& message) noexcept override { std::lock_guard lock(mutex_); records_.push_back(std::move(message)); } - void SetLevel(LogLevel level) override { level_ = level; } - LogLevel level() const override { return level_; } + void SetLevel(LogLevel level) noexcept override { level_ = level; } + LogLevel level() const noexcept override { return level_; } std::vector records() const { std::lock_guard lock(mutex_); From 4943849be68d34b91ee2d49f078c589b6ba3ab26 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 00:08:59 +0000 Subject: [PATCH 3/4] feat(logging): add std::format-style Log() functions Addresses @wgtmac's review comment on PR #723 asking for a std::format-style API that builds the string only after the level check. - A single overloaded iceberg::Log(): Log(level, "fmt {}", args...) -> process-default logger Log(logger, level, "fmt {}", args...) -> explicit logger disambiguated by the first argument. - Formats only after ShouldLog() (no wasted work when disabled) and never throws -- a formatting failure routes to EmitFormatError, matching the macros. - The call-site source_location is captured via a small consteval FmtWithLoc wrapper (the std::print technique), preserving compile-time format-string checking without a macro. The sink interface (Logger::Log(LogMessage&&)) intentionally stays pre-formatted: passing std::format_args across the virtual boundary is lifetime-unsafe and would break safe record retention. The format-style win is delivered at the call layer instead. Co-authored-by: Isaac --- src/iceberg/logging/logger.h | 57 +++++++++++++++++++++++++++++++++ src/iceberg/test/logger_test.cc | 29 +++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index 693bbc52a..eea0a6cd0 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -26,12 +26,14 @@ /// backend configuration header and never references the spdlog feature macro, /// so consumers see one stable API regardless of how the backend was configured. +#include #include #include #include #include #include #include +#include #include #include #include @@ -207,6 +209,61 @@ std::string VFormat(std::string_view fmt, Args&&... args) { return std::vformat(fmt, store); } +/// \brief A checked format string bundled with the caller's source_location. +/// +/// The consteval constructor preserves std::format's compile-time format-string +/// checking while capturing the call site (the std::print/println technique), +/// so the function-style Log() can record an accurate file:line without a macro. +/// Used as a non-deduced parameter so the trailing args drive deduction. +template +struct FmtWithLoc { + std::format_string fmt; + std::source_location loc; + + template + requires std::convertible_to> + consteval FmtWithLoc( // NOLINT(google-explicit-constructor): mirrors std::format_string + const T& s, std::source_location loc = std::source_location::current()) + : fmt(s), loc(loc) {} +}; + +/// \brief Shared gate -> format -> emit body for the function-style Log() API. +/// +/// Formats only when the logger is enabled for \p level, and never throws (a +/// formatting failure routes to EmitFormatError, matching the macros). +template +void Dispatch(Logger& logger, LogLevel level, const std::source_location& loc, + std::format_string fmt, Args&&... args) noexcept { + if (!logger.ShouldLog(level)) return; + try { + Emit(logger, level, loc, std::format(fmt, std::forward(args)...)); + } catch (...) { + EmitFormatError(logger, level, loc); + } +} + } // namespace detail +/// \brief Log to the process-default logger, std::format style. Formats only if +/// the level is enabled; never throws. +/// +/// Example: `iceberg::Log(LogLevel::kInfo, "loaded {} files", n);` +template +void Log(LogLevel level, detail::FmtWithLoc...> fmt, + Args&&... args) { + const std::shared_ptr& logger = detail::CurrentLogger(); + if (logger) { + detail::Dispatch(*logger, level, fmt.loc, fmt.fmt, std::forward(args)...); + } +} + +/// \brief Log to an explicit logger, std::format style. Formats only if enabled. +/// +/// Example: `iceberg::Log(logger, LogLevel::kWarn, "retry {}", attempt);` +template +void Log(Logger& logger, LogLevel level, + detail::FmtWithLoc...> fmt, Args&&... args) { + detail::Dispatch(logger, level, fmt.loc, fmt.fmt, std::forward(args)...); +} + } // namespace iceberg diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index 0c53714d1..833163819 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -152,4 +152,33 @@ TEST(LoggerTest, LoggingFromThreadLocalDestructorIsSafe) { SUCCEED(); } +// --- Function-style API (non-macro): overloaded Log() --- + +TEST(LoggerTest, LogToExplicitLoggerFormats) { + auto sink = std::make_shared(); + Log(*sink, LogLevel::kInfo, "x={} y={}", 1, 2); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kInfo); + EXPECT_EQ(records[0].message, "x=1 y=2"); + EXPECT_NE(records[0].location.line(), 0u); // call-site location captured +} + +TEST(LoggerTest, LogRespectsLevelAndDoesNotFormatWhenDisabled) { + auto sink = std::make_shared(); + sink->SetLevel(LogLevel::kError); + Log(*sink, LogLevel::kInfo, "dropped {}", 1); + EXPECT_EQ(sink->count(), 0u); +} + +TEST(LoggerTest, LogToDefaultLoggerFormatStyle) { + auto sink = std::make_shared(); + ScopedDefaultLogger guard(sink); + Log(LogLevel::kWarn, "v={}", 7); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kWarn); + EXPECT_EQ(records[0].message, "v=7"); +} + } // namespace iceberg From bd4a25cb67bf3f00c60889ee5416776e79d84669 Mon Sep 17 00:00:00 2001 From: Kam Cheung Ting Date: Sun, 21 Jun 2026 00:14:11 +0000 Subject: [PATCH 4/4] feat(logging): add LogMessage::Builder for structured attributes Addresses @wgtmac's review comment on PR #723 that LogMessage::attributes had no easy fill path. Adds a fluent LogMessage::Builder so engine loggers can attach structured fields (query id, task id, table name, snapshot id, file path, ...): auto record = LogMessage::Builder(LogLevel::kInfo) .Message("scan finished") .Attribute("table", table_name) .Attribute("snapshot_id", std::to_string(id)) .Build(); logger->Log(std::move(record)); - Message() sets the formatted text; Attribute() appends an owned key/value; Location() overrides the source location (defaults to the build site). - Build() moves the accumulated state into a LogMessage. This defines how attributes get populated, so structured logging can grow without an ABI break. Co-authored-by: Isaac --- src/iceberg/logging/logger.h | 60 +++++++++++++++++++++++++++++++-- src/iceberg/test/logger_test.cc | 34 +++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/src/iceberg/logging/logger.h b/src/iceberg/logging/logger.h index eea0a6cd0..d9edc2f34 100644 --- a/src/iceberg/logging/logger.h +++ b/src/iceberg/logging/logger.h @@ -46,9 +46,9 @@ namespace iceberg { /// \brief A structured key/value attribute attached to a log record. /// -/// Both key and value are owned so a sink may retain the record safely. -/// Unused in v1; reserved so structured logging can be added without an ABI -/// break to LogMessage. +/// Both key and value are owned so a sink may retain the record safely. Engine +/// loggers can surface these as discrete fields (query id, task id, table name, +/// snapshot id, file path, ...); see LogMessage::Builder to populate them. struct ICEBERG_EXPORT LogAttribute { std::string key; std::string value; @@ -59,11 +59,65 @@ struct ICEBERG_EXPORT LogAttribute { /// The formatted message is owned (moved in by the logging macros), so a sink /// may safely retain the record beyond the Log() call. The member set must not /// depend on the build's logging backend (the spdlog backend never appears here). +/// Use LogMessage::Builder for a readable way to assemble one, especially with +/// structured attributes. struct ICEBERG_EXPORT LogMessage { LogLevel level = LogLevel::kOff; std::string message; std::source_location location = std::source_location::current(); std::vector attributes; + + class Builder; +}; + +/// \brief Fluent builder for LogMessage, the easy path to attach structured +/// attributes. +/// +/// Example: +/// auto record = LogMessage::Builder(LogLevel::kInfo) +/// .Message("scan finished") +/// .Attribute("table", table_name) +/// .Attribute("snapshot_id", std::to_string(id)) +/// .Build(); +/// logger->Log(std::move(record)); +/// +/// The location defaults to where the Builder is constructed; override it with +/// Location() (e.g. to forward a caller's std::source_location). +class ICEBERG_EXPORT LogMessage::Builder { + public: + explicit Builder(LogLevel level) : level_(level) {} + + /// \brief Set the already-formatted message text. + Builder& Message(std::string message) { + message_ = std::move(message); + return *this; + } + + /// \brief Append a structured key/value attribute. + Builder& Attribute(std::string key, std::string value) { + attributes_.push_back(LogAttribute{.key = std::move(key), .value = std::move(value)}); + return *this; + } + + /// \brief Override the record's source location (defaults to the build site). + Builder& Location(std::source_location location) { + location_ = location; + return *this; + } + + /// \brief Materialize the LogMessage, moving the accumulated state out. + LogMessage Build() { + return LogMessage{.level = level_, + .message = std::move(message_), + .location = location_, + .attributes = std::move(attributes_)}; + } + + private: + LogLevel level_; + std::string message_; + std::source_location location_ = std::source_location::current(); + std::vector attributes_; }; /// \brief Well-known Logger::Initialize() property keys. diff --git a/src/iceberg/test/logger_test.cc b/src/iceberg/test/logger_test.cc index 833163819..c66a7c02e 100644 --- a/src/iceberg/test/logger_test.cc +++ b/src/iceberg/test/logger_test.cc @@ -181,4 +181,38 @@ TEST(LoggerTest, LogToDefaultLoggerFormatStyle) { EXPECT_EQ(records[0].message, "v=7"); } +// --- LogMessage::Builder (structured attributes) --- + +TEST(LoggerTest, BuilderAssemblesMessageAndAttributes) { + auto record = LogMessage::Builder(LogLevel::kInfo) + .Message("scan finished") + .Attribute("table", "db.t") + .Attribute("snapshot_id", "42") + .Build(); + EXPECT_EQ(record.level, LogLevel::kInfo); + EXPECT_EQ(record.message, "scan finished"); + ASSERT_EQ(record.attributes.size(), 2u); + EXPECT_EQ(record.attributes[0].key, "table"); + EXPECT_EQ(record.attributes[0].value, "db.t"); + EXPECT_EQ(record.attributes[1].key, "snapshot_id"); + EXPECT_EQ(record.attributes[1].value, "42"); +} + +TEST(LoggerTest, BuilderDefaultsAndEmitToSink) { + auto sink = std::make_shared(); + sink->Log(LogMessage::Builder(LogLevel::kError).Message("boom").Build()); + auto records = sink->records(); + ASSERT_EQ(records.size(), 1u); + EXPECT_EQ(records[0].level, LogLevel::kError); + EXPECT_EQ(records[0].message, "boom"); + EXPECT_TRUE(records[0].attributes.empty()); + EXPECT_NE(records[0].location.line(), 0u); // location defaulted at build site +} + +TEST(LoggerTest, BuilderLocationOverride) { + auto loc = std::source_location::current(); + auto record = LogMessage::Builder(LogLevel::kDebug).Location(loc).Build(); + EXPECT_EQ(record.location.line(), loc.line()); +} + } // namespace iceberg