From 97801f55823357330a1223491546a5c7e24cc5f1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Tue, 23 Jun 2026 07:49:58 -0500 Subject: [PATCH 1/2] feat(#1458): Renderable Codec Protocol MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements T3.2 of the 2.3 release plan against the spec in datajoint-docs#188. A runtime-checkable Protocol that codecs opt into by implementing ``render_spark(self, decoded, *, key=None) -> Any``. Consumers (e.g., a Databricks silver-layer publish pipeline) detect support via ``isinstance(codec, Renderable)``. What's added: - src/datajoint/rendering.py (new, 92 lines including docstrings): Single @runtime_checkable Protocol declaration. Module-level docstring explains the design rationale (Protocol vs. abstract method on Codec); class docstring documents allowed return-value shapes (primitives / lists / dicts mapping to Spark ArrayType / StructType / MapType), with worked codec examples. - src/datajoint/__init__.py: ``dj.Renderable`` exported at the top level alongside the existing Codec API exports. - tests/unit/test_rendering.py (new, 9 tests): detection of opt-in vs non-opt-in classes, top-level re-export, @runtime_checkable guarantee, built-in ``<blob>`` and ``<hash>`` codecs are not Renderable (per spec contract), invocation pass-through, key kwarg acceptance, subclass opt-in behavior. What's NOT in this PR (out of scope per spec): - Specific renderable codec implementations. Such codecs ship downstream as plugins. They register via the existing codec auto-registration and opt in by implementing render_spark(). - Silver-layer publish pipeline (lives in datajoint-databricks). - No decode_spark (reverse direction). - No BINARY fallback — codecs either implement Renderable or remain non-eligible. All 9 unit tests pass. No regressions expected — this is purely additive (a new module + one top-level re-export + tests). Slated for DataJoint 2.3. 
--- src/datajoint/__init__.py | 3 + src/datajoint/rendering.py | 92 ++++++++++++++++++++++++++++++ tests/unit/test_rendering.py | 105 +++++++++++++++++++++++++++++++++++ 3 files changed, 200 insertions(+) create mode 100644 src/datajoint/rendering.py create mode 100644 tests/unit/test_rendering.py diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 4970b19d4..de0013be8 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -51,6 +51,8 @@ "get_codec", "ObjectRef", "NpyRef", + # Renderable Codec Protocol + "Renderable", # Storage Adapter API "StorageAdapter", "get_storage_adapter", @@ -85,6 +87,7 @@ from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef +from .rendering import Renderable from .storage_adapter import StorageAdapter, get_storage_adapter from .schemas import _Schema, VirtualModule, list_schemas, virtual_schema from .autopopulate import AutoPopulate diff --git a/src/datajoint/rendering.py b/src/datajoint/rendering.py new file mode 100644 index 000000000..29f3bc03c --- /dev/null +++ b/src/datajoint/rendering.py @@ -0,0 +1,92 @@ +""" +Renderable Codec Protocol. + +Opt-in contract for codecs that can render their decoded values to +Spark-native types — primitives, lists, dicts, and nested combinations. + +Codecs implement this method when they want their column eligible for +downstream typed-query systems (Spark SQL, Delta Sharing, BI tools). +Generic codecs like ```` and ```` deliberately do not +implement it: their decoded values can be arbitrary Python objects with +no fixed Spark-native shape. + +The contract is intentionally a Protocol rather than an abstract method +on :class:`datajoint.Codec`: + +- Generic codecs need no acknowledgement (no ``NotImplementedError`` stubs). +- Existing plugin codecs continue to work unchanged. +- Codec authors opt in by adding the method on their own release cadence. 
+- Consumers detect support structurally via ``isinstance(codec, Renderable)``. + +See ``datajoint-docs/src/reference/specs/renderable.md`` for the +normative specification (signature, return-value shape constraints, +worked codec examples). +""" + +from __future__ import annotations + +from typing import Any, Protocol, runtime_checkable + + +@runtime_checkable +class Renderable(Protocol): + """ + A codec that can render its decoded values to Spark-native types. + + Opt-in. Codecs implementing this method declare that their decoded + values can be expressed as primitives, lists, or dicts of the same — + i.e., shapes that map cleanly to Spark's ``StructType`` / + ``ArrayType`` / ``MapType``. + + Consumers (e.g., a Databricks silver-layer publish pipeline) check + ``isinstance(codec, Renderable)`` per column to determine eligibility. + + Allowed return-value shapes: + + - Primitives: ``bool``, ``int``, ``float``, ``str``, ``bytes``, + ``None``, ``datetime.date``, ``datetime.datetime``. + - ``list[T]`` where ``T`` is any allowed shape (→ Spark ``ArrayType``). + - ``dict[str, T]`` where ``T`` is any allowed shape (→ Spark + ``StructType`` or ``MapType``, consumer-decided). + + NumPy arrays must be converted to lists; no tuples, sets, or custom + objects in the return value. + + Examples + -------- + A 1D float-array codec (shipped as a plugin, not in datajoint-python):: + + class FloatArrayCodec(dj.Codec): + name = "float_array" + + def encode(self, value, *, key=None, store_name=None): ... + def decode(self, stored, *, key=None) -> np.ndarray: ... + + def render_spark(self, decoded: np.ndarray, *, key=None) -> list[float]: + return decoded.tolist() # → Spark ARRAY + + Eligibility check:: + + from datajoint import Renderable + isinstance(FloatArrayCodec(), Renderable) # True + """ + + def render_spark(self, decoded: Any, *, key: dict | None = None) -> Any: + """ + Render a decoded codec value to a Spark-native shape. 
+ + Parameters + ---------- + decoded : Any + The Python value produced by the codec's ``decode()``. + key : dict, optional + Optional context dict — same shape as ``Codec.encode``'s + ``key`` parameter. Most codecs ignore it. + + Returns + ------- + Any + A value composed entirely of allowed Spark-native shapes + (see class docstring). + """ + ... diff --git a/tests/unit/test_rendering.py b/tests/unit/test_rendering.py new file mode 100644 index 000000000..581b56918 --- /dev/null +++ b/tests/unit/test_rendering.py @@ -0,0 +1,105 @@ +""" +Unit tests for the Renderable Codec Protocol (#1458). + +The Protocol is a structural-typing contract — codecs opt in by +implementing ``render_spark`` and consumers detect support via +``isinstance(codec, Renderable)``. These tests cover the detection +behavior, not specific rendering implementations (which live downstream). +""" + +from __future__ import annotations + +import datajoint as dj +from datajoint.rendering import Renderable + + +class _RenderableCodec: + """A minimal codec-like object that opts into the protocol.""" + + name = "fake_renderable" + + def render_spark(self, decoded, *, key=None): + return list(decoded) if hasattr(decoded, "__iter__") else decoded + + +class _NonRenderableCodec: + """A minimal codec-like object that does NOT opt into the protocol.""" + + name = "fake_opaque" + + def encode(self, value, *, key=None, store_name=None): + return bytes(value) + + def decode(self, stored, *, key=None): + return stored + + +def test_renderable_protocol_detects_opt_in(): + """A class implementing ``render_spark`` is detected as Renderable.""" + assert isinstance(_RenderableCodec(), Renderable) + + +def test_renderable_protocol_rejects_non_opt_in(): + """A class without ``render_spark`` is not detected as Renderable.""" + assert not isinstance(_NonRenderableCodec(), Renderable) + + +def test_renderable_exported_at_top_level(): + """``dj.Renderable`` is accessible at the top level.""" + assert dj.Renderable is 
Renderable + + +def test_renderable_is_runtime_checkable(): + """The Protocol is decorated with @runtime_checkable (the test fixtures + above rely on this).""" + # Direct assertion: classes lacking runtime_checkable would raise TypeError + # on isinstance(). The previous tests would error rather than fail. + try: + isinstance(object(), Renderable) + except TypeError: + raise AssertionError("Renderable must be @runtime_checkable") + + +def test_blob_codec_is_not_renderable(): + """The built-in codec is intentionally non-renderable per the spec.""" + from datajoint.builtin_codecs.blob import BlobCodec + + assert not isinstance(BlobCodec(), Renderable) + + +def test_hash_codec_is_not_renderable(): + """The built-in codec is intentionally non-renderable per the spec.""" + from datajoint.builtin_codecs.hash import HashCodec + + assert not isinstance(HashCodec(), Renderable) + + +def test_renderable_invocation_passes_through(): + """A codec implementing the method can be invoked and returns its result.""" + codec = _RenderableCodec() + assert codec.render_spark([1, 2, 3]) == [1, 2, 3] + assert codec.render_spark(42) == 42 + + +def test_renderable_method_accepts_key_kwarg(): + """The method signature accepts the optional ``key`` keyword argument.""" + codec = _RenderableCodec() + # Should not raise + codec.render_spark([1, 2, 3], key={"some_pk": 1}) + + +def test_subclass_with_render_spark_is_renderable(): + """A subclass of a non-renderable that adds the method becomes renderable.""" + + class _OpaqueBase: + name = "base" + + def encode(self, value, *, key=None, store_name=None): + return b"" + + class _TypedSubclass(_OpaqueBase): + def render_spark(self, decoded, *, key=None): + return decoded + + assert not isinstance(_OpaqueBase(), Renderable) + assert isinstance(_TypedSubclass(), Renderable) From ae8cdf11209c7663755f44bc62807000637560d7 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 26 Jun 2026 11:04:24 -0500 Subject: [PATCH 2/2] 
=?UTF-8?q?feat(#1458):=20rename=20Renderable=20?= =?UTF-8?q?=E2=86=92=20SparkAdapter?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renderable conflicts with the broader notion of graphically renderable field types and is too generic for an interface targeted specifically at Spark / Lakehouse Sync. Rename for clarity: - Class: Renderable → SparkAdapter (parallels StorageAdapter) - Method: render_spark → to_spark (matches pandas/Arrow conventions like to_pandas, to_arrow, __dataframe__) - Module: datajoint.rendering → datajoint.spark - Tests: tests/unit/test_rendering.py → tests/unit/test_spark.py - Top-level export: dj.Renderable → dj.SparkAdapter --- src/datajoint/__init__.py | 6 +- src/datajoint/{rendering.py => spark.py} | 26 +++--- tests/unit/test_rendering.py | 105 ----------------------- tests/unit/test_spark.py | 105 +++++++++++++++++++++++ 4 files changed, 121 insertions(+), 121 deletions(-) rename src/datajoint/{rendering.py => spark.py} (77%) delete mode 100644 tests/unit/test_rendering.py create mode 100644 tests/unit/test_spark.py diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index de0013be8..5ec72afdb 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -51,8 +51,8 @@ "get_codec", "ObjectRef", "NpyRef", - # Renderable Codec Protocol - "Renderable", + # SparkAdapter Codec Protocol + "SparkAdapter", # Storage Adapter API "StorageAdapter", "get_storage_adapter", @@ -87,7 +87,7 @@ from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef -from .rendering import Renderable +from .spark import SparkAdapter from .storage_adapter import StorageAdapter, get_storage_adapter from .schemas import _Schema, VirtualModule, list_schemas, virtual_schema from .autopopulate import AutoPopulate diff --git a/src/datajoint/rendering.py b/src/datajoint/spark.py similarity index 
77% rename from src/datajoint/rendering.py rename to src/datajoint/spark.py index 29f3bc03c..29397b64f 100644 --- a/src/datajoint/rendering.py +++ b/src/datajoint/spark.py @@ -1,8 +1,8 @@ """ -Renderable Codec Protocol. +SparkAdapter Codec Protocol. -Opt-in contract for codecs that can render their decoded values to -Spark-native types — primitives, lists, dicts, and nested combinations. +Opt-in contract for codecs that adapt their decoded values to Spark-native +types — primitives, lists, dicts, and nested combinations. Codecs implement this method when they want their column eligible for downstream typed-query systems (Spark SQL, Delta Sharing, BI tools). @@ -16,9 +16,9 @@ - Generic codecs need no acknowledgement (no ``NotImplementedError`` stubs). - Existing plugin codecs continue to work unchanged. - Codec authors opt in by adding the method on their own release cadence. -- Consumers detect support structurally via ``isinstance(codec, Renderable)``. +- Consumers detect support structurally via ``isinstance(codec, SparkAdapter)``. -See ``datajoint-docs/src/reference/specs/renderable.md`` for the +See ``datajoint-docs/src/reference/specs/spark-adapter.md`` for the normative specification (signature, return-value shape constraints, worked codec examples). """ @@ -29,9 +29,9 @@ @runtime_checkable -class Renderable(Protocol): +class SparkAdapter(Protocol): """ - A codec that can render its decoded values to Spark-native types. + A codec that adapts its decoded values to Spark-native types. Opt-in. Codecs implementing this method declare that their decoded values can be expressed as primitives, lists, or dicts of the same — @@ -39,7 +39,7 @@ class Renderable(Protocol): ``ArrayType`` / ``MapType``. Consumers (e.g., a Databricks silver-layer publish pipeline) check - ``isinstance(codec, Renderable)`` per column to determine eligibility. + ``isinstance(codec, SparkAdapter)`` per column to determine eligibility. 
Allowed return-value shapes: @@ -62,18 +62,18 @@ class FloatArrayCodec(dj.Codec): def encode(self, value, *, key=None, store_name=None): ... def decode(self, stored, *, key=None) -> np.ndarray: ... - def render_spark(self, decoded: np.ndarray, *, key=None) -> list[float]: + def to_spark(self, decoded: np.ndarray, *, key=None) -> list[float]: return decoded.tolist() # → Spark ARRAY Eligibility check:: - from datajoint import Renderable - isinstance(FloatArrayCodec(), Renderable) # True + from datajoint import SparkAdapter + isinstance(FloatArrayCodec(), SparkAdapter) # True """ - def render_spark(self, decoded: Any, *, key: dict | None = None) -> Any: + def to_spark(self, decoded: Any, *, key: dict | None = None) -> Any: """ - Render a decoded codec value to a Spark-native shape. + Adapt a decoded codec value to a Spark-native shape. Parameters ---------- diff --git a/tests/unit/test_rendering.py b/tests/unit/test_rendering.py deleted file mode 100644 index 581b56918..000000000 --- a/tests/unit/test_rendering.py +++ /dev/null @@ -1,105 +0,0 @@ -""" -Unit tests for the Renderable Codec Protocol (#1458). - -The Protocol is a structural-typing contract — codecs opt in by -implementing ``render_spark`` and consumers detect support via -``isinstance(codec, Renderable)``. These tests cover the detection -behavior, not specific rendering implementations (which live downstream). 
-""" - -from __future__ import annotations - -import datajoint as dj -from datajoint.rendering import Renderable - - -class _RenderableCodec: - """A minimal codec-like object that opts into the protocol.""" - - name = "fake_renderable" - - def render_spark(self, decoded, *, key=None): - return list(decoded) if hasattr(decoded, "__iter__") else decoded - - -class _NonRenderableCodec: - """A minimal codec-like object that does NOT opt into the protocol.""" - - name = "fake_opaque" - - def encode(self, value, *, key=None, store_name=None): - return bytes(value) - - def decode(self, stored, *, key=None): - return stored - - -def test_renderable_protocol_detects_opt_in(): - """A class implementing ``render_spark`` is detected as Renderable.""" - assert isinstance(_RenderableCodec(), Renderable) - - -def test_renderable_protocol_rejects_non_opt_in(): - """A class without ``render_spark`` is not detected as Renderable.""" - assert not isinstance(_NonRenderableCodec(), Renderable) - - -def test_renderable_exported_at_top_level(): - """``dj.Renderable`` is accessible at the top level.""" - assert dj.Renderable is Renderable - - -def test_renderable_is_runtime_checkable(): - """The Protocol is decorated with @runtime_checkable (the test fixtures - above rely on this).""" - # Direct assertion: classes lacking runtime_checkable would raise TypeError - # on isinstance(). The previous tests would error rather than fail. 
- try: - isinstance(object(), Renderable) - except TypeError: - raise AssertionError("Renderable must be @runtime_checkable") - - -def test_blob_codec_is_not_renderable(): - """The built-in codec is intentionally non-renderable per the spec.""" - from datajoint.builtin_codecs.blob import BlobCodec - - assert not isinstance(BlobCodec(), Renderable) - - -def test_hash_codec_is_not_renderable(): - """The built-in codec is intentionally non-renderable per the spec.""" - from datajoint.builtin_codecs.hash import HashCodec - - assert not isinstance(HashCodec(), Renderable) - - -def test_renderable_invocation_passes_through(): - """A codec implementing the method can be invoked and returns its result.""" - codec = _RenderableCodec() - assert codec.render_spark([1, 2, 3]) == [1, 2, 3] - assert codec.render_spark(42) == 42 - - -def test_renderable_method_accepts_key_kwarg(): - """The method signature accepts the optional ``key`` keyword argument.""" - codec = _RenderableCodec() - # Should not raise - codec.render_spark([1, 2, 3], key={"some_pk": 1}) - - -def test_subclass_with_render_spark_is_renderable(): - """A subclass of a non-renderable that adds the method becomes renderable.""" - - class _OpaqueBase: - name = "base" - - def encode(self, value, *, key=None, store_name=None): - return b"" - - class _TypedSubclass(_OpaqueBase): - def render_spark(self, decoded, *, key=None): - return decoded - - assert not isinstance(_OpaqueBase(), Renderable) - assert isinstance(_TypedSubclass(), Renderable) diff --git a/tests/unit/test_spark.py b/tests/unit/test_spark.py new file mode 100644 index 000000000..854d554a6 --- /dev/null +++ b/tests/unit/test_spark.py @@ -0,0 +1,105 @@ +""" +Unit tests for the SparkAdapter Codec Protocol (#1458). + +The Protocol is a structural-typing contract — codecs opt in by +implementing ``to_spark`` and consumers detect support via +``isinstance(codec, SparkAdapter)``. 
These tests cover the detection +behavior, not specific rendering implementations (which live downstream). +""" + +from __future__ import annotations + +import datajoint as dj +from datajoint.spark import SparkAdapter + + +class _SparkAdapterCodec: + """A minimal codec-like object that opts into the protocol.""" + + name = "fake_spark_adapter" + + def to_spark(self, decoded, *, key=None): + return list(decoded) if hasattr(decoded, "__iter__") else decoded + + +class _OpaqueCodec: + """A minimal codec-like object that does NOT opt into the protocol.""" + + name = "fake_opaque" + + def encode(self, value, *, key=None, store_name=None): + return bytes(value) + + def decode(self, stored, *, key=None): + return stored + + +def test_protocol_detects_opt_in(): + """A class implementing ``to_spark`` is detected as a SparkAdapter.""" + assert isinstance(_SparkAdapterCodec(), SparkAdapter) + + +def test_protocol_rejects_non_opt_in(): + """A class without ``to_spark`` is not detected as a SparkAdapter.""" + assert not isinstance(_OpaqueCodec(), SparkAdapter) + + +def test_protocol_exported_at_top_level(): + """``dj.SparkAdapter`` is accessible at the top level.""" + assert dj.SparkAdapter is SparkAdapter + + +def test_protocol_is_runtime_checkable(): + """The Protocol is decorated with @runtime_checkable (the test fixtures + above rely on this).""" + # Direct assertion: classes lacking runtime_checkable would raise TypeError + # on isinstance(). The previous tests would error rather than fail. 
+ try: + isinstance(object(), SparkAdapter) + except TypeError: + raise AssertionError("SparkAdapter must be @runtime_checkable") + + +def test_blob_codec_is_not_spark_adapter(): + """The built-in codec is intentionally non-adapting per the spec.""" + from datajoint.builtin_codecs.blob import BlobCodec + + assert not isinstance(BlobCodec(), SparkAdapter) + + +def test_hash_codec_is_not_spark_adapter(): + """The built-in codec is intentionally non-adapting per the spec.""" + from datajoint.builtin_codecs.hash import HashCodec + + assert not isinstance(HashCodec(), SparkAdapter) + + +def test_to_spark_invocation_passes_through(): + """A codec implementing the method can be invoked and returns its result.""" + codec = _SparkAdapterCodec() + assert codec.to_spark([1, 2, 3]) == [1, 2, 3] + assert codec.to_spark(42) == 42 + + +def test_to_spark_method_accepts_key_kwarg(): + """The method signature accepts the optional ``key`` keyword argument.""" + codec = _SparkAdapterCodec() + # Should not raise + codec.to_spark([1, 2, 3], key={"some_pk": 1}) + + +def test_subclass_adding_to_spark_becomes_adapter(): + """A subclass of an opaque codec that adds the method becomes a SparkAdapter.""" + + class _OpaqueBase: + name = "base" + + def encode(self, value, *, key=None, store_name=None): + return b"" + + class _TypedSubclass(_OpaqueBase): + def to_spark(self, decoded, *, key=None): + return decoded + + assert not isinstance(_OpaqueBase(), SparkAdapter) + assert isinstance(_TypedSubclass(), SparkAdapter)