diff --git a/CHANGELOG.md b/CHANGELOG.md index 46df1f48..c32c7572 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +CHANGED + +- Changed the default large-payload externalization threshold (`LargePayloadStorageOptions.threshold_bytes`) from 900,000 bytes to 262,144 bytes (256 KiB), matching the .NET SDK default. Behavioral change (not source/binary breaking): payloads larger than 262,144 bytes and up to 900,000 bytes, which previously stayed inline, are now externalized by default. Set `threshold_bytes=900_000` explicitly to keep the previous behavior. + ## v1.7.0 ADDED diff --git a/docs/features.md b/docs/features.md index acbba2a9..57760f64 100644 --- a/docs/features.md +++ b/docs/features.md @@ -285,7 +285,7 @@ pip install durabletask[azure-blob-payloads] #### How it works 1. When the worker or client sends a payload that exceeds the - configured threshold (default 900 KB), the payload is + configured threshold (default 256 KiB), the payload is compressed (GZip, enabled by default) and uploaded to the external store. 2. 
The original payload in the gRPC message is replaced with a @@ -307,7 +307,7 @@ from durabletask.extensions.azure_blob_payloads import BlobPayloadStore, BlobPay store = BlobPayloadStore(BlobPayloadStoreOptions( connection_string="DefaultEndpointsProtocol=https;...", container_name="durabletask-payloads", # default - threshold_bytes=900_000, # default (900 KB) + threshold_bytes=262_144, # default (256 KiB) max_stored_payload_bytes=10_485_760, # default (10 MB) enable_compression=True, # default )) @@ -351,7 +351,7 @@ store = BlobPayloadStore(BlobPayloadStoreOptions( | Option | Default | Description | |---|---|---| -| `threshold_bytes` | 900,000 (900 KB) | Payloads larger than this are externalized | +| `threshold_bytes` | 262,144 (256 KiB) | Payloads larger than this are externalized | | `max_stored_payload_bytes` | 10,485,760 (10 MB) | Maximum size for externalized payloads | | `enable_compression` | `True` | GZip-compress payloads before uploading | | `container_name` | `"durabletask-payloads"` | Azure Blob container name | diff --git a/docs/supported-patterns.md b/docs/supported-patterns.md index 34b5944c..17459132 100644 --- a/docs/supported-patterns.md +++ b/docs/supported-patterns.md @@ -217,7 +217,7 @@ with DurableTaskSchedulerWorker( state = c.wait_for_orchestration_completion(instance_id, timeout=60) ``` -In this example, any payload exceeding the threshold (default 900 KB) is compressed and uploaded to +In this example, any payload exceeding the threshold (default 256 KiB) is compressed and uploaded to the configured Azure Blob container. When the worker or client reads the message, it downloads and decompresses the payload automatically. 
diff --git a/durabletask/payload/store.py b/durabletask/payload/store.py index df0f5cfc..4acf5001 100644 --- a/durabletask/payload/store.py +++ b/durabletask/payload/store.py @@ -21,15 +21,15 @@ class LargePayloadStorageOptions: Attributes: threshold_bytes: Payloads larger than this value (in bytes) will - be externalized to the payload store. Defaults to 900,000 - (900 KB), matching the .NET SDK default. + be externalized to the payload store. Defaults to 262,144 + (256 KiB), matching the .NET SDK default. max_stored_payload_bytes: Maximum payload size (in bytes) that can be stored externally. Payloads exceeding this limit will cause an error. Defaults to 10,485,760 (10 MB). enable_compression: When ``True`` (the default), payloads are GZip-compressed before uploading. """ - threshold_bytes: int = 900_000 + threshold_bytes: int = 262_144 max_stored_payload_bytes: int = 10 * 1024 * 1024 # 10 MB enable_compression: bool = True diff --git a/examples/large_payload/README.md b/examples/large_payload/README.md index 63309efe..89104801 100644 --- a/examples/large_payload/README.md +++ b/examples/large_payload/README.md @@ -78,8 +78,8 @@ python app.py The example schedules two orchestrations: - **Small payload** — The input and output stay inline in the gRPC - messages (below the 1 KB threshold configured in the example). -- **Large payload** — The activity output (~70 KB) exceeds the + messages (below the 256 KiB threshold configured in the example). +- **Large payload** — The activity output (~342 KiB) exceeds the threshold and is automatically externalized to blob storage and retrieved transparently. 
@@ -106,7 +106,7 @@ The `BlobPayloadStoreOptions` class supports the following settings: | Option | Default | Description | |---|---|---| -| `threshold_bytes` | 900,000 (900 KB) | Payloads larger than this are externalized | +| `threshold_bytes` | 262,144 (256 KiB) | Payloads larger than this are externalized | | `max_stored_payload_bytes` | 10,485,760 (10 MB) | Maximum externalized payload size | | `enable_compression` | `True` | GZip-compress before uploading | | `container_name` | `"durabletask-payloads"` | Blob container name | diff --git a/examples/large_payload/app.py b/examples/large_payload/app.py index c4a8081d..e0b5bcee 100644 --- a/examples/large_payload/app.py +++ b/examples/large_payload/app.py @@ -81,8 +81,8 @@ def main(): # Configure the blob payload store store = BlobPayloadStore(BlobPayloadStoreOptions( connection_string=storage_conn_str, - # Use a low threshold so that we can see externalization in action - threshold_bytes=1_024, + # 256 KiB, matching the SDK default; larger payloads are externalized + threshold_bytes=262_144, )) secure_channel = endpoint.startswith("https://") @@ -120,7 +120,7 @@ def main(): # (the report will be externalized to blob storage automatically) print("\n--- Large payload (externalized to blob storage) ---") instance_id = c.schedule_new_orchestration( - large_payload_orchestrator, input=10_000) + large_payload_orchestrator, input=50_000) state = c.wait_for_orchestration_completion(instance_id, timeout=60) if state and state.runtime_status == client.OrchestrationStatus.COMPLETED: print(f"Result: {state.serialized_output}") diff --git a/tests/durabletask-azuremanaged/test_dts_large_payload_e2e.py b/tests/durabletask-azuremanaged/test_dts_large_payload_e2e.py index 9eb7e891..2b3a5dd3 100644 --- a/tests/durabletask-azuremanaged/test_dts_large_payload_e2e.py +++ b/tests/durabletask-azuremanaged/test_dts_large_payload_e2e.py @@ -45,7 +45,7 @@ TEST_CONTAINER = f"dts-payloads-{uuid.uuid4().hex[:8]}" # A low threshold so we can 
trigger externalization without massive strings. -# In production the default is 900 KB; here we use 1 KB for fast tests. +# In production the default is 256 KiB; here we use 1 KiB for fast tests. THRESHOLD_BYTES = 1_024 # Pin API version to one that Azurite supports. diff --git a/tests/durabletask/test_large_payload.py b/tests/durabletask/test_large_payload.py index a1c3d962..c819f9a1 100644 --- a/tests/durabletask/test_large_payload.py +++ b/tests/durabletask/test_large_payload.py @@ -421,7 +421,7 @@ def test_default_options(self): connection_string="UseDevelopmentStorage=true", )) opts = store.options - assert opts.threshold_bytes == 900_000 + assert opts.threshold_bytes == 262_144 assert opts.max_stored_payload_bytes == 10 * 1024 * 1024 assert opts.enable_compression is True assert opts.container_name == "durabletask-payloads" diff --git a/tests/durabletask/test_large_payload_e2e.py b/tests/durabletask/test_large_payload_e2e.py index 6ef76291..0e48d1c8 100644 --- a/tests/durabletask/test_large_payload_e2e.py +++ b/tests/durabletask/test_large_payload_e2e.py @@ -39,7 +39,7 @@ TEST_CONTAINER = f"e2e-payloads-{uuid.uuid4().hex[:8]}" # A low threshold so we can trigger externalization without massive strings. -# In production the default is 900 KB; here we use 1 KB for fast tests. +# In production the default is 256 KiB; here we use 1 KiB for fast tests. THRESHOLD_BYTES = 1_024 # Pin API version to one that Azurite supports.