From 4e2d7b3b71b70cbecebbe26894e2ab59c9d981d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a?= Date: Thu, 18 Jun 2026 21:05:05 +0000 Subject: [PATCH] feat: add date functions to `bigframes.bigquery` module Used the following prompt: > Update the descriptions and argument names in scripts/data/sql-functions/global-namespace/date.yaml according to the following SQL documentation: > > (Paste from https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/date_functions) > > Also, if there is a natural argument to use for `series_accessor_arg` in this yaml or others, add it. --- .../bigframes/bigframes/bigquery/__init__.py | 46 + .../extensions/core/series_accessor.py | 427 ++++++++ .../googlesql/global_namespace/date.py | 944 ++++++++++++++++++ .../sql-functions/global_namespace/bit.yaml | 1 + .../sql-functions/global_namespace/date.yaml | 447 +++++++++ .../scripts/generate_bigframes_bigquery.py | 21 + .../templates/core_series_accessor.py.j2 | 1 + .../generated/global_namespace/test_date.py | 372 +++++++ 8 files changed, 2259 insertions(+) create mode 100644 packages/bigframes/bigframes/operations/googlesql/global_namespace/date.py create mode 100644 packages/bigframes/scripts/data/sql-functions/global_namespace/date.yaml create mode 100644 packages/bigframes/tests/unit/bigquery/generated/global_namespace/test_date.py diff --git a/packages/bigframes/bigframes/bigquery/__init__.py b/packages/bigframes/bigframes/bigquery/__init__.py index 99a47d218691..8617982e62d2 100644 --- a/packages/bigframes/bigframes/bigquery/__init__.py +++ b/packages/bigframes/bigframes/bigquery/__init__.py @@ -126,6 +126,22 @@ parse_numeric, string, ) +from bigframes.operations.googlesql.global_namespace.date import ( + current_date, + date, + date_add, + date_bucket, + date_diff, + date_from_unix_date, + date_sub, + date_trunc, + extract, + format_date, + generate_date_array, + last_day, + parse_date, + unix_date, +) _functions = [ # approximate aggregate ops @@ -156,6 +172,21 @@ parse_bignumeric, parse_numeric, string, + # date ops + current_date, + date, + date_add, + date_bucket, + date_diff, + date_from_unix_date, + date_sub, + date_trunc, + extract, + format_date, + generate_date_array, + last_day, + parse_date, + unix_date, # datetime ops unix_micros, unix_millis, @@ -240,6 +271,21 @@ "parse_bignumeric", "parse_numeric", "string", + # date ops + "current_date", + "date", + "date_add", + "date_bucket", + "date_diff", + "date_from_unix_date", + "date_sub", + "date_trunc", + "extract", + "format_date", + "generate_date_array", + "last_day", + "parse_date", + "unix_date", # datetime ops "unix_micros", "unix_millis", diff --git a/packages/bigframes/bigframes/extensions/core/series_accessor.py b/packages/bigframes/bigframes/extensions/core/series_accessor.py index 96d0eb8d045e..219f34b30917 100644 --- a/packages/bigframes/bigframes/extensions/core/series_accessor.py +++ b/packages/bigframes/bigframes/extensions/core/series_accessor.py @@ -19,6 +19,7 @@ from __future__ import annotations import abc +import datetime from typing import ( Any, Generic, @@ -598,6 +599,22 @@ def flatten( ) return self._to_series(cast(series.Series, result)) + def bit_count( + self, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """The input, `expression`, must be an integer or `BYTES`. Returns the number of bits that are set in the input expression. For signed integers, this is the number of bits in two's complement form.""" + from bigframes.operations.googlesql.global_namespace.bit import ( + bit_count as bit_count_impl, + ) + + bf_series = self._bf_from_series(session) + result = bit_count_impl( + bf_series, + ) + return self._to_series(cast(series.Series, result)) + def bool_( self, *, @@ -752,6 +769,416 @@ def string( ) return self._to_series(cast(series.Series, result)) + def date( + self, + time_zone_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + year: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + month: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + day: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Constructs or extracts a date.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date as date_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + time_zone_expression, + year, + month, + day, + ) + + bf_series = self._bf_from_series(session) + result = date_impl( + bf_series, + time_zone_expression, + year, + month, + day, + ) + return self._to_series(cast(series.Series, result)) + + def date_add( + self, + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ], + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Adds a specified time interval to a DATE.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_add as date_add_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + int64_expression, + date_part, + ) + + bf_series = self._bf_from_series(session) + result = date_add_impl( + bf_series, + int64_expression, + date_part, + ) + return self._to_series(cast(series.Series, result)) + + def date_bucket( + self, + bucket_width: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], datetime.timedelta], + ], + bucket_origin: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Gets the lower bound of the date bucket that contains a date.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_bucket as date_bucket_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + bucket_width, + bucket_origin, + ) + + bf_series = self._bf_from_series(session) + result = date_bucket_impl( + bf_series, + bucket_width, + bucket_origin, + ) + return self._to_series(cast(series.Series, result)) + + def date_diff( + self, + start_date: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + granularity: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Gets the number of unit boundaries between two DATE values (end_date - start_date) at a particular time granularity.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_diff as date_diff_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + start_date, + granularity, + ) + + bf_series = self._bf_from_series(session) + result = date_diff_impl( + bf_series, + start_date, + granularity, + ) + return self._to_series(cast(series.Series, result)) + + def date_from_unix_date( + self, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Interprets an INT64 expression as the number of days since 1970-01-01.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_from_unix_date as date_from_unix_date_impl, + ) + + bf_series = self._bf_from_series(session) + result = date_from_unix_date_impl( + bf_series, + ) + return self._to_series(cast(series.Series, result)) + + def date_sub( + self, + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ], + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Subtracts a specified time interval from a DATE.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_sub as date_sub_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + int64_expression, + date_part, + ) + + bf_series = self._bf_from_series(session) + result = date_sub_impl( + bf_series, + int64_expression, + date_part, + ) + return self._to_series(cast(series.Series, result)) + + def date_trunc( + self, + granularity: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Truncates a DATE, DATETIME, or TIMESTAMP value at a particular granularity.""" + from bigframes.operations.googlesql.global_namespace.date import ( + date_trunc as date_trunc_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + granularity, + time_zone, + ) + + bf_series = self._bf_from_series(session) + result = date_trunc_impl( + bf_series, + granularity, + time_zone, + ) + return self._to_series(cast(series.Series, result)) + + def extract( + self, + part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Returns the value corresponding to the specified date part.""" + from bigframes.operations.googlesql.global_namespace.date import ( + extract as extract_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + part, + time_zone, + ) + + bf_series = self._bf_from_series(session) + result = extract_impl( + bf_series, + part, + time_zone, + ) + return self._to_series(cast(series.Series, result)) + + def format_date( + self, + format_string: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ], + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Formats a DATE value according to a specified format string.""" + from bigframes.operations.googlesql.global_namespace.date import ( + format_date as format_date_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + format_string, + time_zone, + ) + + bf_series = self._bf_from_series(session) + result = format_date_impl( + format_string, + bf_series, + time_zone, + ) + return self._to_series(cast(series.Series, result)) + + def last_day( + self, + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Returns the last day from a date expression. This is commonly used to return the last day of the month.""" + from bigframes.operations.googlesql.global_namespace.date import ( + last_day as last_day_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + date_part, + ) + + bf_series = self._bf_from_series(session) + result = last_day_impl( + bf_series, + date_part, + ) + return self._to_series(cast(series.Series, result)) + + def parse_date( + self, + format_string: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ], + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Converts a STRING value to a DATE value.""" + from bigframes.operations.googlesql.global_namespace.date import ( + parse_date as parse_date_impl, + ) + + # Resolve session from other arguments if not passed + if session is None: + import bigframes.core.googlesql as googlesql + + session = googlesql._find_session( + format_string, + ) + + bf_series = self._bf_from_series(session) + result = parse_date_impl( + format_string, + bf_series, + ) + return self._to_series(cast(series.Series, result)) + + def unix_date( + self, + *, + session: Optional[bigframes.session.Session] = None, + ) -> S: + """Returns the number of days since 1970-01-01.""" + from bigframes.operations.googlesql.global_namespace.date import ( + unix_date as unix_date_impl, + ) + + bf_series = self._bf_from_series(session) + result = unix_date_impl( + bf_series, + ) + return self._to_series(cast(series.Series, result)) + class AeadSeriesAccessor(AbstractBigQuerySeriesAccessor[S]): """Series accessor for BigQuery aead functions.""" diff --git a/packages/bigframes/bigframes/operations/googlesql/global_namespace/date.py b/packages/bigframes/bigframes/operations/googlesql/global_namespace/date.py new file mode 100644 index 000000000000..8e0dbf9ca90b --- /dev/null +++ b/packages/bigframes/bigframes/operations/googlesql/global_namespace/date.py @@ -0,0 +1,944 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT MODIFY THIS FILE DIRECTLY. +# This file was generated from: scripts/data/sql-functions/global_namespace/date.yaml +# by the script: scripts/generate_bigframes_bigquery.py + +from __future__ import annotations + +import datetime +from typing import Any, Literal, Union + +import bigframes.core.col +import bigframes.core.googlesql +import bigframes.core.sentinels as sentinels +import bigframes.series as series +from bigframes import dtypes +from bigframes.operations import googlesql + +_CURRENT_DATE_OP = googlesql.GoogleSqlScalarOp( + "CURRENT_DATE", + args=(googlesql.ArgSpec(optional=True),), + signature=lambda *args: dtypes.DATE_DTYPE, +) +_DATE_OP = googlesql.GoogleSqlScalarOp( + "DATE", + args=( + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + ), + signature=lambda *args: dtypes.DATE_DTYPE, +) + + +def _DATE_ADD_SIG(*args): + # Pad args with None to match max expected args + args = args + (None,) * (3 - len(args)) + # Try matching impl 0 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if dtypes.coerce_to_common(args[1], dtypes.INT_DTYPE) != dtypes.INT_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[2]) + except TypeError: + match_ok = False + else: + any1_val = args[2] + if match_ok: + return dtypes.DATE_DTYPE + + # Try matching impl 1 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if dtypes.coerce_to_common(args[1], dtypes.INT_DTYPE) != dtypes.INT_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[2]) + except TypeError: + match_ok = False + else: + any1_val = args[2] + if match_ok: + return dtypes.TIMESTAMP_DTYPE + + raise TypeError( + f"Could not find matching signature for date_add with argument types: {[str(t) for t in args]}" + ) + + +_DATE_ADD_OP = googlesql.GoogleSqlScalarOp( + "DATE_ADD", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=_DATE_ADD_SIG, +) + + +def _DATE_BUCKET_SIG(*args): + # Pad args with None to match max expected args + args = args + (None,) * (3 - len(args)) + # Try matching impl 0 + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if ( + dtypes.coerce_to_common(args[1], dtypes.TIMEDELTA_DTYPE) + != dtypes.TIMEDELTA_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + try: + if dtypes.coerce_to_common(args[2], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.DATE_DTYPE + + # Try matching impl 1 + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if ( + dtypes.coerce_to_common(args[1], dtypes.TIMEDELTA_DTYPE) + != dtypes.TIMEDELTA_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + try: + if ( + dtypes.coerce_to_common(args[2], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.TIMESTAMP_DTYPE + + raise TypeError( + f"Could not find matching signature for date_bucket with argument types: {[str(t) for t in args]}" + ) + + +_DATE_BUCKET_OP = googlesql.GoogleSqlScalarOp( + "DATE_BUCKET", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec(optional=True)), + signature=_DATE_BUCKET_SIG, +) +_DATE_DIFF_OP = googlesql.GoogleSqlScalarOp( + "DATE_DIFF", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.INT_DTYPE, +) +_DATE_FROM_UNIX_DATE_OP = googlesql.GoogleSqlScalarOp( + "DATE_FROM_UNIX_DATE", + args=(googlesql.ArgSpec(),), + signature=lambda *args: dtypes.DATE_DTYPE, +) + + +def _DATE_SUB_SIG(*args): + # Pad args with None to match max expected args + args = args + (None,) * (3 - len(args)) + # Try matching impl 0 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if dtypes.coerce_to_common(args[1], dtypes.INT_DTYPE) != dtypes.INT_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[2]) + except TypeError: + match_ok = False + else: + any1_val = args[2] + if match_ok: + return dtypes.DATE_DTYPE + + # Try matching impl 1 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if dtypes.coerce_to_common(args[1], dtypes.INT_DTYPE) != dtypes.INT_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[2] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[2]) + except TypeError: + match_ok = False + else: + any1_val = args[2] + if match_ok: + return dtypes.TIMESTAMP_DTYPE + + raise TypeError( + f"Could not find matching signature for date_sub with argument types: {[str(t) for t in args]}" + ) + + +_DATE_SUB_OP = googlesql.GoogleSqlScalarOp( + "DATE_SUB", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=_DATE_SUB_SIG, +) + + +def _DATE_TRUNC_SIG(*args): + # Pad args with None to match max expected args + args = args + (None,) * (3 - len(args)) + # Try matching impl 0 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.DATE_DTYPE + + # Try matching impl 1 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.TIMESTAMP_DTYPE + + # Try matching impl 2 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok and args[2] is not None: + try: + if ( + dtypes.coerce_to_common(args[2], dtypes.STRING_DTYPE) + != dtypes.STRING_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.TIMESTAMP_DTYPE + + raise TypeError( + f"Could not find matching signature for date_trunc with argument types: {[str(t) for t in args]}" + ) + + +_DATE_TRUNC_OP = googlesql.GoogleSqlScalarOp( + "DATE_TRUNC", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec(optional=True)), + signature=_DATE_TRUNC_SIG, +) + + +def _EXTRACT_SIG(*args): + # Pad args with None to match max expected args + args = args + (None,) * (3 - len(args)) + # Try matching impl 0 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.DATE_DTYPE) != dtypes.DATE_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.INT_DTYPE + + # Try matching impl 1 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok and args[2] is not None: + try: + if ( + dtypes.coerce_to_common(args[2], dtypes.STRING_DTYPE) + != dtypes.STRING_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.INT_DTYPE + + # Try matching impl 2 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.INT_DTYPE + + # Try matching impl 3 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if dtypes.coerce_to_common(args[0], dtypes.TIME_DTYPE) != dtypes.TIME_DTYPE: + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.INT_DTYPE + + # Try matching impl 4 + any1_val = None + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMEDELTA_DTYPE) + != dtypes.TIMEDELTA_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + if any1_val is not None: + try: + any1_val = dtypes.coerce_to_common(any1_val, args[1]) + except TypeError: + match_ok = False + else: + any1_val = args[1] + if match_ok: + return dtypes.INT_DTYPE + + # Try matching impl 5 + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok and args[1] is not None: + try: + if ( + dtypes.coerce_to_common(args[1], dtypes.STRING_DTYPE) + != dtypes.STRING_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.TIME_DTYPE + + # Try matching impl 6 + match_ok = True + if match_ok and args[0] is not None: + try: + if ( + dtypes.coerce_to_common(args[0], dtypes.TIMESTAMP_DTYPE) + != dtypes.TIMESTAMP_DTYPE + ): + match_ok = False + except TypeError: + match_ok = False + if match_ok: + return dtypes.TIME_DTYPE + + raise TypeError( + f"Could not find matching signature for extract with argument types: {[str(t) for t in args]}" + ) + + +_EXTRACT_OP = googlesql.GoogleSqlScalarOp( + "EXTRACT", + args=( + googlesql.ArgSpec(), + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + ), + signature=_EXTRACT_SIG, +) +_FORMAT_DATE_OP = googlesql.GoogleSqlScalarOp( + "FORMAT_DATE", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(), googlesql.ArgSpec(optional=True)), + signature=lambda *args: dtypes.STRING_DTYPE, +) +_GENERATE_DATE_ARRAY_OP = googlesql.GoogleSqlScalarOp( + "GENERATE_DATE_ARRAY", + args=( + googlesql.ArgSpec(), + googlesql.ArgSpec(), + googlesql.ArgSpec(optional=True), + googlesql.ArgSpec(optional=True), + ), + signature=lambda *args: dtypes.list_type(dtypes.DATE_DTYPE), +) +_LAST_DAY_OP = googlesql.GoogleSqlScalarOp( + "LAST_DAY", + args=(googlesql.ArgSpec(), googlesql.ArgSpec(optional=True)), + signature=lambda *args: dtypes.DATE_DTYPE, +) +_PARSE_DATE_OP = googlesql.GoogleSqlScalarOp( + "PARSE_DATE", + args=(googlesql.ArgSpec(), googlesql.ArgSpec()), + signature=lambda *args: dtypes.DATE_DTYPE, +) +_UNIX_DATE_OP = googlesql.GoogleSqlScalarOp( + "UNIX_DATE", + args=(googlesql.ArgSpec(),), + signature=lambda *args: dtypes.INT_DTYPE, +) + + +def current_date( + time_zone_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Returns the current date as a DATE object. Parentheses are optional when called with no arguments.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _CURRENT_DATE_OP, + time_zone_expression, + ) + + +def date( + expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + str, + ], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + time_zone_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + year: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + month: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + day: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Constructs or extracts a date.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_OP, + expression, + time_zone_expression, + year, + month, + day, + ) + + +def date_add( + date_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ], + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Adds a specified time interval to a DATE.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_ADD_OP, + date_expression, + int64_expression, + date_part, + ) + + +def date_bucket( + date_in_bucket: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + bucket_width: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], datetime.timedelta], + ], + bucket_origin: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Gets the lower bound of the date bucket that contains a date.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_BUCKET_OP, + date_in_bucket, + bucket_width, + bucket_origin, + ) + + +def date_diff( + end_date: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + start_date: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + granularity: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Gets the number of unit boundaries between two DATE values (end_date - start_date) at a particular time granularity.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_DIFF_OP, + end_date, + start_date, + granularity, + ) + + +def date_from_unix_date( + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Interprets an INT64 expression as the number of days since 1970-01-01.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_FROM_UNIX_DATE_OP, + int64_expression, + ) + + +def date_sub( + date_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ], + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Subtracts a specified time interval from a DATE.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_SUB_OP, + date_expression, + int64_expression, + date_part, + ) + + +def date_trunc( + date_value: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + granularity: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ], + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Truncates a DATE, DATETIME, or TIMESTAMP value at a particular granularity.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _DATE_TRUNC_OP, + date_value, + granularity, + time_zone, + ) + + +def extract( + date_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + datetime.time, + datetime.timedelta, + ], + ], + part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Returns the value corresponding to the specified date part.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _EXTRACT_OP, + date_expression, + part, + time_zone, + ) + + +def format_date( + format_string: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ], + date_expr: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + time_zone: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Formats a DATE value according to a specified format string.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _FORMAT_DATE_OP, + format_string, + date_expr, + time_zone, + ) + + +def generate_date_array( + start_date: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], datetime.date], + ], + end_date: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], datetime.date], + ], + int64_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], int], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Generates an array of dates in a range.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _GENERATE_DATE_ARRAY_OP, + start_date, + end_date, + int64_expression, + date_part, + ) + + +def last_day( + date_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[ + Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], + datetime.date, + datetime.datetime, + ], + ], + date_part: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Any, Literal[sentinels.Sentinel.ARGUMENT_DEFAULT]], + ] = sentinels.Sentinel.ARGUMENT_DEFAULT, +) -> Union[series.Series, bigframes.core.col.Expression]: + """Returns the last day from a date expression. This is commonly used to return the last day of the month.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _LAST_DAY_OP, + date_expression, + date_part, + ) + + +def parse_date( + format_string: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ], + date_string: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], str], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Converts a STRING value to a DATE value.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _PARSE_DATE_OP, + format_string, + date_string, + ) + + +def unix_date( + date_expression: Union[ + series.Series, + bigframes.core.col.Expression, + Union[Literal[sentinels.Sentinel.ARGUMENT_DEFAULT], datetime.date], + ], +) -> Union[series.Series, bigframes.core.col.Expression]: + """Returns the number of days since 1970-01-01.""" + return bigframes.core.googlesql.apply_googlesql_scalar_op( + _UNIX_DATE_OP, + date_expression, + ) diff --git a/packages/bigframes/scripts/data/sql-functions/global_namespace/bit.yaml b/packages/bigframes/scripts/data/sql-functions/global_namespace/bit.yaml index 3c2953133e49..fe14eae7b649 100644 --- a/packages/bigframes/scripts/data/sql-functions/global_namespace/bit.yaml +++ b/packages/bigframes/scripts/data/sql-functions/global_namespace/bit.yaml @@ -2,6 +2,7 @@ urn: extension:google:bq_scalar_functions scalar_functions: - name: "bit_count" description: "The input, `expression`, must be an integer or `BYTES`. Returns the number of bits that are set in the input expression. For signed integers, this is the number of bits in two's complement form." + series_accessor_arg: expression impls: # Signature: bit_count:i32 - args: diff --git a/packages/bigframes/scripts/data/sql-functions/global_namespace/date.yaml b/packages/bigframes/scripts/data/sql-functions/global_namespace/date.yaml new file mode 100644 index 000000000000..609be2d61791 --- /dev/null +++ b/packages/bigframes/scripts/data/sql-functions/global_namespace/date.yaml @@ -0,0 +1,447 @@ +urn: extension:google:bq_scalar_functions +scalar_functions: + - name: "current_date" + description: "Returns the current date as a DATE object. Parentheses are optional when called with no arguments." + impls: + # Signature: current_date:str + - args: + - name: "time_zone_expression" + value: string + optional: true + keyword_only: false + return: date + - name: "date" + description: "Constructs or extracts a date." + series_accessor_arg: expression + impls: + # Signature: date:pts_str + - args: + - name: "expression" + value: timestamp + optional: false + keyword_only: false + - name: "time_zone_expression" + value: string + optional: true + keyword_only: false + return: date + # Signature: date:pts + - args: + - name: "expression" + value: timestamp + optional: false + keyword_only: false + return: date + # Signature: date:i64_i64_i64 + - args: + - name: "year" + value: i64 + optional: false + keyword_only: false + - name: "month" + value: i64 + optional: false + keyword_only: false + - name: "day" + value: i64 + optional: false + keyword_only: false + return: date + # Signature: date:date + - args: + - name: "expression" + value: date + optional: false + keyword_only: false + return: date + # Signature: date:str + - args: + - name: "expression" + value: string + optional: false + keyword_only: false + return: date + - name: "date_add" + description: "Adds a specified time interval to a DATE." + series_accessor_arg: date_expression + impls: + # Signature: date_add:date_i64_any + - args: + - name: "date_expression" + value: date + optional: false + keyword_only: false + - name: "int64_expression" + value: i64 + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: false + keyword_only: false + return: date + # Signature: date_add:pts_i64_any + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "int64_expression" + value: i64 + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: false + keyword_only: false + return: timestamp + - name: "date_bucket" + description: "Gets the lower bound of the date bucket that contains a date." + series_accessor_arg: date_in_bucket + impls: + # Signature: date_bucket:date_iday_date + - args: + - name: "date_in_bucket" + value: date + optional: false + keyword_only: false + - name: "bucket_width" + value: interval_day + optional: false + keyword_only: false + - name: "bucket_origin" + value: date + optional: true + keyword_only: false + return: date + # Signature: date_bucket:pts_iday_pts + - args: + - name: "date_in_bucket" + value: timestamp + optional: false + keyword_only: false + - name: "bucket_width" + value: interval_day + optional: false + keyword_only: false + - name: "bucket_origin" + value: timestamp + optional: true + keyword_only: false + return: timestamp + - name: "date_diff" + description: "Gets the number of unit boundaries between two DATE values (end_date - start_date) at a particular time granularity." + series_accessor_arg: end_date + impls: + # Signature: date_diff:date_date_any + - args: + - name: "end_date" + value: date + optional: false + keyword_only: false + - name: "start_date" + value: date + optional: false + keyword_only: false + - name: "granularity" + value: any1 + optional: false + keyword_only: false + return: i64 + # Signature: date_diff:pts_pts_any + - args: + - name: "end_date" + value: timestamp + optional: false + keyword_only: false + - name: "start_date" + value: timestamp + optional: false + keyword_only: false + - name: "granularity" + value: any1 + optional: false + keyword_only: false + return: i64 + - name: "date_from_unix_date" + description: "Interprets an INT64 expression as the number of days since 1970-01-01." + series_accessor_arg: int64_expression + impls: + # Signature: date_from_unix_date:i64 + - args: + - name: "int64_expression" + value: i64 + optional: false + keyword_only: false + return: date + - name: "date_sub" + description: "Subtracts a specified time interval from a DATE." + series_accessor_arg: date_expression + impls: + # Signature: date_sub:date_i64_any + - args: + - name: "date_expression" + value: date + optional: false + keyword_only: false + - name: "int64_expression" + value: i64 + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: false + keyword_only: false + return: date + # Signature: date_sub:pts_i64_any + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "int64_expression" + value: i64 + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: false + keyword_only: false + return: timestamp + - name: "date_trunc" + description: "Truncates a DATE, DATETIME, or TIMESTAMP value at a particular granularity." + series_accessor_arg: date_value + impls: + # Signature: date_trunc:date_any + - args: + - name: "date_value" + value: date + optional: false + keyword_only: false + - name: "granularity" + value: any1 + optional: false + keyword_only: false + return: date + # Signature: date_trunc:pts_any + - args: + - name: "date_value" + value: timestamp + optional: false + keyword_only: false + - name: "granularity" + value: any1 + optional: false + keyword_only: false + return: timestamp + # Signature: date_trunc:pts_any_str + - args: + - name: "date_value" + value: timestamp + optional: false + keyword_only: false + - name: "granularity" + value: any1 + optional: false + keyword_only: false + - name: "time_zone" + value: string + optional: true + keyword_only: false + return: timestamp + - name: "extract" + description: "Returns the value corresponding to the specified date part." + series_accessor_arg: date_expression + impls: + # Signature: extract:date_any + - args: + - name: "date_expression" + value: date + optional: false + keyword_only: false + - name: "part" + value: any1 + optional: false + keyword_only: false + return: i64 + # Signature: extract:pts_any_str + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "part" + value: any1 + optional: false + keyword_only: false + - name: "time_zone" + value: string + optional: true + keyword_only: false + return: i64 + # Signature: extract:pts_any + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "part" + value: any1 + optional: false + keyword_only: false + return: i64 + # Signature: extract:pt_any + - args: + - name: "date_expression" + value: time + optional: false + keyword_only: false + - name: "part" + value: any1 + optional: false + keyword_only: false + return: i64 + # Signature: extract:iday_any + - args: + - name: "date_expression" + value: interval_day + optional: false + keyword_only: false + - name: "part" + value: any1 + optional: false + keyword_only: false + return: i64 + # Signature: extract:pts_str + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "time_zone" + value: string + optional: true + keyword_only: false + return: time + # Signature: extract:pts + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + return: time + - name: "format_date" + description: "Formats a DATE value according to a specified format string." + series_accessor_arg: date_expr + impls: + # Signature: format_date:str_date + - args: + - name: "format_string" + value: string + optional: false + keyword_only: false + - name: "date_expr" + value: date + optional: false + keyword_only: false + return: string + # Signature: format_date:str_pts + - args: + - name: "format_string" + value: string + optional: false + keyword_only: false + - name: "date_expr" + value: timestamp + optional: false + keyword_only: false + return: string + # Signature: format_date:str_pts_str + - args: + - name: "format_string" + value: string + optional: false + keyword_only: false + - name: "date_expr" + value: timestamp + optional: false + keyword_only: false + - name: "time_zone" + value: string + optional: true + keyword_only: false + return: string + - name: "generate_date_array" + description: "Generates an array of dates in a range." + impls: + # Signature: generate_date_array:date_date_i64_any + - args: + - name: "start_date" + value: date + optional: false + keyword_only: false + - name: "end_date" + value: date + optional: false + keyword_only: false + - name: "int64_expression" + value: i64 + optional: true + keyword_only: false + - name: "date_part" + value: any1 + optional: true + keyword_only: false + return: list + - name: "last_day" + description: "Returns the last day from a date expression. This is commonly used to return the last day of the month." + series_accessor_arg: date_expression + impls: + # Signature: last_day:date_any + - args: + - name: "date_expression" + value: date + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: true + keyword_only: false + return: date + # Signature: last_day:pts_any + - args: + - name: "date_expression" + value: timestamp + optional: false + keyword_only: false + - name: "date_part" + value: any1 + optional: true + keyword_only: false + return: date + - name: "parse_date" + description: "Converts a STRING value to a DATE value." + series_accessor_arg: date_string + impls: + # Signature: parse_date:str_str + - args: + - name: "format_string" + value: string + optional: false + keyword_only: false + - name: "date_string" + value: string + optional: false + keyword_only: false + return: date + - name: "unix_date" + description: "Returns the number of days since 1970-01-01." + series_accessor_arg: date_expression + impls: + # Signature: unix_date:date + - args: + - name: "date_expression" + value: date + optional: false + keyword_only: false + return: i64 diff --git a/packages/bigframes/scripts/generate_bigframes_bigquery.py b/packages/bigframes/scripts/generate_bigframes_bigquery.py index bb232a6cdf8c..1a095c804eaf 100755 --- a/packages/bigframes/scripts/generate_bigframes_bigquery.py +++ b/packages/bigframes/scripts/generate_bigframes_bigquery.py @@ -78,6 +78,7 @@ "timestamp": "dtypes.TIMESTAMP_DTYPE", "decimal<38,9>": "dtypes.NUMERIC_DTYPE", "decimal<76,38>": "dtypes.BIGNUMERIC_DTYPE", + "interval_day": "dtypes.TIMEDELTA_DTYPE", } PY_TYPE_MAP = { @@ -98,6 +99,7 @@ "struct": "dict", "decimal<38,9>": "decimal.Decimal", "decimal<76,38>": "decimal.Decimal", + "interval_day": "datetime.timedelta", } YAML_TYPE_TO_COL = { @@ -222,9 +224,12 @@ def load_templates(): def _collect_args(impls): args_by_name = {} arg_order = [] + arg_appearances = {} for impl in impls: + seen_in_impl = set() for arg in impl["args"]: name = arg["name"] + seen_in_impl.add(name) if name not in args_by_name: args_by_name[name] = { "types": set(), @@ -232,7 +237,23 @@ def _collect_args(impls): "keyword_only": arg["keyword_only"], } arg_order.append(name) + else: + # If it was marked optional or keyword_only in any previous impl, keep it. + # Or if this impl marks it as optional/keyword_only, update it. + if arg["optional"]: + args_by_name[name]["optional"] = True + if arg["keyword_only"]: + args_by_name[name]["keyword_only"] = True args_by_name[name]["types"].add(arg["value"]) + for name in seen_in_impl: + arg_appearances[name] = arg_appearances.get(name, 0) + 1 + + # If an argument is not in all impls, it must be optional overall + num_impls = len(impls) + for name, count in arg_appearances.items(): + if count < num_impls: + args_by_name[name]["optional"] = True + return args_by_name, arg_order diff --git a/packages/bigframes/scripts/templates/core_series_accessor.py.j2 b/packages/bigframes/scripts/templates/core_series_accessor.py.j2 index 5881fe6963b9..3d57971d8465 100644 --- a/packages/bigframes/scripts/templates/core_series_accessor.py.j2 +++ b/packages/bigframes/scripts/templates/core_series_accessor.py.j2 @@ -8,6 +8,7 @@ from __future__ import annotations import abc +import datetime from typing import ( Any, cast, diff --git a/packages/bigframes/tests/unit/bigquery/generated/global_namespace/test_date.py b/packages/bigframes/tests/unit/bigquery/generated/global_namespace/test_date.py new file mode 100644 index 000000000000..b8cd1a35b720 --- /dev/null +++ b/packages/bigframes/tests/unit/bigquery/generated/global_namespace/test_date.py @@ -0,0 +1,372 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# DO NOT MODIFY THIS FILE DIRECTLY. +# This file was generated from: scripts/data/sql-functions/global_namespace/date.yaml +# by the script: scripts/generate_bigframes_bigquery.py + +import bigframes.bigquery as bbq +import bigframes.core.col +import bigframes.core.expression as ex +import bigframes.operations.googlesql.global_namespace.date as date_op +import bigframes.pandas as bpd + + +def test_current_date_expression(): + # Call the function with col() expressions + result = bbq.current_date( + bpd.col("time_zone_expression"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._CURRENT_DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 1 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "time_zone_expression" + + +def test_date_expression(): + # Call the function with col() expressions + result = bbq.date( + bpd.col("expression"), + bpd.col("time_zone_expression"), + bpd.col("year"), + bpd.col("month"), + bpd.col("day"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 5 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "expression" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "time_zone_expression" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "year" + assert isinstance(expr.inputs[3], ex.UnboundVariableExpression) + assert expr.inputs[3].id == "month" + assert isinstance(expr.inputs[4], ex.UnboundVariableExpression) + assert expr.inputs[4].id == "day" + + +def test_date_add_expression(): + # Call the function with col() expressions + result = bbq.date_add( + bpd.col("date_expression"), + bpd.col("int64_expression"), + bpd.col("date_part"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_ADD_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_expression" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "int64_expression" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "date_part" + + +def test_date_bucket_expression(): + # Call the function with col() expressions + result = bbq.date_bucket( + bpd.col("date_in_bucket"), + bpd.col("bucket_width"), + bpd.col("bucket_origin"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_BUCKET_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_in_bucket" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "bucket_width" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "bucket_origin" + + +def test_date_diff_expression(): + # Call the function with col() expressions + result = bbq.date_diff( + bpd.col("end_date"), + bpd.col("start_date"), + bpd.col("granularity"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_DIFF_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "end_date" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "start_date" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "granularity" + + +def test_date_from_unix_date_expression(): + # Call the function with col() expressions + result = bbq.date_from_unix_date( + bpd.col("int64_expression"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_FROM_UNIX_DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 1 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "int64_expression" + + +def test_date_sub_expression(): + # Call the function with col() expressions + result = bbq.date_sub( + bpd.col("date_expression"), + bpd.col("int64_expression"), + bpd.col("date_part"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_SUB_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_expression" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "int64_expression" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "date_part" + + +def test_date_trunc_expression(): + # Call the function with col() expressions + result = bbq.date_trunc( + bpd.col("date_value"), + bpd.col("granularity"), + bpd.col("time_zone"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._DATE_TRUNC_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_value" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "granularity" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "time_zone" + + +def test_extract_expression(): + # Call the function with col() expressions + result = bbq.extract( + bpd.col("date_expression"), + bpd.col("part"), + bpd.col("time_zone"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._EXTRACT_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_expression" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "part" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "time_zone" + + +def test_format_date_expression(): + # Call the function with col() expressions + result = bbq.format_date( + bpd.col("format_string"), + bpd.col("date_expr"), + bpd.col("time_zone"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._FORMAT_DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 3 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "format_string" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "date_expr" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "time_zone" + + +def test_generate_date_array_expression(): + # Call the function with col() expressions + result = bbq.generate_date_array( + bpd.col("start_date"), + bpd.col("end_date"), + bpd.col("int64_expression"), + bpd.col("date_part"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._GENERATE_DATE_ARRAY_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 4 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "start_date" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "end_date" + assert isinstance(expr.inputs[2], ex.UnboundVariableExpression) + assert expr.inputs[2].id == "int64_expression" + assert isinstance(expr.inputs[3], ex.UnboundVariableExpression) + assert expr.inputs[3].id == "date_part" + + +def test_last_day_expression(): + # Call the function with col() expressions + result = bbq.last_day( + bpd.col("date_expression"), + bpd.col("date_part"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._LAST_DAY_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 2 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_expression" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "date_part" + + +def test_parse_date_expression(): + # Call the function with col() expressions + result = bbq.parse_date( + bpd.col("format_string"), + bpd.col("date_string"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._PARSE_DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 2 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "format_string" + assert isinstance(expr.inputs[1], ex.UnboundVariableExpression) + assert expr.inputs[1].id == "date_string" + + +def test_unix_date_expression(): + # Call the function with col() expressions + result = bbq.unix_date( + bpd.col("date_expression"), + ) + + # Verify result is a col Expression + assert isinstance(result, bigframes.core.col.Expression) + + # Verify the internal expression structure + expr = result._value + assert isinstance(expr, ex.OpExpression) + assert expr.op == date_op._UNIX_DATE_OP + + # Verify arguments are free variables matching the names + assert len(expr.inputs) == 1 + assert isinstance(expr.inputs[0], ex.UnboundVariableExpression) + assert expr.inputs[0].id == "date_expression"