From 7d1d9122f0d277a8feeb6cf7c3b16ae7dcfbf948 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:07:03 +0100 Subject: [PATCH 01/19] - new _typing and _expression stub file to centralize type aliases and allow circular imports between files. - added nested dtypes, bytesarray, and memoryview as literal, convertible python types - PythonLiteral is a recursive type, to allow dict of list, list of list, etc... --- _duckdb-stubs/_expression.pyi | 55 +++++++++++++++++++++++++++++++++++ _duckdb-stubs/_typing.pyi | 35 ++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 _duckdb-stubs/_expression.pyi create mode 100644 _duckdb-stubs/_typing.pyi diff --git a/_duckdb-stubs/_expression.pyi b/_duckdb-stubs/_expression.pyi new file mode 100644 index 00000000..34b08940 --- /dev/null +++ b/_duckdb-stubs/_expression.pyi @@ -0,0 +1,55 @@ +from typing import TYPE_CHECKING, Any, overload +from duckdb import sqltypes + +if TYPE_CHECKING: + from ._typing import IntoExpr + +class Expression: + def __add__(self, other: IntoExpr) -> Expression: ... + def __and__(self, other: IntoExpr) -> Expression: ... + def __div__(self, other: IntoExpr) -> Expression: ... + def __eq__(self, other: IntoExpr) -> Expression: ... # type: ignore[override] + def __floordiv__(self, other: IntoExpr) -> Expression: ... + def __ge__(self, other: IntoExpr) -> Expression: ... + def __gt__(self, other: IntoExpr) -> Expression: ... + @overload + def __init__(self, arg0: str) -> None: ... + @overload + def __init__(self, arg0: Any) -> None: ... + def __invert__(self) -> Expression: ... + def __le__(self, other: IntoExpr) -> Expression: ... + def __lt__(self, other: IntoExpr) -> Expression: ... + def __mod__(self, other: IntoExpr) -> Expression: ... + def __mul__(self, other: IntoExpr) -> Expression: ... + def __ne__(self, other: IntoExpr) -> Expression: ... # type: ignore[override] + def __neg__(self) -> Expression: ... 
+ def __or__(self, other: IntoExpr) -> Expression: ... + def __pow__(self, other: IntoExpr) -> Expression: ... + def __radd__(self, other: IntoExpr) -> Expression: ... + def __rand__(self, other: IntoExpr) -> Expression: ... + def __rdiv__(self, other: IntoExpr) -> Expression: ... + def __rfloordiv__(self, other: IntoExpr) -> Expression: ... + def __rmod__(self, other: IntoExpr) -> Expression: ... + def __rmul__(self, other: IntoExpr) -> Expression: ... + def __ror__(self, other: IntoExpr) -> Expression: ... + def __rpow__(self, other: IntoExpr) -> Expression: ... + def __rsub__(self, other: IntoExpr) -> Expression: ... + def __rtruediv__(self, other: IntoExpr) -> Expression: ... + def __sub__(self, other: IntoExpr) -> Expression: ... + def __truediv__(self, other: IntoExpr) -> Expression: ... + def alias(self, name: str) -> Expression: ... + def asc(self) -> Expression: ... + def between(self, lower: IntoExpr, upper: IntoExpr) -> Expression: ... + def cast(self, type: sqltypes.DuckDBPyType) -> Expression: ... + def collate(self, collation: str) -> Expression: ... + def desc(self) -> Expression: ... + def get_name(self) -> str: ... + def isin(self, *args: IntoExpr) -> Expression: ... + def isnotin(self, *args: IntoExpr) -> Expression: ... + def isnotnull(self) -> Expression: ... + def isnull(self) -> Expression: ... + def nulls_first(self) -> Expression: ... + def nulls_last(self) -> Expression: ... + def otherwise(self, value: IntoExpr) -> Expression: ... + def show(self) -> None: ... + def when(self, condition: IntoExpr, value: IntoExpr) -> Expression: ... 
diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi new file mode 100644 index 00000000..9c0ab487 --- /dev/null +++ b/_duckdb-stubs/_typing.pyi @@ -0,0 +1,35 @@ +from __future__ import annotations +from typing import TypeAlias, TYPE_CHECKING +from datetime import date, datetime, time, timedelta +from decimal import Decimal +from uuid import UUID +from collections.abc import Mapping + +if TYPE_CHECKING: + from ._expression import Expression + +NumericLiteral: TypeAlias = int | float | Decimal +"""Python objects that can be converted to a numerical `ConstantExpression` (integer or floating points numbers.)""" +TemporalLiteral: TypeAlias = date | datetime | time | timedelta +BlobLiteral: TypeAlias = bytes | bytearray | memoryview +"""Python objects that can be converted to a `BLOB` `ConstantExpression`. + +Note: + `bytes` can also be converted to a `BITSTRING`. +""" +NonNestedLiteral: TypeAlias = NumericLiteral | TemporalLiteral | str | bool | BlobLiteral | UUID +PythonLiteral: TypeAlias = ( + NonNestedLiteral | list[PythonLiteral] | tuple[PythonLiteral, ...] | dict[PythonLiteral, PythonLiteral] | None +) +"""Python objects that can be converted to a `ConstantExpression`.""" +# the field_ids argument to to_parquet and write_parquet has a recursive structure +ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] + +IntoExprColumn: TypeAlias = Expression | str +"""Types that are, or can be used as a `ColumnExpression`.""" +IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral +"""Any type that can be converted to an `Expression` (or is already one). 
+ +See Also: + https://duckdb.org/docs/stable/clients/python/conversion +""" From 40b10d483bd633bedbd48986850d100cccef5a0e Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:08:22 +0100 Subject: [PATCH 02/19] refactor of init in consequence of last commit: - _ExpressionLike -> IntoExpr - Expression | str -> IntoExprColumn --- _duckdb-stubs/__init__.pyi | 88 ++++++++------------------------------ 1 file changed, 19 insertions(+), 69 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 81d69be7..2290ba71 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -1,10 +1,8 @@ -import datetime -import decimal import os import pathlib import typing -import uuid from typing_extensions import Self +from ._expression import Expression if typing.TYPE_CHECKING: import fsspec @@ -12,7 +10,8 @@ if typing.TYPE_CHECKING: import polars import pandas import pyarrow.lib - from collections.abc import Callable, Iterable, Sequence, Mapping + from collections.abc import Callable, Iterable, Sequence + from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn from duckdb import sqltypes, func from builtins import list as lst # needed to avoid mypy error on DuckDBPyRelation.list method shadowing @@ -491,9 +490,7 @@ class DuckDBPyRelation: def __getattr__(self, name: str) -> DuckDBPyRelation: ... def __getitem__(self, name: str) -> DuckDBPyRelation: ... def __len__(self) -> int: ... - def aggregate( - self, aggr_expr: str | Iterable[_ExpressionLike], group_expr: _ExpressionLike = "" - ) -> DuckDBPyRelation: ... + def aggregate(self, aggr_expr: str | Iterable[IntoExpr], group_expr: IntoExpr = "") -> DuckDBPyRelation: ... def any_value( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... 
@@ -578,7 +575,7 @@ class DuckDBPyRelation: def fetchmany(self, size: typing.SupportsInt = 1) -> lst[tuple[typing.Any, ...]]: ... def fetchnumpy(self) -> dict[str, np.typing.NDArray[typing.Any] | pandas.Categorical]: ... def fetchone(self) -> tuple[typing.Any, ...] | None: ... - def filter(self, filter_expr: Expression | str) -> DuckDBPyRelation: ... + def filter(self, filter_expr: IntoExprColumn) -> DuckDBPyRelation: ... def first(self, expression: str, groups: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... def first_value(self, expression: str, window_spec: str = "", projected_columns: str = "") -> DuckDBPyRelation: ... def fsum( @@ -594,7 +591,7 @@ class DuckDBPyRelation: def join( self, other_rel: DuckDBPyRelation, - condition: Expression | str, + condition: IntoExprColumn, how: typing.Literal["inner", "left", "right", "outer", "semi", "anti"] = "inner", ) -> DuckDBPyRelation: ... def lag( @@ -665,7 +662,7 @@ class DuckDBPyRelation: def product( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... - def project(self, *args: _ExpressionLike, groups: str = "") -> DuckDBPyRelation: ... + def project(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... def quantile( self, expression: str, @@ -694,6 +691,9 @@ class DuckDBPyRelation: def rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... + def select(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... + def select_dtypes(self, types: list[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... + def select_types(self, types: list[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... def select(self, *args: _ExpressionLike, groups: str = "") -> DuckDBPyRelation: ... 
def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... def select_types(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... @@ -707,7 +707,7 @@ class DuckDBPyRelation: null_value: str | None = None, render_mode: RenderMode | None = None, ) -> None: ... - def sort(self, *args: _ExpressionLike) -> DuckDBPyRelation: ... + def sort(self, *args: IntoExpr) -> DuckDBPyRelation: ... def sql_query(self) -> str: ... def std( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -771,7 +771,7 @@ class DuckDBPyRelation: def torch(self) -> dict[str, typing.Any]: ... def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... - def update(self, set: dict[str, _ExpressionLike], *, condition: _ExpressionLike | None = None) -> None: ... + def update(self, set: dict[str, IntoExpr], *, condition: IntoExpr = None) -> None: ... def value_counts(self, expression: str, groups: str = "") -> DuckDBPyRelation: ... def var( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -878,56 +878,6 @@ class ExplainType: @property def value(self) -> int: ... -class Expression: - def __add__(self, other: _ExpressionLike) -> Expression: ... - def __and__(self, other: _ExpressionLike) -> Expression: ... - def __div__(self, other: _ExpressionLike) -> Expression: ... - def __eq__(self, other: _ExpressionLike) -> Expression: ... # type: ignore[override] - def __floordiv__(self, other: _ExpressionLike) -> Expression: ... - def __ge__(self, other: _ExpressionLike) -> Expression: ... - def __gt__(self, other: _ExpressionLike) -> Expression: ... - @typing.overload - def __init__(self, arg0: str) -> None: ... - @typing.overload - def __init__(self, arg0: typing.Any) -> None: ... - def __invert__(self) -> Expression: ... - def __le__(self, other: _ExpressionLike) -> Expression: ... 
- def __lt__(self, other: _ExpressionLike) -> Expression: ... - def __mod__(self, other: _ExpressionLike) -> Expression: ... - def __mul__(self, other: _ExpressionLike) -> Expression: ... - def __ne__(self, other: _ExpressionLike) -> Expression: ... # type: ignore[override] - def __neg__(self) -> Expression: ... - def __or__(self, other: _ExpressionLike) -> Expression: ... - def __pow__(self, other: _ExpressionLike) -> Expression: ... - def __radd__(self, other: _ExpressionLike) -> Expression: ... - def __rand__(self, other: _ExpressionLike) -> Expression: ... - def __rdiv__(self, other: _ExpressionLike) -> Expression: ... - def __rfloordiv__(self, other: _ExpressionLike) -> Expression: ... - def __rmod__(self, other: _ExpressionLike) -> Expression: ... - def __rmul__(self, other: _ExpressionLike) -> Expression: ... - def __ror__(self, other: _ExpressionLike) -> Expression: ... - def __rpow__(self, other: _ExpressionLike) -> Expression: ... - def __rsub__(self, other: _ExpressionLike) -> Expression: ... - def __rtruediv__(self, other: _ExpressionLike) -> Expression: ... - def __sub__(self, other: _ExpressionLike) -> Expression: ... - def __truediv__(self, other: _ExpressionLike) -> Expression: ... - def alias(self, name: str) -> Expression: ... - def asc(self) -> Expression: ... - def between(self, lower: _ExpressionLike, upper: _ExpressionLike) -> Expression: ... - def cast(self, type: sqltypes.DuckDBPyType) -> Expression: ... - def collate(self, collation: str) -> Expression: ... - def desc(self) -> Expression: ... - def get_name(self) -> str: ... - def isin(self, *args: _ExpressionLike) -> Expression: ... - def isnotin(self, *args: _ExpressionLike) -> Expression: ... - def isnotnull(self) -> Expression: ... - def isnull(self) -> Expression: ... - def nulls_first(self) -> Expression: ... - def nulls_last(self) -> Expression: ... - def otherwise(self, value: _ExpressionLike) -> Expression: ... - def show(self) -> None: ... 
- def when(self, condition: _ExpressionLike, value: _ExpressionLike) -> Expression: ... - class FatalException(DatabaseError): ... class HTTPException(IOException): @@ -1078,18 +1028,18 @@ class token_type: @property def value(self) -> int: ... -def CaseExpression(condition: _ExpressionLike, value: _ExpressionLike) -> Expression: ... -def CoalesceOperator(*args: _ExpressionLike) -> Expression: ... +def CaseExpression(condition: IntoExpr, value: IntoExpr) -> Expression: ... +def CoalesceOperator(*args: IntoExpr) -> Expression: ... def ColumnExpression(*args: str) -> Expression: ... def ConstantExpression(value: typing.Any) -> Expression: ... def DefaultExpression() -> Expression: ... -def FunctionExpression(function_name: str, *args: _ExpressionLike) -> Expression: ... -def LambdaExpression(lhs: typing.Any, rhs: _ExpressionLike) -> Expression: ... +def FunctionExpression(function_name: str, *args: IntoExpr) -> Expression: ... +def LambdaExpression(lhs: typing.Any, rhs: IntoExpr) -> Expression: ... def SQLExpression(expression: str) -> Expression: ... def StarExpression(*, exclude: Iterable[str | Expression] | None = None) -> Expression: ... def aggregate( df: pandas.DataFrame, - aggr_expr: str | Iterable[_ExpressionLike], + aggr_expr: str | Iterable[IntoExpr], group_expr: str = "", *, connection: DuckDBPyConnection | None = None, @@ -1203,7 +1153,7 @@ def fetchone(*, connection: DuckDBPyConnection | None = None) -> tuple[typing.An def filesystem_is_registered(name: str, *, connection: DuckDBPyConnection | None = None) -> bool: ... def filter( df: pandas.DataFrame, - filter_expr: Expression | str, + filter_expr: IntoExprColumn, *, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... @@ -1347,7 +1297,7 @@ def pl( connection: DuckDBPyConnection | None = None, ) -> polars.DataFrame | polars.LazyFrame: ... 
def project( - df: pandas.DataFrame, *args: _ExpressionLike, groups: str = "", connection: DuckDBPyConnection | None = None + df: pandas.DataFrame, *args: IntoExpr, groups: str = "", connection: DuckDBPyConnection | None = None ) -> DuckDBPyRelation: ... def query( query: Statement | str, From 03f915e4695f4568a3c62f856116ea2fa558d54c Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 16:44:02 +0100 Subject: [PATCH 03/19] added Numpy Array protocol to accepted literal types. allow to add numpy ndarray without creating unknown type errors if the library isn't installed in the venv --- _duckdb-stubs/_typing.pyi | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 9c0ab487..a24f4ffd 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -1,13 +1,37 @@ from __future__ import annotations -from typing import TypeAlias, TYPE_CHECKING +from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic from datetime import date, datetime, time, timedelta from decimal import Decimal from uuid import UUID -from collections.abc import Mapping +from collections.abc import Mapping, Iterator if TYPE_CHECKING: from ._expression import Expression +_T_co = TypeVar("_T_co", covariant=True) +_S_co = TypeVar("_S_co", bound=tuple[Any, ...], covariant=True) +_D_co = TypeVar("_D_co", covariant=True) + +class NPTypeLike(Protocol, Generic[_T_co]): ... + +class NPArrayLike(Protocol, Generic[_S_co, _D_co]): + def __len__(self) -> int: ... + def __contains__(self, value: object, /) -> bool: ... + def __iter__(self) -> Iterator[_D_co]: ... + def __array__(self, *args: Any, **kwargs: Any) -> Any: ... + def __array_finalize__(self, *args: Any, **kwargs: Any) -> None: ... + def __array_wrap__(self, *args: Any, **kwargs: Any) -> Any: ... 
+ def __getitem__(self, *args: Any, **kwargs: Any) -> Any: ... + def __setitem__(self, *args: Any, **kwargs: Any) -> None: ... + @property + def shape(self) -> _S_co: ... + @property + def dtype(self) -> Any: ... + @property + def ndim(self) -> int: ... + @property + def size(self) -> int: ... + NumericLiteral: TypeAlias = int | float | Decimal """Python objects that can be converted to a numerical `ConstantExpression` (integer or floating points numbers.)""" TemporalLiteral: TypeAlias = date | datetime | time | timedelta @@ -19,7 +43,12 @@ Note: """ NonNestedLiteral: TypeAlias = NumericLiteral | TemporalLiteral | str | bool | BlobLiteral | UUID PythonLiteral: TypeAlias = ( - NonNestedLiteral | list[PythonLiteral] | tuple[PythonLiteral, ...] | dict[PythonLiteral, PythonLiteral] | None + NonNestedLiteral + | list[PythonLiteral] + | tuple[PythonLiteral, ...] + | dict[PythonLiteral, PythonLiteral] + | NPArrayLike[Any, Any] + | None ) """Python objects that can be converted to a `ConstantExpression`.""" # the field_ids argument to to_parquet and write_parquet has a recursive structure From 33e9fd8430ba5cca32baf486b4df6cb8efa7b868 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 17:00:12 +0100 Subject: [PATCH 04/19] sync lst builtin fix with 3.10 branch --- _duckdb-stubs/__init__.pyi | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 2290ba71..ace4a5ea 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -10,29 +10,10 @@ if typing.TYPE_CHECKING: import polars import pandas import pyarrow.lib + from builtins import list as lst from collections.abc import Callable, Iterable, Sequence from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn from duckdb import sqltypes, func - from builtins import list as lst # needed to avoid mypy error on DuckDBPyRelation.list 
method shadowing - - # the field_ids argument to to_parquet and write_parquet has a recursive structure - ParquetFieldIdsType = Mapping[str, int | "ParquetFieldIdsType"] - -_ExpressionLike: typing.TypeAlias = ( - "Expression" - | str - | int - | float - | bool - | bytes - | None - | datetime.date - | datetime.datetime - | datetime.time - | datetime.timedelta - | decimal.Decimal - | uuid.UUID -) __all__: lst[str] = [ "BinderException", @@ -692,9 +673,6 @@ class DuckDBPyRelation: def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def select(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... - def select_dtypes(self, types: list[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... - def select_types(self, types: list[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... - def select(self, *args: _ExpressionLike, groups: str = "") -> DuckDBPyRelation: ... def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... def select_types(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... def set_alias(self, alias: str) -> DuckDBPyRelation: ... From 4348df803c834334506fc816098f7a2fb6298d9e Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 18:01:29 +0100 Subject: [PATCH 05/19] fix: dict keys can't be nested literals --- _duckdb-stubs/_typing.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index a24f4ffd..7280bb24 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -46,7 +46,7 @@ PythonLiteral: TypeAlias = ( NonNestedLiteral | list[PythonLiteral] | tuple[PythonLiteral, ...] 
- | dict[PythonLiteral, PythonLiteral] + | dict[NonNestedLiteral, PythonLiteral] | NPArrayLike[Any, Any] | None ) From 630593f42968263d96ba0cd705bec6b8598adab6 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 21:56:48 +0100 Subject: [PATCH 06/19] Relation.update set argument can accept a mapping --- _duckdb-stubs/__init__.pyi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index ace4a5ea..ad0ccdbc 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -11,7 +11,7 @@ if typing.TYPE_CHECKING: import pandas import pyarrow.lib from builtins import list as lst - from collections.abc import Callable, Iterable, Sequence + from collections.abc import Callable, Iterable, Sequence, Mapping from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn from duckdb import sqltypes, func @@ -749,7 +749,7 @@ class DuckDBPyRelation: def torch(self) -> dict[str, typing.Any]: ... def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... - def update(self, set: dict[str, IntoExpr], *, condition: IntoExpr = None) -> None: ... + def update(self, set: Mapping[str, IntoExpr], *, condition: IntoExpr = None) -> None: ... def value_counts(self, expression: str, groups: str = "") -> DuckDBPyRelation: ... 
def var( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" From bc673d116c00535794648257715f760b1c61970f Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:10:16 +0100 Subject: [PATCH 07/19] values function and Connection method can accept list of Python Literals, not list of Any element --- _duckdb-stubs/__init__.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index ad0ccdbc..1d3f1523 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -12,7 +12,7 @@ if typing.TYPE_CHECKING: import pyarrow.lib from builtins import list as lst from collections.abc import Callable, Iterable, Sequence, Mapping - from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn + from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn, PythonLiteral from duckdb import sqltypes, func __all__: lst[str] = [ @@ -458,7 +458,7 @@ class DuckDBPyConnection: ) -> sqltypes.DuckDBPyType: ... def unregister(self, view_name: str) -> DuckDBPyConnection: ... def unregister_filesystem(self, name: str) -> None: ... - def values(self, *args: lst[typing.Any] | tuple[Expression, ...] | Expression) -> DuckDBPyRelation: ... + def values(self, *args: lst[PythonLiteral] | tuple[Expression, ...] | Expression) -> DuckDBPyRelation: ... def view(self, view_name: str) -> DuckDBPyRelation: ... @property def description(self) -> lst[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]]: ... @@ -1434,7 +1434,7 @@ def union_type( def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def values( - *args: lst[typing.Any] | tuple[Expression, ...] 
| Expression, connection: DuckDBPyConnection | None = None + *args: lst[PythonLiteral] | tuple[Expression, ...] | Expression, connection: DuckDBPyConnection | None = None ) -> DuckDBPyRelation: ... def view(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def write_csv( From 0ef19f50a93780b9b4480f193687926741b8454a Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 22:30:26 +0100 Subject: [PATCH 08/19] - added IntoValues type alias - Using IntoExprColumn on StarExpression - fixed lhs type for LambdaExpression, and value type for ConstantExpression --- _duckdb-stubs/__init__.pyi | 14 ++++++-------- _duckdb-stubs/_typing.pyi | 3 ++- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 1d3f1523..2e9ed644 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -12,7 +12,7 @@ if typing.TYPE_CHECKING: import pyarrow.lib from builtins import list as lst from collections.abc import Callable, Iterable, Sequence, Mapping - from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn, PythonLiteral + from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn, PythonLiteral, IntoValues from duckdb import sqltypes, func __all__: lst[str] = [ @@ -458,7 +458,7 @@ class DuckDBPyConnection: ) -> sqltypes.DuckDBPyType: ... def unregister(self, view_name: str) -> DuckDBPyConnection: ... def unregister_filesystem(self, name: str) -> None: ... - def values(self, *args: lst[PythonLiteral] | tuple[Expression, ...] | Expression) -> DuckDBPyRelation: ... + def values(self, *args: IntoValues) -> DuckDBPyRelation: ... def view(self, view_name: str) -> DuckDBPyRelation: ... @property def description(self) -> lst[tuple[str, sqltypes.DuckDBPyType, None, None, None, None, None]]: ... 
@@ -1009,12 +1009,12 @@ class token_type: def CaseExpression(condition: IntoExpr, value: IntoExpr) -> Expression: ... def CoalesceOperator(*args: IntoExpr) -> Expression: ... def ColumnExpression(*args: str) -> Expression: ... -def ConstantExpression(value: typing.Any) -> Expression: ... +def ConstantExpression(value: PythonLiteral) -> Expression: ... def DefaultExpression() -> Expression: ... def FunctionExpression(function_name: str, *args: IntoExpr) -> Expression: ... -def LambdaExpression(lhs: typing.Any, rhs: IntoExpr) -> Expression: ... +def LambdaExpression(lhs: IntoExprColumn | tuple[IntoExprColumn, ...], rhs: IntoExpr) -> Expression: ... def SQLExpression(expression: str) -> Expression: ... -def StarExpression(*, exclude: Iterable[str | Expression] | None = None) -> Expression: ... +def StarExpression(*, exclude: Iterable[IntoExprColumn] | None = None) -> Expression: ... def aggregate( df: pandas.DataFrame, aggr_expr: str | Iterable[IntoExpr], @@ -1433,9 +1433,7 @@ def union_type( ) -> sqltypes.DuckDBPyType: ... def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... -def values( - *args: lst[PythonLiteral] | tuple[Expression, ...] | Expression, connection: DuckDBPyConnection | None = None -) -> DuckDBPyRelation: ... +def values(*args: IntoValues, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def view(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... 
def write_csv( df: pandas.DataFrame, diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 7280bb24..6828c7cb 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -53,7 +53,8 @@ PythonLiteral: TypeAlias = ( """Python objects that can be converted to a `ConstantExpression`.""" # the field_ids argument to to_parquet and write_parquet has a recursive structure ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] - +IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression +"""Types that can be converted to a table of values.""" IntoExprColumn: TypeAlias = Expression | str """Types that are, or can be used as a `ColumnExpression`.""" IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral From 2fba63d90e7f2d74d37ef3548b6094ba9449af4d Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Thu, 26 Feb 2026 23:08:24 +0100 Subject: [PATCH 09/19] refactor of datatypes typing: - fixed all places where it was too narrow. Most of the time str are accepted for sqltypes. 
odd exception seems to be the map method on Relation - using Self for annotations on arguments when pertinent --- _duckdb-stubs/__init__.pyi | 75 ++++++++++++++--------------------- _duckdb-stubs/_expression.pyi | 5 +-- _duckdb-stubs/_typing.pyi | 14 ++++++- 3 files changed, 45 insertions(+), 49 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 2e9ed644..1728914f 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -12,7 +12,15 @@ if typing.TYPE_CHECKING: import pyarrow.lib from builtins import list as lst from collections.abc import Callable, Iterable, Sequence, Mapping - from ._typing import ParquetFieldIdsType, IntoExpr, IntoExprColumn, PythonLiteral, IntoValues + from ._typing import ( + ParquetFieldIdsType, + IntoExpr, + IntoExprColumn, + PythonLiteral, + IntoValues, + IntoDType, + IntoNestedDType, + ) from duckdb import sqltypes, func __all__: lst[str] = [ @@ -193,7 +201,7 @@ class DuckDBPyConnection: def __enter__(self) -> Self: ... def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... - def array_type(self, type: sqltypes.DuckDBPyType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... + def array_type(self, type: IntoDType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... def arrow(self, rows_per_batch: typing.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" ... 
@@ -207,8 +215,8 @@ class DuckDBPyConnection: self, name: str, function: Callable[..., typing.Any], - parameters: lst[sqltypes.DuckDBPyType] | None = None, - return_type: sqltypes.DuckDBPyType | None = None, + parameters: lst[IntoDType] | None = None, + return_type: IntoDType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -327,9 +335,9 @@ class DuckDBPyConnection: def disable_profiling(self) -> None: ... def interrupt(self) -> None: ... def list_filesystems(self) -> lst[str]: ... - def list_type(self, type: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def list_type(self, type: IntoDType) -> sqltypes.DuckDBPyType: ... def load_extension(self, extension: str) -> None: ... - def map_type(self, key: sqltypes.DuckDBPyType, value: sqltypes.DuckDBPyType) -> sqltypes.DuckDBPyType: ... + def map_type(self, key: IntoDType, value: IntoDType) -> sqltypes.DuckDBPyType: ... @typing.overload def pl( self, rows_per_batch: typing.SupportsInt = 1000000, *, lazy: typing.Literal[False] = ... @@ -439,23 +447,17 @@ class DuckDBPyConnection: def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... def remove_function(self, name: str) -> DuckDBPyConnection: ... def rollback(self) -> DuckDBPyConnection: ... - def row_type( - self, fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def row_type(self, fields: IntoNestedDType) -> sqltypes.DuckDBPyType: ... def sql(self, query: Statement | str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... def sqltype(self, type_str: str) -> sqltypes.DuckDBPyType: ... def string_type(self, collation: str = "") -> sqltypes.DuckDBPyType: ... - def struct_type( - self, fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def struct_type(self, fields: IntoNestedDType) -> sqltypes.DuckDBPyType: ... 
def table(self, table_name: str) -> DuckDBPyRelation: ... def table_function(self, name: str, parameters: object = None) -> DuckDBPyRelation: ... def tf(self) -> dict[str, typing.Any]: ... def torch(self) -> dict[str, typing.Any]: ... def type(self, type_str: str) -> sqltypes.DuckDBPyType: ... - def union_type( - self, members: lst[sqltypes.DuckDBPyType] | dict[str, sqltypes.DuckDBPyType] - ) -> sqltypes.DuckDBPyType: ... + def union_type(self, members: IntoNestedDType) -> sqltypes.DuckDBPyType: ... def unregister(self, view_name: str) -> DuckDBPyConnection: ... def unregister_filesystem(self, name: str) -> None: ... def values(self, *args: IntoValues) -> DuckDBPyRelation: ... @@ -527,13 +529,13 @@ class DuckDBPyRelation: ) -> DuckDBPyRelation: ... def create(self, table_name: str) -> None: ... def create_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... - def cross(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def cross(self, other_rel: Self) -> DuckDBPyRelation: ... def cume_dist(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def dense_rank(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def describe(self) -> DuckDBPyRelation: ... def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... def distinct(self) -> DuckDBPyRelation: ... - def except_(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def except_(self, other_rel: Self) -> DuckDBPyRelation: ... def execute(self) -> DuckDBPyRelation: ... def explain(self, type: ExplainType = ExplainType.STANDARD) -> str: ... def favg( @@ -568,10 +570,10 @@ class DuckDBPyRelation: ) -> DuckDBPyRelation: ... def insert(self, values: lst[object]) -> None: ... def insert_into(self, table_name: str) -> None: ... - def intersect(self, other_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def intersect(self, other_rel: Self) -> DuckDBPyRelation: ... 
def join( self, - other_rel: DuckDBPyRelation, + other_rel: Self, condition: IntoExprColumn, how: typing.Literal["inner", "left", "right", "outer", "semi", "anti"] = "inner", ) -> DuckDBPyRelation: ... @@ -747,7 +749,7 @@ class DuckDBPyRelation: def to_table(self, table_name: str) -> None: ... def to_view(self, view_name: str, replace: bool = True) -> DuckDBPyRelation: ... def torch(self) -> dict[str, typing.Any]: ... - def union(self, union_rel: DuckDBPyRelation) -> DuckDBPyRelation: ... + def union(self, union_rel: Self) -> DuckDBPyRelation: ... def unique(self, unique_aggr: str) -> DuckDBPyRelation: ... def update(self, set: Mapping[str, IntoExpr], *, condition: IntoExpr = None) -> None: ... def value_counts(self, expression: str, groups: str = "") -> DuckDBPyRelation: ... @@ -1027,7 +1029,7 @@ def append( table_name: str, df: pandas.DataFrame, *, by_name: bool = False, connection: DuckDBPyConnection | None = None ) -> DuckDBPyConnection: ... def array_type( - type: sqltypes.DuckDBPyType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None + type: IntoDType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... @typing.overload def arrow( @@ -1056,8 +1058,8 @@ def connect( def create_function( name: str, function: Callable[..., typing.Any], - parameters: lst[sqltypes.DuckDBPyType] | None = None, - return_type: sqltypes.DuckDBPyType | None = None, + parameters: lst[IntoDType] | None = None, + return_type: IntoDType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -1240,15 +1242,10 @@ def get_profiling_information(*, connection: DuckDBPyConnection | None = None, f def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> lst[str]: ... 
-def list_type( - type: sqltypes.DuckDBPyType, *, connection: DuckDBPyConnection | None = None -) -> sqltypes.DuckDBPyType: ... +def list_type(type: IntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def load_extension(extension: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def map_type( - key: sqltypes.DuckDBPyType, - value: sqltypes.DuckDBPyType, - *, - connection: DuckDBPyConnection | None = None, + key: IntoDType, value: IntoDType, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... def order( df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection | None = None @@ -1394,11 +1391,7 @@ def register_filesystem( ) -> None: ... def remove_function(name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def rollback(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... -def row_type( - fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def row_type(fields: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def rowcount(*, connection: DuckDBPyConnection | None = None) -> int: ... def set_default_connection(connection: DuckDBPyConnection) -> None: ... def sql( @@ -1410,11 +1403,7 @@ def sql( ) -> DuckDBPyRelation: ... def sqltype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def string_type(collation: str = "", *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... -def struct_type( - fields: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def struct_type(fields: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... 
def table(table_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def table_function( name: str, @@ -1426,11 +1415,7 @@ def tf(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any] def tokenize(query: str) -> lst[tuple[int, token_type]]: ... def torch(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any]: ... def type(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... -def union_type( - members: dict[str, sqltypes.DuckDBPyType] | lst[sqltypes.DuckDBPyType], - *, - connection: DuckDBPyConnection | None = None, -) -> sqltypes.DuckDBPyType: ... +def union_type(members: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def values(*args: IntoValues, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... diff --git a/_duckdb-stubs/_expression.pyi b/_duckdb-stubs/_expression.pyi index 34b08940..b4d4b1a6 100644 --- a/_duckdb-stubs/_expression.pyi +++ b/_duckdb-stubs/_expression.pyi @@ -1,8 +1,7 @@ from typing import TYPE_CHECKING, Any, overload -from duckdb import sqltypes if TYPE_CHECKING: - from ._typing import IntoExpr + from ._typing import IntoExpr, IntoDType class Expression: def __add__(self, other: IntoExpr) -> Expression: ... @@ -40,7 +39,7 @@ class Expression: def alias(self, name: str) -> Expression: ... def asc(self) -> Expression: ... def between(self, lower: IntoExpr, upper: IntoExpr) -> Expression: ... - def cast(self, type: sqltypes.DuckDBPyType) -> Expression: ... + def cast(self, type: IntoDType) -> Expression: ... def collate(self, collation: str) -> Expression: ... def desc(self) -> Expression: ... def get_name(self) -> str: ... 
diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 6828c7cb..3b129b44 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -1,4 +1,5 @@ from __future__ import annotations + from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic from datetime import date, datetime, time, timedelta from decimal import Decimal @@ -7,6 +8,7 @@ from collections.abc import Mapping, Iterator if TYPE_CHECKING: from ._expression import Expression + from ._sqltypes import DuckDBPyType _T_co = TypeVar("_T_co", covariant=True) _S_co = TypeVar("_S_co", bound=tuple[Any, ...], covariant=True) @@ -54,7 +56,17 @@ PythonLiteral: TypeAlias = ( # the field_ids argument to to_parquet and write_parquet has a recursive structure ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression -"""Types that can be converted to a table of values.""" +"""Types that can be converted to a table.""" +IntoDType: TypeAlias = DuckDBPyType | str +"""Types that can be converted to a `DuckDBPyType`. + +Passing `INTEGER` is equivalent to passing `DuckDBPyType("INTEGER")` or `DuckDBPyType.INTEGER`. + +Note: + A `StrEnum` will be handled the same way as a `str`. +""" +IntoNestedDType: TypeAlias = dict[str, IntoDType] | list[IntoDType] +"""Types that can be converted to a nested `DuckDBPyType` (e.g. 
for struct or union types).""" IntoExprColumn: TypeAlias = Expression | str """Types that are, or can be used as a `ColumnExpression`.""" IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral From fac91ba1731f3060767a454acaaffcca97258a71 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Fri, 27 Feb 2026 10:32:36 +0100 Subject: [PATCH 10/19] using PythonLiteral types in place of typing.Any for Connexion.create_function and Relation.map --- _duckdb-stubs/__init__.pyi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 1728914f..3a0e2f30 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -214,7 +214,7 @@ class DuckDBPyConnection: def create_function( self, name: str, - function: Callable[..., typing.Any], + function: Callable[..., PythonLiteral], parameters: lst[IntoDType] | None = None, return_type: IntoDType | None = None, *, @@ -602,7 +602,7 @@ class DuckDBPyRelation: self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... def map( - self, map_function: Callable[..., typing.Any], *, schema: dict[str, sqltypes.DuckDBPyType] | None = None + self, map_function: Callable[..., PythonLiteral], *, schema: dict[str, sqltypes.DuckDBPyType] | None = None ) -> DuckDBPyRelation: ... def max( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" @@ -1057,7 +1057,7 @@ def connect( ) -> DuckDBPyConnection: ... 
def create_function( name: str, - function: Callable[..., typing.Any], + function: Callable[..., PythonLiteral], parameters: lst[IntoDType] | None = None, return_type: IntoDType | None = None, *, From 8edf762ced97536c5f2083449ca1fffad3025c36 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:02:16 +0100 Subject: [PATCH 11/19] numpy protocols improvements --- _duckdb-stubs/_typing.pyi | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 3b129b44..894a89e6 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -10,28 +10,46 @@ if TYPE_CHECKING: from ._expression import Expression from ._sqltypes import DuckDBPyType +# Numpy protocols + _T_co = TypeVar("_T_co", covariant=True) _S_co = TypeVar("_S_co", bound=tuple[Any, ...], covariant=True) _D_co = TypeVar("_D_co", covariant=True) -class NPTypeLike(Protocol, Generic[_T_co]): ... +class NPProtocol(Protocol): + """Base Protocol for numpy objects.""" + @property + def dtype(self) -> Any: ... + @property + def ndim(self) -> int: ... + def __array__(self, *args: Any, **kwargs: Any) -> Any: ... + def __array_wrap__(self, *args: Any, **kwargs: Any) -> Any: ... + @property + def __array_interface__(self) -> dict[str, Any]: ... + @property + def __array_priority__(self) -> float: ... -class NPArrayLike(Protocol, Generic[_S_co, _D_co]): +class NPScalarTypeLike(NPProtocol, Protocol): + @property + def itemsize(self) -> int: ... + +class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol): + """`numpy.ndarray` Protocol. + + This is needed to accept numpy arrays as literals in expressions, without emitting type checker errors about unknown symbol if the user doesn't have numpy installed. + + Note: + Using `np.typing.NDArray` is still the best option for return types. + """ def __len__(self) -> int: ... 
def __contains__(self, value: object, /) -> bool: ... def __iter__(self) -> Iterator[_D_co]: ... - def __array__(self, *args: Any, **kwargs: Any) -> Any: ... def __array_finalize__(self, *args: Any, **kwargs: Any) -> None: ... - def __array_wrap__(self, *args: Any, **kwargs: Any) -> Any: ... def __getitem__(self, *args: Any, **kwargs: Any) -> Any: ... def __setitem__(self, *args: Any, **kwargs: Any) -> None: ... @property def shape(self) -> _S_co: ... @property - def dtype(self) -> Any: ... - @property - def ndim(self) -> int: ... - @property def size(self) -> int: ... NumericLiteral: TypeAlias = int | float | Decimal From d96a4cc5f292e98b7d2343f06f2bf884d34cad3b Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:16:25 +0100 Subject: [PATCH 12/19] refactor: - reorganized expressions/values conversions types, improved their doc - added Literals for sqltypes ids and string conversion, and various type aliases, covering all paths. - using aformentionned literals in _sqltypes signatures --- _duckdb-stubs/_sqltypes.pyi | 5 +- _duckdb-stubs/_typing.pyi | 135 +++++++++++++++++++++++++++++------- 2 files changed, 112 insertions(+), 28 deletions(-) diff --git a/_duckdb-stubs/_sqltypes.pyi b/_duckdb-stubs/_sqltypes.pyi index 82e768eb..f5942805 100644 --- a/_duckdb-stubs/_sqltypes.pyi +++ b/_duckdb-stubs/_sqltypes.pyi @@ -1,5 +1,6 @@ import duckdb import typing +from ._typing import StrIntoDType, DTypeIdentifiers __all__: list[str] = [ "BIGINT", @@ -39,13 +40,13 @@ class DuckDBPyType: def __getitem__(self, name: str) -> DuckDBPyType: ... def __hash__(self) -> int: ... @typing.overload - def __init__(self, type_str: str, connection: duckdb.DuckDBPyConnection) -> None: ... + def __init__(self, type_str: StrIntoDType, connection: duckdb.DuckDBPyConnection) -> None: ... @typing.overload def __init__(self, obj: object) -> None: ... 
@property def children(self) -> list[tuple[str, DuckDBPyType | int | list[str]]]: ... @property - def id(self) -> str: ... + def id(self) -> DTypeIdentifiers: ... BIGINT: DuckDBPyType # value = BIGINT BIT: DuckDBPyType # value = BIT diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 894a89e6..7b85d85d 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -1,10 +1,10 @@ from __future__ import annotations -from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic +from typing import TypeAlias, TYPE_CHECKING, Protocol, Any, TypeVar, Generic, Literal from datetime import date, datetime, time, timedelta from decimal import Decimal from uuid import UUID -from collections.abc import Mapping, Iterator +from collections.abc import Mapping, Iterator, Sequence if TYPE_CHECKING: from ._expression import Expression @@ -52,44 +52,127 @@ class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol): @property def size(self) -> int: ... -NumericLiteral: TypeAlias = int | float | Decimal -"""Python objects that can be converted to a numerical `ConstantExpression` (integer or floating points numbers.)""" +# Expression and values conversions + +NumericLiteral: TypeAlias = int | float +"""Python objects that can be converted to a numerical `Expression` or `DuckDBPyType` (integer or floating points numbers.)""" TemporalLiteral: TypeAlias = date | datetime | time | timedelta -BlobLiteral: TypeAlias = bytes | bytearray | memoryview -"""Python objects that can be converted to a `BLOB` `ConstantExpression`. +BlobLiteral: TypeAlias = bytes | bytearray +"""Python objects that can be converted to a `BLOB` `ConstantExpression` or `DuckDBPyType`. Note: `bytes` can also be converted to a `BITSTRING`. """ -NonNestedLiteral: TypeAlias = NumericLiteral | TemporalLiteral | str | bool | BlobLiteral | UUID -PythonLiteral: TypeAlias = ( - NonNestedLiteral - | list[PythonLiteral] - | tuple[PythonLiteral, ...] 
- | dict[NonNestedLiteral, PythonLiteral] - | NPArrayLike[Any, Any] - | None -) +ScalarLiteral: TypeAlias = NumericLiteral | BlobLiteral | str | bool +NonNestedLiteral: TypeAlias = ScalarLiteral | TemporalLiteral | UUID | Decimal | memoryview + +# NOTE: +# Using `Sequence` and `Mapping` instead of `list | tuple` and `dict` would make the covariance of the element types work. +# Thus, this would allow us to avoid the use of `Any` for them. +# However, this would also be incorrect at runtime, since only the 3 aforementioned container types are accepted. +NestedLiteral: TypeAlias = list[Any] | tuple[Any, ...] | dict[Any, Any] | NPArrayLike[Any, Any] +"""Container types that can be converted to a nested `ConstantExpression` (e.g. to `ARRAY` or `STRUCT`). + +Those types can be arbitrarily nested, as long as their leaf values are `PythonLiteral`.""" + +PythonLiteral: TypeAlias = NonNestedLiteral | NestedLiteral | None """Python objects that can be converted to a `ConstantExpression`.""" + +IntoExprColumn: TypeAlias = Expression | str +"""Types that are, or can be used as a `ColumnExpression`.""" + +IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral +"""Any type that can be converted to an `Expression` (or is already one). + +See Also: + https://duckdb.org/docs/stable/clients/python/conversion +""" + # the field_ids argument to to_parquet and write_parquet has a recursive structure ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression """Types that can be converted to a table.""" -IntoDType: TypeAlias = DuckDBPyType | str -"""Types that can be converted to a `DuckDBPyType`. +# Datatypes conversions -Passing `INTEGER` is equivalent to passing `DuckDBPyType("INTEGER")` or `DuckDBPyType.INTEGER`.
+Builtins: TypeAlias = Literal[ + "bigint", + "bit", + "bignum", + "blob", + "boolean", + "date", + "double", + "float", + "hugeint", + "integer", + "interval", + "smallint", + "null", + "time_tz", + "time", + "timestamp_ms", + "timestamp_ns", + "timestamp_s", + "timestamp_tz", + "timestamp", + "tinyint", + "ubigint", + "uhugeint", + "uinteger", + "usmallint", + "utinyint", + "uuid", + "varchar", +] +"""Literal strings convertible into `DuckDBPyType` instances. Note: - A `StrEnum` will be handled the same way as a `str`. + Passing the same values in uppercase is also accepted. + We use lowercase here to be able to reuse this `Literal` in the `DTypeIdentifiers` `Literal`. """ -IntoNestedDType: TypeAlias = dict[str, IntoDType] | list[IntoDType] -"""Types that can be converted to a nested `DuckDBPyType` (e.g. for struct or union types).""" -IntoExprColumn: TypeAlias = Expression | str -"""Types that are, or can be used as a `ColumnExpression`.""" -IntoExpr: TypeAlias = IntoExprColumn | PythonLiteral -"""Any type that can be converted to an `Expression` (or is already one). + +NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decimal", "union"] +"""Identifiers for nested types in `DuckDBPyType.id`.""" + +DTypeIdentifiers: TypeAlias = Builtins | NestedIds +"""All possible identifiers for `DuckDBPyType.id`.""" + +StrIntoDType = Builtins | Literal["json"] | str +"""Any `str` that can be converted into a `DuckDBPyType`. + +The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL` + +Note: + A `StrEnum` will be handled the same way as a `str`.""" + +# NOTE: +# the `dict` and `list` types are `Any` due to the same limitation mentioned in `NestedLiteral`. +IntoDType: TypeAlias = ( + DuckDBPyType + | StrIntoDType + | type[NPScalarTypeLike] + | type[ScalarLiteral] + | type[list[Any]] + | type[dict[Any, Any]] + | dict[Any, Any] +) +"""All types that can be converted to a `DuckDBPyType`.
+ +They can be arbitrarily nested as long as their leaf values are convertible to `DuckDBPyType`. See Also: - https://duckdb.org/docs/stable/clients/python/conversion + https://duckdb.org/docs/stable/clients/python/types +""" + +# NOTE: here we keep the covariance "hack" and warn the user in the docstring, +# because otherwise we can just resort to `Any` for the `dict` and `list` types. +IntoNestedDType: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType] +"""Types that can be converted either into: + +- a nested `DuckDBPyType` (e.g. `STRUCT` or `UNION`) +- a schema for file reads + +Warning: + Only `dict` and `list` containers are accepted at runtime. + We use `Mapping` and `Sequence` here to satisfy the covariance of the element types. """ From 4cecd4e0b799024c6d8b03c6e282315977905c15 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:37:52 +0100 Subject: [PATCH 13/19] feat: - added various new literals for files arguments - moved join "how" literal in _typing for centralization - renamed IntoNestedDType -> IntoFields --- _duckdb-stubs/__init__.pyi | 22 +++++++++------------- _duckdb-stubs/_typing.pyi | 36 ++++++++++++++++++++++++++++++++---- 2 files changed, 41 insertions(+), 17 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 3a0e2f30..7e585d80 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -19,7 +19,8 @@ if typing.TYPE_CHECKING: PythonLiteral, IntoValues, IntoDType, - IntoNestedDType, + IntoFields, + JoinType, ) from duckdb import sqltypes, func @@ -447,17 +448,17 @@ class DuckDBPyConnection: def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... def remove_function(self, name: str) -> DuckDBPyConnection: ... def rollback(self) -> DuckDBPyConnection: ... - def row_type(self, fields: IntoNestedDType) -> sqltypes.DuckDBPyType: ... 
+ def row_type(self, fields: IntoFields) -> sqltypes.DuckDBPyType: ... def sql(self, query: Statement | str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... def sqltype(self, type_str: str) -> sqltypes.DuckDBPyType: ... def string_type(self, collation: str = "") -> sqltypes.DuckDBPyType: ... - def struct_type(self, fields: IntoNestedDType) -> sqltypes.DuckDBPyType: ... + def struct_type(self, fields: IntoFields) -> sqltypes.DuckDBPyType: ... def table(self, table_name: str) -> DuckDBPyRelation: ... def table_function(self, name: str, parameters: object = None) -> DuckDBPyRelation: ... def tf(self) -> dict[str, typing.Any]: ... def torch(self) -> dict[str, typing.Any]: ... def type(self, type_str: str) -> sqltypes.DuckDBPyType: ... - def union_type(self, members: IntoNestedDType) -> sqltypes.DuckDBPyType: ... + def union_type(self, members: IntoFields) -> sqltypes.DuckDBPyType: ... def unregister(self, view_name: str) -> DuckDBPyConnection: ... def unregister_filesystem(self, name: str) -> None: ... def values(self, *args: IntoValues) -> DuckDBPyRelation: ... @@ -571,12 +572,7 @@ class DuckDBPyRelation: def insert(self, values: lst[object]) -> None: ... def insert_into(self, table_name: str) -> None: ... def intersect(self, other_rel: Self) -> DuckDBPyRelation: ... - def join( - self, - other_rel: Self, - condition: IntoExprColumn, - how: typing.Literal["inner", "left", "right", "outer", "semi", "anti"] = "inner", - ) -> DuckDBPyRelation: ... + def join(self, other_rel: Self, condition: IntoExprColumn, how: JoinType = "inner") -> DuckDBPyRelation: ... def lag( self, expression: str, @@ -1391,7 +1387,7 @@ def register_filesystem( ) -> None: ... def remove_function(name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def rollback(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... 
-def row_type(fields: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def row_type(fields: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def rowcount(*, connection: DuckDBPyConnection | None = None) -> int: ... def set_default_connection(connection: DuckDBPyConnection) -> None: ... def sql( @@ -1403,7 +1399,7 @@ def sql( ) -> DuckDBPyRelation: ... def sqltype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def string_type(collation: str = "", *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... -def struct_type(fields: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def struct_type(fields: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def table(table_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def table_function( name: str, @@ -1415,7 +1411,7 @@ def tf(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any] def tokenize(query: str) -> lst[tuple[int, token_type]]: ... def torch(*, connection: DuckDBPyConnection | None = None) -> dict[str, typing.Any]: ... def type(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... -def union_type(members: IntoNestedDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def union_type(members: IntoFields, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def unregister(view_name: str, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def unregister_filesystem(name: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def values(*args: IntoValues, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... 
diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 7b85d85d..83ce1183 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -88,8 +88,6 @@ See Also: https://duckdb.org/docs/stable/clients/python/conversion """ -# the field_ids argument to to_parquet and write_parquet has a recursive structure -ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression """Types that can be converted to a table.""" # Datatypes conversions @@ -140,7 +138,7 @@ DTypeIdentifiers: TypeAlias = Builtins | NestedIds StrIntoDType = Builtins | Literal["json"] | str """Any `str` that can be converted into a `DuckDBPyType`. -The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL` +The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. Note: A `StrEnum` will be handled the same way as a `str`.""" @@ -166,7 +164,7 @@ See Also: # NOTE: here we keep the covariance "hack" and warn the user in the docstring, # because otherwise we can just resort to `Any` for the `dict` and `list` types. -IntoNestedDType: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType] +IntoFields: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType] """Types that can be converted either into: - a nested `DuckDBPyType` (e.g. `STRUCT` or `UNION`) @@ -176,3 +174,33 @@ Warning: Only `dict` and `list` containers are accepted at runtime. We use `Mapping` and `Sequence` here to satisfy the covariance of the element types. """ + +# Files related + +# NOTE: ideally HiveTypes should also be accepted as a Mapping[str, StrIntoDType]. 
+ColumnsTypes: TypeAlias = Mapping[str, StrIntoDType] +HiveTypes: TypeAlias = dict[str, StrIntoDType] +ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] + +_Auto: TypeAlias = Literal["auto"] +ParquetFieldsOptions: TypeAlias = _Auto | ParquetFieldIdsType +"""Types accepted for the `field_ids` parameter in parquet writing methods.""" + +_CompressionOptions: TypeAlias = Literal["none", "gzip", "zstd"] +"""Generally available compression options.""" + +CsvCompression: TypeAlias = _Auto | _CompressionOptions +CsvEncoding: TypeAlias = Literal["utf-8", "utf-16", "latin-1"] | str +"""Encoding options. + +All available options not in the literal values can be seen here: + https://duckdb.org/docs/stable/core_extensions/encodings +""" +JsonCompression: TypeAlias = _CompressionOptions | Literal["auto_detect"] +JsonFormat: TypeAlias = _Auto | Literal["unstructured", "newline_delimited", "array"] +JsonRecordOptions: TypeAlias = _Auto | Literal["true", "false"] + +# Other + +JoinType = Literal["inner", "left", "right", "outer", "semi", "anti"] +"""Types of join accepted by `DuckDBPyRelation.join` method.""" From edbdcf4ad760384974198448614b37abc61f4c3b Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Fri, 27 Feb 2026 15:42:16 +0100 Subject: [PATCH 14/19] feat: - added all new literals and type aliases in the main init file --- _duckdb-stubs/__init__.pyi | 102 ++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 47 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 7e585d80..7d927139 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -13,14 +13,22 @@ if typing.TYPE_CHECKING: from builtins import list as lst from collections.abc import Callable, Iterable, Sequence, Mapping from ._typing import ( - ParquetFieldIdsType, + ParquetFieldsOptions, IntoExpr, IntoExprColumn, PythonLiteral, IntoValues, IntoDType, IntoFields, +
StrIntoDType, JoinType, + JsonCompression, + JsonFormat, + JsonRecordOptions, + CsvEncoding, + CsvCompression, + HiveTypes, + ColumnsTypes, ) from duckdb import sqltypes, func @@ -227,7 +235,7 @@ class DuckDBPyConnection: def cursor(self) -> DuckDBPyConnection: ... def decimal_type(self, width: typing.SupportsInt, scale: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def dtype(self, type_str: str) -> sqltypes.DuckDBPyType: ... + def dtype(self, type_str: StrIntoDType) -> sqltypes.DuckDBPyType: ... def duplicate(self) -> DuckDBPyConnection: ... def enum_type(self, name: str, type: sqltypes.DuckDBPyType, values: lst[typing.Any]) -> sqltypes.DuckDBPyType: ... def execute(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... @@ -254,18 +262,18 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -276,8 +284,8 @@ class DuckDBPyConnection: null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, 
ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -291,7 +299,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... @@ -355,18 +363,18 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -377,8 +385,8 @@ class DuckDBPyConnection: null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -392,7 +400,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... 
@@ -400,14 +408,14 @@ class DuckDBPyConnection: self, path_or_buffer: str | bytes | os.PathLike[str], *, - columns: dict[str, str] | None = None, + columns: ColumnsTypes | None = None, sample_size: int | None = None, maximum_depth: int | None = None, - records: str | None = None, - format: str | None = None, + records: JsonRecordOptions | None = None, + format: JsonFormat | None = None, date_format: str | None = None, timestamp_format: str | None = None, - compression: str | None = None, + compression: JsonCompression | None = None, maximum_object_size: int | None = None, ignore_errors: bool | None = None, convert_strings_to_integers: bool | None = None, @@ -417,7 +425,7 @@ class DuckDBPyConnection: filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -671,8 +679,8 @@ class DuckDBPyRelation: def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def select(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... - def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... - def select_types(self, types: lst[sqltypes.DuckDBPyType | str]) -> DuckDBPyRelation: ... + def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... + def select_types(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... def set_alias(self, alias: str) -> DuckDBPyRelation: ... 
def show( self, @@ -716,8 +724,8 @@ class DuckDBPyRelation: date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, @@ -730,7 +738,7 @@ class DuckDBPyRelation: file_name: str, *, compression: str | None = None, - field_ids: ParquetFieldIdsType | typing.Literal["auto"] | None = None, + field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: int | str | None = None, row_group_size: int | None = None, overwrite: bool | None = None, @@ -773,8 +781,8 @@ class DuckDBPyRelation: date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, @@ -786,7 +794,7 @@ class DuckDBPyRelation: file_name: str, *, compression: str | None = None, - field_ids: ParquetFieldIdsType | typing.Literal["auto"] | None = None, + field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: str | int | None = None, row_group_size: int | None = None, overwrite: bool | None = None, @@ -1076,7 +1084,7 @@ def df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = @typing.overload def df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def distinct(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... -def dtype(type_str: str, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... 
+def dtype(type_str: StrIntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def duplicate(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... def enum_type( name: str, @@ -1141,18 +1149,18 @@ def from_arrow( def from_csv_auto( path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -1163,8 +1171,8 @@ def from_csv_auto( null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -1178,7 +1186,7 @@ def from_csv_auto( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... @@ -1288,18 +1296,18 @@ def query_progress(*, connection: DuckDBPyConnection | None = None) -> float: .. 
def read_csv( path_or_buffer: str | bytes | os.PathLike[str] | os.PathLike[bytes] | typing.IO[bytes], header: bool | int | None = None, - compression: str | None = None, + compression: CsvCompression | None = None, sep: str | None = None, delimiter: str | None = None, files_to_sniff: int | None = None, comment: str | None = None, thousands: str | None = None, - dtype: dict[str, str] | lst[str] | None = None, + dtype: IntoFields | None = None, na_values: str | lst[str] | None = None, skiprows: int | None = None, quotechar: str | None = None, escapechar: str | None = None, - encoding: str | None = None, + encoding: CsvEncoding | None = None, parallel: bool | None = None, date_format: str | None = None, timestamp_format: str | None = None, @@ -1310,8 +1318,8 @@ def read_csv( null_padding: bool | None = None, names: lst[str] | None = None, lineterminator: str | None = None, - columns: dict[str, str] | None = None, - auto_type_candidates: lst[str] | None = None, + columns: ColumnsTypes | None = None, + auto_type_candidates: lst[StrIntoDType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -1325,21 +1333,21 @@ def read_csv( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, strict_mode: bool | None = None, ) -> DuckDBPyRelation: ... 
def read_json( path_or_buffer: str | bytes | os.PathLike[str], *, - columns: dict[str, str] | None = None, + columns: ColumnsTypes | None = None, sample_size: int | None = None, maximum_depth: int | None = None, - records: str | None = None, - format: str | None = None, + records: JsonRecordOptions | None = None, + format: JsonFormat | None = None, date_format: str | None = None, timestamp_format: str | None = None, - compression: str | None = None, + compression: JsonCompression | None = None, maximum_object_size: int | None = None, ignore_errors: bool | None = None, convert_strings_to_integers: bool | None = None, @@ -1349,7 +1357,7 @@ def read_json( filename: bool | str | None = None, hive_partitioning: bool | None = None, union_by_name: bool | None = None, - hive_types: dict[str, str] | None = None, + hive_types: HiveTypes | None = None, hive_types_autocast: bool | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -1428,8 +1436,8 @@ def write_csv( date_format: str | None = None, timestamp_format: str | None = None, quoting: str | int | None = None, - encoding: str | None = None, - compression: str | None = None, + encoding: CsvEncoding | None = None, + compression: CsvCompression | None = None, overwrite: bool | None = None, per_thread_output: bool | None = None, use_tmp_file: bool | None = None, From abf1a44003e938ca380f41fdf0cf3d2c97cc6647 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:57:29 +0100 Subject: [PATCH 15/19] fixs: - Builtins Literal had incorrect values for time/timestamp with time zone - typos fixes - renamed `DType` for Literals to `PyType` to keep the naming conventions consistent --- _duckdb-stubs/__init__.pyi | 40 +++++++++++++++++------------------ _duckdb-stubs/_expression.pyi | 4 ++-- _duckdb-stubs/_sqltypes.pyi | 6 +++--- _duckdb-stubs/_typing.pyi | 35 +++++++++++++++--------------- 4 files changed, 43 insertions(+), 42 deletions(-) diff --git 
a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 7d927139..8cde3879 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -18,9 +18,9 @@ if typing.TYPE_CHECKING: IntoExprColumn, PythonLiteral, IntoValues, - IntoDType, + IntoPyType, IntoFields, - StrIntoDType, + StrIntoPyType, JoinType, JsonCompression, JsonFormat, @@ -210,7 +210,7 @@ class DuckDBPyConnection: def __enter__(self) -> Self: ... def __exit__(self, exc_type: object, exc: object, traceback: object) -> None: ... def append(self, table_name: str, df: pandas.DataFrame, *, by_name: bool = False) -> DuckDBPyConnection: ... - def array_type(self, type: IntoDType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... + def array_type(self, type: IntoPyType, size: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... def arrow(self, rows_per_batch: typing.SupportsInt = 1000000) -> pyarrow.lib.RecordBatchReader: """Alias of to_arrow_reader(). We recommend using to_arrow_reader() instead.""" ... @@ -224,8 +224,8 @@ class DuckDBPyConnection: self, name: str, function: Callable[..., PythonLiteral], - parameters: lst[IntoDType] | None = None, - return_type: IntoDType | None = None, + parameters: lst[IntoPyType] | None = None, + return_type: IntoPyType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -235,7 +235,7 @@ class DuckDBPyConnection: def cursor(self) -> DuckDBPyConnection: ... def decimal_type(self, width: typing.SupportsInt, scale: typing.SupportsInt) -> sqltypes.DuckDBPyType: ... def df(self, *, date_as_object: bool = False) -> pandas.DataFrame: ... - def dtype(self, type_str: StrIntoDType) -> sqltypes.DuckDBPyType: ... + def dtype(self, type_str: StrIntoPyType) -> sqltypes.DuckDBPyType: ... def duplicate(self) -> DuckDBPyConnection: ... def enum_type(self, name: str, type: sqltypes.DuckDBPyType, values: lst[typing.Any]) -> sqltypes.DuckDBPyType: ... 
def execute(self, query: Statement | str, parameters: object = None) -> DuckDBPyConnection: ... @@ -285,7 +285,7 @@ class DuckDBPyConnection: names: lst[str] | None = None, lineterminator: str | None = None, columns: ColumnsTypes | None = None, - auto_type_candidates: lst[StrIntoDType] | None = None, + auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -344,9 +344,9 @@ class DuckDBPyConnection: def disable_profiling(self) -> None: ... def interrupt(self) -> None: ... def list_filesystems(self) -> lst[str]: ... - def list_type(self, type: IntoDType) -> sqltypes.DuckDBPyType: ... + def list_type(self, type: IntoPyType) -> sqltypes.DuckDBPyType: ... def load_extension(self, extension: str) -> None: ... - def map_type(self, key: IntoDType, value: IntoDType) -> sqltypes.DuckDBPyType: ... + def map_type(self, key: IntoPyType, value: IntoPyType) -> sqltypes.DuckDBPyType: ... @typing.overload def pl( self, rows_per_batch: typing.SupportsInt = 1000000, *, lazy: typing.Literal[False] = ... @@ -386,7 +386,7 @@ class DuckDBPyConnection: names: lst[str] | None = None, lineterminator: str | None = None, columns: ColumnsTypes | None = None, - auto_type_candidates: lst[StrIntoDType] | None = None, + auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -679,8 +679,8 @@ class DuckDBPyRelation: def rank_dense(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def row_number(self, window_spec: str, projected_columns: str = "") -> DuckDBPyRelation: ... def select(self, *args: IntoExpr, groups: str = "") -> DuckDBPyRelation: ... - def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... - def select_types(self, types: lst[sqltypes.DuckDBPyType | StrIntoDType]) -> DuckDBPyRelation: ... 
+ def select_dtypes(self, types: lst[sqltypes.DuckDBPyType | StrIntoPyType]) -> DuckDBPyRelation: ... + def select_types(self, types: lst[sqltypes.DuckDBPyType | StrIntoPyType]) -> DuckDBPyRelation: ... def set_alias(self, alias: str) -> DuckDBPyRelation: ... def show( self, @@ -1033,7 +1033,7 @@ def append( table_name: str, df: pandas.DataFrame, *, by_name: bool = False, connection: DuckDBPyConnection | None = None ) -> DuckDBPyConnection: ... def array_type( - type: IntoDType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None + type: IntoPyType, size: typing.SupportsInt, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... @typing.overload def arrow( @@ -1062,8 +1062,8 @@ def connect( def create_function( name: str, function: Callable[..., PythonLiteral], - parameters: lst[IntoDType] | None = None, - return_type: IntoDType | None = None, + parameters: lst[IntoPyType] | None = None, + return_type: IntoPyType | None = None, *, type: func.PythonUDFType = ..., null_handling: func.FunctionNullHandling = ..., @@ -1084,7 +1084,7 @@ def df(*, date_as_object: bool = False, connection: DuckDBPyConnection | None = @typing.overload def df(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... def distinct(df: pandas.DataFrame, *, connection: DuckDBPyConnection | None = None) -> DuckDBPyRelation: ... -def dtype(type_str: StrIntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def dtype(type_str: StrIntoPyType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def duplicate(*, connection: DuckDBPyConnection | None = None) -> DuckDBPyConnection: ... 
def enum_type( name: str, @@ -1172,7 +1172,7 @@ def from_csv_auto( names: lst[str] | None = None, lineterminator: str | None = None, columns: ColumnsTypes | None = None, - auto_type_candidates: lst[StrIntoDType] | None = None, + auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, @@ -1246,10 +1246,10 @@ def get_profiling_information(*, connection: DuckDBPyConnection | None = None, f def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> lst[str]: ... -def list_type(type: IntoDType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... +def list_type(type: IntoPyType, *, connection: DuckDBPyConnection | None = None) -> sqltypes.DuckDBPyType: ... def load_extension(extension: str, *, connection: DuckDBPyConnection | None = None) -> None: ... def map_type( - key: IntoDType, value: IntoDType, *, connection: DuckDBPyConnection | None = None + key: IntoPyType, value: IntoPyType, *, connection: DuckDBPyConnection | None = None ) -> sqltypes.DuckDBPyType: ... 
def order( df: pandas.DataFrame, order_expr: str, *, connection: DuckDBPyConnection | None = None @@ -1319,7 +1319,7 @@ def read_csv( names: lst[str] | None = None, lineterminator: str | None = None, columns: ColumnsTypes | None = None, - auto_type_candidates: lst[StrIntoDType] | None = None, + auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, ignore_errors: bool | None = None, store_rejects: bool | None = None, diff --git a/_duckdb-stubs/_expression.pyi b/_duckdb-stubs/_expression.pyi index b4d4b1a6..b40513e4 100644 --- a/_duckdb-stubs/_expression.pyi +++ b/_duckdb-stubs/_expression.pyi @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING, Any, overload if TYPE_CHECKING: - from ._typing import IntoExpr, IntoDType + from ._typing import IntoExpr, IntoPyType class Expression: def __add__(self, other: IntoExpr) -> Expression: ... @@ -39,7 +39,7 @@ class Expression: def alias(self, name: str) -> Expression: ... def asc(self) -> Expression: ... def between(self, lower: IntoExpr, upper: IntoExpr) -> Expression: ... - def cast(self, type: IntoDType) -> Expression: ... + def cast(self, type: IntoPyType) -> Expression: ... def collate(self, collation: str) -> Expression: ... def desc(self) -> Expression: ... def get_name(self) -> str: ... diff --git a/_duckdb-stubs/_sqltypes.pyi b/_duckdb-stubs/_sqltypes.pyi index f5942805..44a9bc6b 100644 --- a/_duckdb-stubs/_sqltypes.pyi +++ b/_duckdb-stubs/_sqltypes.pyi @@ -1,6 +1,6 @@ import duckdb import typing -from ._typing import StrIntoDType, DTypeIdentifiers +from ._typing import StrIntoPyType, PyTypeIds __all__: list[str] = [ "BIGINT", @@ -40,13 +40,13 @@ class DuckDBPyType: def __getitem__(self, name: str) -> DuckDBPyType: ... def __hash__(self) -> int: ... @typing.overload - def __init__(self, type_str: StrIntoDType, connection: duckdb.DuckDBPyConnection) -> None: ... + def __init__(self, type_str: StrIntoPyType, connection: duckdb.DuckDBPyConnection) -> None: ... 
@typing.overload def __init__(self, obj: object) -> None: ... @property def children(self) -> list[tuple[str, DuckDBPyType | int | list[str]]]: ... @property - def id(self) -> DTypeIdentifiers: ... + def id(self) -> PyTypeIds: ... BIGINT: DuckDBPyType # value = BIGINT BIT: DuckDBPyType # value = BIT diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 83ce1183..6995ffb3 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -36,7 +36,7 @@ class NPScalarTypeLike(NPProtocol, Protocol): class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol): """`numpy.ndarray` Protocol. - This is needed to accept numpy arrays as literals in expressions, without emitting type checker errors about unknown symbol if the user doesn't have numpy installed. + This is needed to accept numpy arrays as literals in expressions, without emitting type checker errors about unknown symbol if the user doesn't have `numpy` installed. Note: Using `np.typing.NDArray` is still the best option for return types. @@ -73,7 +73,7 @@ NonNestedLiteral: TypeAlias = ScalarLiteral | TemporalLiteral | UUID | Decimal | NestedLiteral: TypeAlias = list[Any] | tuple[Any, ...] | dict[Any, Any] | NPArrayLike[Any, Any] """Containers types that can be converted to a nested `ConstantExpression` (e.g. to `ARRAY` or `STRUCT`). -Those types can be aribtraly nested, as long as their leaf values are `PythonLiteral`.""" +Those types can be arbitrarily nested, as long as their leaf values are `PythonLiteral`.""" PythonLiteral: TypeAlias = NonNestedLiteral | NestedLiteral | None """Python objects that can be converted to a `ConstantExpression`.""" @@ -89,8 +89,9 @@ See Also: """ IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] 
| Expression -"""Types that can be converted to a table.""" -# Datatypes conversions +"""Types that can be converted to a table of values.""" + +# PyType conversions Builtins: TypeAlias = Literal[ "bigint", @@ -106,12 +107,12 @@ Builtins: TypeAlias = Literal[ "interval", "smallint", "null", - "time_tz", + "time with time zone", "time", "timestamp_ms", "timestamp_ns", "timestamp_s", - "timestamp_tz", + "timestamp with time zone", "timestamp", "tinyint", "ubigint", @@ -122,7 +123,7 @@ Builtins: TypeAlias = Literal[ "uuid", "varchar", ] -"""Literals strings convertibles into `DuckDBPyType` instances. +"""Literals `str` that can be converted into `DuckDBPyType` instances. Note: Passing the same values in uppercase is also accepted. @@ -132,22 +133,22 @@ Note: NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decimal", "union"] """Identifiers for nested types in `DuckDBPyType.id`.""" -DTypeIdentifiers: TypeAlias = Builtins | NestedIds +PyTypeIds: TypeAlias = Builtins | NestedIds """All possible identifiers for `DuckDBPyType.id`.""" -StrIntoDType = Builtins | Literal["json"] | str +StrIntoPyType = Builtins | Literal["json"] | str """Any `str` that can be converted into a `DuckDBPyType`. -The dtypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. +The pytypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. Note: A `StrEnum` will be handled the same way as a `str`.""" # NOTE: # the `dict` and `list` types are `Any` due to the same limitation mentionned in `NestedLiteral`. -IntoDType: TypeAlias = ( +IntoPyType: TypeAlias = ( DuckDBPyType - | StrIntoDType + | StrIntoPyType | type[NPScalarTypeLike] | type[ScalarLiteral] | type[list[Any]] @@ -164,7 +165,7 @@ See Also: # NOTE: here we keep the covariance "hack" and warn the user in the docstring, # because otherwise we can just resort to `Any` for the `dict` and `list` types. 
-IntoFields: TypeAlias = Mapping[str, IntoDType] | Sequence[IntoDType] +IntoFields: TypeAlias = Mapping[str, IntoPyType] | Sequence[IntoPyType] """Types that can be converted either into: - a nested `DuckDBPyType` (e.g. `STRUCT` or `UNION`) @@ -177,9 +178,9 @@ Warning: # Files related -# NOTE: ideally HiveTypes should also be accepted as a Mapping[str, StrIntoDType]. -ColumnsTypes: TypeAlias = Mapping[str, StrIntoDType] -HiveTypes: TypeAlias = dict[str, StrIntoDType] +# NOTE: ideally HiveTypes should also be accepted as a Mapping[str, StrIntoPyType]. +ColumnsTypes: TypeAlias = Mapping[str, StrIntoPyType] +HiveTypes: TypeAlias = dict[str, StrIntoPyType] ParquetFieldIdsType: TypeAlias = Mapping[str, int | ParquetFieldIdsType] _Auto: TypeAlias = Literal["auto"] @@ -196,7 +197,7 @@ CsvEncoding: TypeAlias = Literal["utf-8", "utf-16", "latin-1"] | str All availables options not in the literal values can be seen here: https://duckdb.org/docs/stable/core_extensions/encodings """ -JsonCompression: TypeAlias = _CompressionOptions | Literal["auto_detect"] +JsonCompression: TypeAlias = Literal["auto_detect"] | _CompressionOptions JsonFormat: TypeAlias = _Auto | Literal["unstructured", "newline_delimited", "array"] JsonRecordOptions: TypeAlias = _Auto | Literal["true", "false"] From 2d541b9833211c0a02e708e60e0c8b746cce5589 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:09:17 +0100 Subject: [PATCH 16/19] lint fixes + fixed missing TypeAlias markers --- _duckdb-stubs/_typing.pyi | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 6995ffb3..25514dca 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -12,7 +12,6 @@ if TYPE_CHECKING: # Numpy protocols -_T_co = TypeVar("_T_co", covariant=True) _S_co = TypeVar("_S_co", bound=tuple[Any, ...], covariant=True) _D_co = TypeVar("_D_co", covariant=True) @@ 
-55,7 +54,7 @@ class NPArrayLike(NPProtocol, Generic[_S_co, _D_co], Protocol): # Expression and values conversions NumericLiteral: TypeAlias = int | float -"""Python objects that can be converted to a numerical `Expression` or `DuckDBPyType` (integer or floating points numbers.)""" +"""Python objects that can be converted to a numerical `Expression` or `DuckDBPyType`.""" TemporalLiteral: TypeAlias = date | datetime | time | timedelta BlobLiteral: TypeAlias = bytes | bytearray """Python objects that can be converted to a `BLOB` `ConstantExpression` or `DuckDBPyType`. @@ -136,8 +135,8 @@ NestedIds: TypeAlias = Literal["list", "struct", "array", "enum", "map", "decima PyTypeIds: TypeAlias = Builtins | NestedIds """All possible identifiers for `DuckDBPyType.id`.""" -StrIntoPyType = Builtins | Literal["json"] | str -"""Any `str` that can be converted into a `DuckDBPyType`. +StrIntoPyType: TypeAlias = Builtins | Literal["json"] | str +"""Any `str` that can be converted into a `DuckDBPyType`. The pytypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. @@ -203,5 +202,5 @@ JsonRecordOptions: TypeAlias = _Auto | Literal["true", "false"] # Other -JoinType = Literal["inner", "left", "right", "outer", "semi", "anti"] +JoinType: TypeAlias = Literal["inner", "left", "right", "outer", "semi", "anti"] """Types of join accepted by `DuckDBPyRelation.join` method.""" From cbd0efdac3d715d1cc5e18b85e0e1e9da7fc364b Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:59:50 +0100 Subject: [PATCH 17/19] bugfix/refactor: - Fixed StatementType members, they had incorrect values. the "_STATEMENT" part was only on the C++ side, not on the python side - Moved all enums in __init__ file in a new _enums.pyi file, to avoid bloating the init file - Created a new CppEnum Protocol, and used it as a base class for all public enums to reduce duplication. 
- Created literals type and using them as argument in conjunction of the corresponding enum whenever pertinent --- _duckdb-stubs/__init__.pyi | 193 +++---------------------------------- _duckdb-stubs/_enums.pyi | 110 +++++++++++++++++++++ _duckdb-stubs/_func.pyi | 41 ++------ 3 files changed, 135 insertions(+), 209 deletions(-) create mode 100644 _duckdb-stubs/_enums.pyi diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 8cde3879..3e17e08c 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -3,6 +3,15 @@ import pathlib import typing from typing_extensions import Self from ._expression import Expression +from ._enums import ( + CSVLineTerminator, + StatementType, + ExpectedResultType, + ExplainType, + PythonExceptionHandling, + RenderMode, + token_type, +) if typing.TYPE_CHECKING: import fsspec @@ -30,6 +39,7 @@ if typing.TYPE_CHECKING: HiveTypes, ColumnsTypes, ) + from ._enums import ExplainTypeLiteral, CSVLineTerminatorLiteral, RenderModeLiteral from duckdb import sqltypes, func __all__: lst[str] = [ @@ -175,28 +185,6 @@ __all__: lst[str] = [ ] class BinderException(ProgrammingError): ... - -class CSVLineTerminator: - CARRIAGE_RETURN_LINE_FEED: typing.ClassVar[ - CSVLineTerminator - ] # value = - LINE_FEED: typing.ClassVar[CSVLineTerminator] # value = - __members__: typing.ClassVar[ - dict[str, CSVLineTerminator] - ] # value = {'LINE_FEED': , 'CARRIAGE_RETURN_LINE_FEED': } # noqa: E501 - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - class CatalogException(ProgrammingError): ... class ConnectionException(OperationalError): ... 
class ConstraintException(IntegrityError): ... @@ -283,7 +271,7 @@ class DuckDBPyConnection: normalize_names: bool | None = None, null_padding: bool | None = None, names: lst[str] | None = None, - lineterminator: str | None = None, + lineterminator: CSVLineTerminator | CSVLineTerminatorLiteral | None = None, columns: ColumnsTypes | None = None, auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, @@ -384,7 +372,7 @@ class DuckDBPyConnection: normalize_names: bool | None = None, null_padding: bool | None = None, names: lst[str] | None = None, - lineterminator: str | None = None, + lineterminator: CSVLineTerminator | CSVLineTerminatorLiteral | None = None, columns: ColumnsTypes | None = None, auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, @@ -546,7 +534,7 @@ class DuckDBPyRelation: def distinct(self) -> DuckDBPyRelation: ... def except_(self, other_rel: Self) -> DuckDBPyRelation: ... def execute(self) -> DuckDBPyRelation: ... - def explain(self, type: ExplainType = ExplainType.STANDARD) -> str: ... + def explain(self, type: ExplainType | ExplainTypeLiteral = ExplainType.STANDARD) -> str: ... def favg( self, expression: str, groups: str = "", window_spec: str = "", projected_columns: str = "" ) -> DuckDBPyRelation: ... @@ -689,7 +677,7 @@ class DuckDBPyRelation: max_rows: typing.SupportsInt | None = None, max_col_width: typing.SupportsInt | None = None, null_value: str | None = None, - render_mode: RenderMode | None = None, + render_mode: RenderMode | RenderModeLiteral | None = None, ) -> None: ... def sort(self, *args: IntoExpr) -> DuckDBPyRelation: ... def sql_query(self) -> str: ... @@ -822,46 +810,6 @@ class DuckDBPyRelation: def types(self) -> lst[sqltypes.DuckDBPyType]: ... class Error(Exception): ... 
- -class ExpectedResultType: - CHANGED_ROWS: typing.ClassVar[ExpectedResultType] # value = - NOTHING: typing.ClassVar[ExpectedResultType] # value = - QUERY_RESULT: typing.ClassVar[ExpectedResultType] # value = - __members__: typing.ClassVar[ - dict[str, ExpectedResultType] - ] # value = {'QUERY_RESULT': , 'CHANGED_ROWS': , 'NOTHING': } # noqa: E501 - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class ExplainType: - ANALYZE: typing.ClassVar[ExplainType] # value = - STANDARD: typing.ClassVar[ExplainType] # value = - __members__: typing.ClassVar[ - dict[str, ExplainType] - ] # value = {'STANDARD': , 'ANALYZE': } - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - class FatalException(DatabaseError): ... class HTTPException(IOException): @@ -885,45 +833,6 @@ class OutOfRangeException(DataError): ... class ParserException(ProgrammingError): ... class PermissionException(DatabaseError): ... class ProgrammingError(DatabaseError): ... 
- -class PythonExceptionHandling: - DEFAULT: typing.ClassVar[PythonExceptionHandling] # value = - RETURN_NULL: typing.ClassVar[PythonExceptionHandling] # value = - __members__: typing.ClassVar[ - dict[str, PythonExceptionHandling] - ] # value = {'DEFAULT': , 'RETURN_NULL': } # noqa: E501 - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - -class RenderMode: - COLUMNS: typing.ClassVar[RenderMode] # value = - ROWS: typing.ClassVar[RenderMode] # value = - __members__: typing.ClassVar[ - dict[str, RenderMode] - ] # value = {'ROWS': , 'COLUMNS': } - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - class SequenceException(DatabaseError): ... class SerializationException(OperationalError): ... @@ -937,81 +846,11 @@ class Statement: @property def type(self) -> StatementType: ... 
-class StatementType: - ALTER_STATEMENT: typing.ClassVar[StatementType] # value = - ANALYZE_STATEMENT: typing.ClassVar[StatementType] # value = - ATTACH_STATEMENT: typing.ClassVar[StatementType] # value = - CALL_STATEMENT: typing.ClassVar[StatementType] # value = - COPY_DATABASE_STATEMENT: typing.ClassVar[StatementType] # value = - COPY_STATEMENT: typing.ClassVar[StatementType] # value = - CREATE_FUNC_STATEMENT: typing.ClassVar[StatementType] # value = - CREATE_STATEMENT: typing.ClassVar[StatementType] # value = - DELETE_STATEMENT: typing.ClassVar[StatementType] # value = - DETACH_STATEMENT: typing.ClassVar[StatementType] # value = - DROP_STATEMENT: typing.ClassVar[StatementType] # value = - EXECUTE_STATEMENT: typing.ClassVar[StatementType] # value = - EXPLAIN_STATEMENT: typing.ClassVar[StatementType] # value = - EXPORT_STATEMENT: typing.ClassVar[StatementType] # value = - EXTENSION_STATEMENT: typing.ClassVar[StatementType] # value = - INSERT_STATEMENT: typing.ClassVar[StatementType] # value = - INVALID_STATEMENT: typing.ClassVar[StatementType] # value = - LOAD_STATEMENT: typing.ClassVar[StatementType] # value = - LOGICAL_PLAN_STATEMENT: typing.ClassVar[StatementType] # value = - MERGE_INTO_STATEMENT: typing.ClassVar[StatementType] # value = - MULTI_STATEMENT: typing.ClassVar[StatementType] # value = - PRAGMA_STATEMENT: typing.ClassVar[StatementType] # value = - PREPARE_STATEMENT: typing.ClassVar[StatementType] # value = - RELATION_STATEMENT: typing.ClassVar[StatementType] # value = - SELECT_STATEMENT: typing.ClassVar[StatementType] # value = - SET_STATEMENT: typing.ClassVar[StatementType] # value = - TRANSACTION_STATEMENT: typing.ClassVar[StatementType] # value = - UPDATE_STATEMENT: typing.ClassVar[StatementType] # value = - VACUUM_STATEMENT: typing.ClassVar[StatementType] # value = - VARIABLE_SET_STATEMENT: typing.ClassVar[StatementType] # value = - __members__: typing.ClassVar[ - dict[str, StatementType] - ] # value = {'INVALID_STATEMENT': , 'SELECT_STATEMENT': 
, 'INSERT_STATEMENT': , 'UPDATE_STATEMENT': , 'CREATE_STATEMENT': , 'DELETE_STATEMENT': , 'PREPARE_STATEMENT': , 'EXECUTE_STATEMENT': , 'ALTER_STATEMENT': , 'TRANSACTION_STATEMENT': , 'COPY_STATEMENT': , 'ANALYZE_STATEMENT': , 'VARIABLE_SET_STATEMENT': , 'CREATE_FUNC_STATEMENT': , 'EXPLAIN_STATEMENT': , 'DROP_STATEMENT': , 'EXPORT_STATEMENT': , 'PRAGMA_STATEMENT': , 'VACUUM_STATEMENT': , 'CALL_STATEMENT': , 'SET_STATEMENT': , 'LOAD_STATEMENT': , 'RELATION_STATEMENT': , 'EXTENSION_STATEMENT': , 'LOGICAL_PLAN_STATEMENT': , 'ATTACH_STATEMENT': , 'DETACH_STATEMENT': , 'MULTI_STATEMENT': , 'COPY_DATABASE_STATEMENT': , 'MERGE_INTO_STATEMENT': } # noqa: E501 - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... - def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - class SyntaxException(ProgrammingError): ... class TransactionException(OperationalError): ... class TypeMismatchException(DataError): ... class Warning(Exception): ... -class token_type: - __members__: typing.ClassVar[ - dict[str, token_type] - ] # value = {'identifier': , 'numeric_const': , 'string_const': , 'operator': , 'keyword': , 'comment': } # noqa: E501 - comment: typing.ClassVar[token_type] # value = - identifier: typing.ClassVar[token_type] # value = - keyword: typing.ClassVar[token_type] # value = - numeric_const: typing.ClassVar[token_type] # value = - operator: typing.ClassVar[token_type] # value = - string_const: typing.ClassVar[token_type] # value = - def __eq__(self, other: object) -> bool: ... - def __getstate__(self) -> int: ... - def __hash__(self) -> int: ... - def __index__(self) -> int: ... - def __init__(self, value: typing.SupportsInt) -> None: ... 
- def __int__(self) -> int: ... - def __ne__(self, other: object) -> bool: ... - def __setstate__(self, state: typing.SupportsInt) -> None: ... - @property - def name(self) -> str: ... - @property - def value(self) -> int: ... - def CaseExpression(condition: IntoExpr, value: IntoExpr) -> Expression: ... def CoalesceOperator(*args: IntoExpr) -> Expression: ... def ColumnExpression(*args: str) -> Expression: ... @@ -1170,7 +1009,7 @@ def from_csv_auto( normalize_names: bool | None = None, null_padding: bool | None = None, names: lst[str] | None = None, - lineterminator: str | None = None, + lineterminator: CSVLineTerminator | CSVLineTerminatorLiteral | None = None, columns: ColumnsTypes | None = None, auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, @@ -1317,7 +1156,7 @@ def read_csv( normalize_names: bool | None = None, null_padding: bool | None = None, names: lst[str] | None = None, - lineterminator: str | None = None, + lineterminator: CSVLineTerminator | CSVLineTerminatorLiteral | None = None, columns: ColumnsTypes | None = None, auto_type_candidates: lst[StrIntoPyType] | None = None, max_line_size: int | None = None, diff --git a/_duckdb-stubs/_enums.pyi b/_duckdb-stubs/_enums.pyi new file mode 100644 index 00000000..edc41236 --- /dev/null +++ b/_duckdb-stubs/_enums.pyi @@ -0,0 +1,110 @@ +from typing import ClassVar, Protocol, SupportsInt, Literal, TypeAlias + +class CppEnum(Protocol): + """Base Enum-like Protocol class in C++ code. + + Correspond to `py::enum_` in Pybind11. + + Note: + This is marked as a `Protocol` to specify that an `isinstance` check against this class won't work, as this is a typing-only construct. + """ + def __eq__(self, other: object) -> bool: ... + def __getstate__(self) -> int: ... + def __hash__(self) -> int: ... + def __index__(self) -> int: ... + def __init__(self, value: SupportsInt) -> None: ... + def __int__(self) -> int: ... + def __ne__(self, other: object) -> bool: ... 
+    def __setstate__(self, state: SupportsInt) -> None: ...
+    @property
+    def name(self) -> str: ...
+    @property
+    def value(self) -> int: ...
+
+class CSVLineTerminator(CppEnum):
+    CARRIAGE_RETURN_LINE_FEED: ClassVar[CSVLineTerminator] # value = 
+    LINE_FEED: ClassVar[CSVLineTerminator] # value = 
+    __members__: ClassVar[
+        dict[str, CSVLineTerminator]
+    ] # value = {'LINE_FEED': , 'CARRIAGE_RETURN_LINE_FEED': } # noqa: E501
+
+CSVLineTerminatorLiteral: TypeAlias = Literal["\r\n", "\n"]
+
+class ExpectedResultType(CppEnum):
+    CHANGED_ROWS: ClassVar[ExpectedResultType] # value = 
+    NOTHING: ClassVar[ExpectedResultType] # value = 
+    QUERY_RESULT: ClassVar[ExpectedResultType] # value = 
+    __members__: ClassVar[
+        dict[str, ExpectedResultType]
+    ] # value = {'QUERY_RESULT': , 'CHANGED_ROWS': , 'NOTHING': } # noqa: E501
+
+class ExplainType(CppEnum):
+    ANALYZE: ClassVar[ExplainType] # value = 
+    STANDARD: ClassVar[ExplainType] # value = 
+    __members__: ClassVar[
+        dict[str, ExplainType]
+    ] # value = {'STANDARD': , 'ANALYZE': }
+
+ExplainTypeLiteral: TypeAlias = Literal["analyze", "standard"]
+
+class PythonExceptionHandling(CppEnum):
+    DEFAULT: ClassVar[PythonExceptionHandling] # value = 
+    RETURN_NULL: ClassVar[PythonExceptionHandling] # value = 
+    __members__: ClassVar[
+        dict[str, PythonExceptionHandling]
+    ] # value = {'DEFAULT': , 'RETURN_NULL': } # noqa: E501
+
+class RenderMode(CppEnum):
+    COLUMNS: ClassVar[RenderMode] # value = 
+    ROWS: ClassVar[RenderMode] # value = 
+    __members__: ClassVar[
+        dict[str, RenderMode]
+    ] # value = {'ROWS': , 'COLUMNS': }
+
+RenderModeLiteral: TypeAlias = Literal["columns", "rows"]
+
+class StatementType(CppEnum):
+    ALTER: ClassVar[StatementType] # value = 
+    ANALYZE: ClassVar[StatementType] # value = 
+    ATTACH: ClassVar[StatementType] # value = 
+    CALL: ClassVar[StatementType] # value = 
+    COPY_DATABASE: ClassVar[StatementType] # value = 
+    COPY: ClassVar[StatementType] # value = 
+    CREATE_FUNC: ClassVar[StatementType] # value = 
+    CREATE: ClassVar[StatementType] # value = 
+    DELETE: ClassVar[StatementType] # value = 
+    DETACH: ClassVar[StatementType] # value = 
+    DROP: ClassVar[StatementType] # value = 
+    EXECUTE: ClassVar[StatementType] # value = 
+    EXPLAIN: ClassVar[StatementType] # value = 
+    EXPORT: ClassVar[StatementType] # value = 
+    EXTENSION: ClassVar[StatementType] # value = 
+    INSERT: ClassVar[StatementType] # value = 
+    INVALID: ClassVar[StatementType] # value = 
+    LOAD: ClassVar[StatementType] # value = 
+    LOGICAL_PLAN: ClassVar[StatementType] # value = 
+    MERGE_INTO: ClassVar[StatementType] # value = 
+    MULTI: ClassVar[StatementType] # value = 
+    PRAGMA: ClassVar[StatementType] # value = 
+    PREPARE: ClassVar[StatementType] # value = 
+    RELATION: ClassVar[StatementType] # value = 
+    SELECT: ClassVar[StatementType] # value = 
+    SET: ClassVar[StatementType] # value = 
+    TRANSACTION: ClassVar[StatementType] # value = 
+    UPDATE: ClassVar[StatementType] # value = 
+    VACUUM: ClassVar[StatementType] # value = 
+    VARIABLE_SET: ClassVar[StatementType] # value = 
+    __members__: ClassVar[
+        dict[str, StatementType]
+    ] # value = {'INVALID': , 'SELECT': , 'INSERT': , 'UPDATE': , 'CREATE': , 'DELETE': , 'PREPARE': , 'EXECUTE': , 'ALTER': , 'TRANSACTION': , 'COPY': , 'ANALYZE': , 'VARIABLE_SET': , 'CREATE_FUNC': , 'EXPLAIN': , 'DROP': , 'EXPORT': , 'PRAGMA': , 'VACUUM': , 'CALL': , 'SET': , 'LOAD': , 'RELATION': , 'EXTENSION': , 'LOGICAL_PLAN': , 'ATTACH': , 'DETACH': , 'MULTI': , 'COPY_DATABASE': , 'MERGE_INTO': } # noqa: E501
+
+class token_type(CppEnum):
+    __members__: ClassVar[
+        dict[str, token_type]
+    ] # value = {'identifier': , 'numeric_const': , 'string_const': , 'operator': , 'keyword': , 'comment': } # noqa: E501
+    comment: ClassVar[token_type] # value = 
+    identifier: ClassVar[token_type] # value = 
+    keyword: ClassVar[token_type] # value = 
+    numeric_const: ClassVar[token_type] # value = 
+    operator: ClassVar[token_type] # value = 
+    string_const: ClassVar[token_type] # value = 
diff --git a/_duckdb-stubs/_func.pyi b/_duckdb-stubs/_func.pyi
index 
5330ed04..2ff4d3ce 100644
--- a/_duckdb-stubs/_func.pyi
+++ b/_duckdb-stubs/_func.pyi
@@ -1,44 +1,21 @@
-import typing
+from typing import ClassVar
+from ._enums import CppEnum
 
 __all__: list[str] = ["ARROW", "DEFAULT", "NATIVE", "SPECIAL", "FunctionNullHandling", "PythonUDFType"]
 
-class FunctionNullHandling:
-    DEFAULT: typing.ClassVar[FunctionNullHandling] # value = 
-    SPECIAL: typing.ClassVar[FunctionNullHandling] # value = 
-    __members__: typing.ClassVar[
+class FunctionNullHandling(CppEnum):
+    DEFAULT: ClassVar[FunctionNullHandling] # value = 
+    SPECIAL: ClassVar[FunctionNullHandling] # value = 
+    __members__: ClassVar[
         dict[str, FunctionNullHandling]
     ] # value = {'DEFAULT': , 'SPECIAL': }
-    def __eq__(self, other: object) -> bool: ...
-    def __getstate__(self) -> int: ...
-    def __hash__(self) -> int: ...
-    def __index__(self) -> int: ...
-    def __init__(self, value: typing.SupportsInt) -> None: ...
-    def __int__(self) -> int: ...
-    def __ne__(self, other: object) -> bool: ...
-    def __setstate__(self, state: typing.SupportsInt) -> None: ...
-    @property
-    def name(self) -> str: ...
-    @property
-    def value(self) -> int: ...
 
-class PythonUDFType:
-    ARROW: typing.ClassVar[PythonUDFType] # value = 
-    NATIVE: typing.ClassVar[PythonUDFType] # value = 
-    __members__: typing.ClassVar[
+class PythonUDFType(CppEnum):
+    ARROW: ClassVar[PythonUDFType] # value = 
+    NATIVE: ClassVar[PythonUDFType] # value = 
+    __members__: ClassVar[
         dict[str, PythonUDFType]
     ] # value = {'NATIVE': , 'ARROW': }
-    def __eq__(self, other: object) -> bool: ...
-    def __getstate__(self) -> int: ...
-    def __hash__(self) -> int: ...
-    def __index__(self) -> int: ...
-    def __init__(self, value: typing.SupportsInt) -> None: ...
-    def __int__(self) -> int: ...
-    def __ne__(self, other: object) -> bool: ...
-    def __setstate__(self, state: typing.SupportsInt) -> None: ...
-    @property
-    def name(self) -> str: ...
-    @property
-    def value(self) -> int: ...
ARROW: PythonUDFType # value = DEFAULT: FunctionNullHandling # value = From ec22557028a1b3d93efde92e13bdef9b5a98f9be Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Tue, 3 Mar 2026 14:02:17 +0100 Subject: [PATCH 18/19] Typing : added ParquetCompression and ProfilerFormat literals --- _duckdb-stubs/__init__.pyi | 28 ++++++++++++++++------------ _duckdb-stubs/_typing.pyi | 19 +++++++++++++------ 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/_duckdb-stubs/__init__.pyi b/_duckdb-stubs/__init__.pyi index 3e17e08c..e9d64c5c 100644 --- a/_duckdb-stubs/__init__.pyi +++ b/_duckdb-stubs/__init__.pyi @@ -38,6 +38,8 @@ if typing.TYPE_CHECKING: CsvCompression, HiveTypes, ColumnsTypes, + ProfilerFormat, + ParquetCompression, ) from ._enums import ExplainTypeLiteral, CSVLineTerminatorLiteral, RenderModeLiteral from duckdb import sqltypes, func @@ -302,7 +304,7 @@ class DuckDBPyConnection: filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: str | None = None, + compression: ParquetCompression | None = None, ) -> DuckDBPyRelation: ... @typing.overload def from_parquet( @@ -314,7 +316,7 @@ class DuckDBPyConnection: filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: str | None = None, + compression: ParquetCompression | None = None, ) -> DuckDBPyRelation: ... def from_query(self, query: str, *, alias: str = "", params: object = None) -> DuckDBPyRelation: ... def get_table_names(self, query: str, *, qualified: bool = False) -> set[str]: ... @@ -327,7 +329,7 @@ class DuckDBPyConnection: repository_url: str | None = None, version: str | None = None, ) -> None: ... - def get_profiling_information(self, format: str = "json") -> str: ... + def get_profiling_information(self, format: ProfilerFormat = "json") -> str: ... def enable_profiling(self) -> None: ... def disable_profiling(self) -> None: ... 
def interrupt(self) -> None: ... @@ -426,7 +428,7 @@ class DuckDBPyConnection: filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: str | None = None, + compression: ParquetCompression | None = None, ) -> DuckDBPyRelation: ... @typing.overload def read_parquet( @@ -438,7 +440,7 @@ class DuckDBPyConnection: filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: typing.Any = None, + compression: ParquetCompression | None = None, ) -> DuckDBPyRelation: ... def register(self, view_name: str, python_object: object) -> DuckDBPyConnection: ... def register_filesystem(self, filesystem: fsspec.AbstractFileSystem) -> None: ... @@ -725,7 +727,7 @@ class DuckDBPyRelation: self, file_name: str, *, - compression: str | None = None, + compression: ParquetCompression | None = None, field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: int | str | None = None, row_group_size: int | None = None, @@ -781,7 +783,7 @@ class DuckDBPyRelation: self, file_name: str, *, - compression: str | None = None, + compression: ParquetCompression | None = None, field_ids: ParquetFieldsOptions | None = None, row_group_size_bytes: str | int | None = None, row_group_size: int | None = None, @@ -1039,7 +1041,7 @@ def from_parquet( filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: str | None = None, + compression: ParquetCompression | None = None, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -1051,7 +1053,7 @@ def from_parquet( filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: typing.Any = None, + compression: ParquetCompression | None = None, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... def from_query( @@ -1081,7 +1083,9 @@ def limit( *, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... 
-def get_profiling_information(*, connection: DuckDBPyConnection | None = None, format: str = "json") -> str: ... +def get_profiling_information( + *, connection: DuckDBPyConnection | None = None, format: ProfilerFormat = "json" +) -> str: ... def enable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def disable_profiling(*, connection: DuckDBPyConnection | None = None) -> None: ... def list_filesystems(*, connection: DuckDBPyConnection | None = None) -> lst[str]: ... @@ -1208,7 +1212,7 @@ def read_parquet( filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: str | None = None, + compression: ParquetCompression | None = None, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... @typing.overload @@ -1220,7 +1224,7 @@ def read_parquet( filename: bool = False, hive_partitioning: bool = False, union_by_name: bool = False, - compression: typing.Any = None, + compression: ParquetCompression | None = None, connection: DuckDBPyConnection | None = None, ) -> DuckDBPyRelation: ... def register( diff --git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 25514dca..168e12f3 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -138,7 +138,7 @@ PyTypeIds: TypeAlias = Builtins | NestedIds StrIntoPyType: TypeAlias = Builtins | Literal["json"] | str """Any `str` that can be converted into a `DuckDBPyType`. -The pytypes not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. +The `DuckDBPyType` not present in the literal values are the composed ones, like `STRUCT` or `DECIMAL`. 
Note: A `StrEnum` will be handled the same way as a `str`.""" @@ -186,21 +186,28 @@ _Auto: TypeAlias = Literal["auto"] ParquetFieldsOptions: TypeAlias = _Auto | ParquetFieldIdsType """Types accepted for the `field_ids` parameter in parquet writing methods.""" -_CompressionOptions: TypeAlias = Literal["none", "gzip", "zstd"] -"""Generally available compression options.""" - -CsvCompression: TypeAlias = _Auto | _CompressionOptions CsvEncoding: TypeAlias = Literal["utf-8", "utf-16", "latin-1"] | str """Encdoding options. All availables options not in the literal values can be seen here: https://duckdb.org/docs/stable/core_extensions/encodings """ -JsonCompression: TypeAlias = Literal["auto_detect"] | _CompressionOptions JsonFormat: TypeAlias = _Auto | Literal["unstructured", "newline_delimited", "array"] JsonRecordOptions: TypeAlias = _Auto | Literal["true", "false"] +# compression kinds + +_CompressionOptions: TypeAlias = Literal["gzip", "zstd"] +"""Generally available compression options.""" +_None: TypeAlias = Literal["none"] +CsvCompression: TypeAlias = _Auto | _None | _CompressionOptions +JsonCompression: TypeAlias = Literal["auto_detect"] | _None | _CompressionOptions +ParquetCompression: TypeAlias = Literal["uncompressed", "brotli", "snappy", "lz4", "lz4_raw"] | _CompressionOptions + # Other JoinType: TypeAlias = Literal["inner", "left", "right", "outer", "semi", "anti"] """Types of join accepted by `DuckDBPyRelation.join` method.""" + +ProfilerFormat: TypeAlias = Literal["json", "query_tree", "query_tree_optimizer", "no_output", "html", "graphviz"] +"""Formats available in `get_profiling_information` method/function.""" From b129f72bb674068a6c618b47c24acd1fe47c06a9 Mon Sep 17 00:00:00 2001 From: OutSquareCapital <166045166+OutSquareCapital@users.noreply.github.com> Date: Wed, 4 Mar 2026 15:54:15 +0100 Subject: [PATCH 19/19] raw NPArray can be used for value conversions --- _duckdb-stubs/_typing.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/_duckdb-stubs/_typing.pyi b/_duckdb-stubs/_typing.pyi index 168e12f3..f75c7e78 100644 --- a/_duckdb-stubs/_typing.pyi +++ b/_duckdb-stubs/_typing.pyi @@ -87,7 +87,7 @@ See Also: https://duckdb.org/docs/stable/clients/python/conversion """ -IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression +IntoValues: TypeAlias = list[PythonLiteral] | tuple[Expression, ...] | Expression | NPArrayLike[Any, Any] """Types that can be converted to a table of values.""" # PyType conversions