5
5
if TYPE_CHECKING :
6
6
from typing_extensions import Self
7
7
8
- from dataframe_api .dataframe_object import DataFrame
9
-
10
- from .typing import DType , Namespace , NullType , Scalar
8
+ from .typing import (
9
+ AnyScalar ,
10
+ DataFrame ,
11
+ DType ,
12
+ Namespace ,
13
+ NullType ,
14
+ Scalar ,
15
+ )
11
16
12
17
13
18
__all__ = ["Column" ]
@@ -224,7 +229,7 @@ def sorted_indices(
224
229
"""
225
230
...
226
231
227
- def __eq__ (self , other : Self | Scalar ) -> Self : # type: ignore[override]
232
+ def __eq__ (self , other : Self | AnyScalar ) -> Self : # type: ignore[override]
228
233
"""Compare for equality.
229
234
230
235
Nulls should follow Kleene Logic.
@@ -247,7 +252,7 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
247
252
"""
248
253
...
249
254
250
- def __ne__ (self , other : Self | Scalar ) -> Self : # type: ignore[override]
255
+ def __ne__ (self , other : Self | AnyScalar ) -> Self : # type: ignore[override]
251
256
"""Compare for non-equality.
252
257
253
258
Nulls should follow Kleene Logic.
@@ -270,7 +275,7 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
270
275
"""
271
276
...
272
277
273
- def __ge__ (self , other : Self | Scalar ) -> Self :
278
+ def __ge__ (self , other : Self | AnyScalar ) -> Self :
274
279
"""Compare for "greater than or equal to" `other`.
275
280
276
281
Parameters
@@ -291,7 +296,7 @@ def __ge__(self, other: Self | Scalar) -> Self:
291
296
"""
292
297
...
293
298
294
- def __gt__ (self , other : Self | Scalar ) -> Self :
299
+ def __gt__ (self , other : Self | AnyScalar ) -> Self :
295
300
"""Compare for "greater than" `other`.
296
301
297
302
Parameters
@@ -312,7 +317,7 @@ def __gt__(self, other: Self | Scalar) -> Self:
312
317
"""
313
318
...
314
319
315
- def __le__ (self , other : Self | Scalar ) -> Self :
320
+ def __le__ (self , other : Self | AnyScalar ) -> Self :
316
321
"""Compare for "less than or equal to" `other`.
317
322
318
323
Parameters
@@ -333,7 +338,7 @@ def __le__(self, other: Self | Scalar) -> Self:
333
338
"""
334
339
...
335
340
336
- def __lt__ (self , other : Self | Scalar ) -> Self :
341
+ def __lt__ (self , other : Self | AnyScalar ) -> Self :
337
342
"""Compare for "less than" `other`.
338
343
339
344
Parameters
@@ -354,7 +359,7 @@ def __lt__(self, other: Self | Scalar) -> Self:
354
359
"""
355
360
...
356
361
357
- def __and__ (self , other : Self | bool ) -> Self :
362
+ def __and__ (self , other : Self | bool | Scalar ) -> Self :
358
363
"""Apply logical 'and' to `other` Column (or scalar) and this Column.
359
364
360
365
Nulls should follow Kleene Logic.
@@ -380,7 +385,7 @@ def __and__(self, other: Self | bool) -> Self:
380
385
"""
381
386
...
382
387
383
- def __or__ (self , other : Self | bool ) -> Self :
388
+ def __or__ (self , other : Self | bool | Scalar ) -> Self :
384
389
"""Apply logical 'or' to `other` Column (or scalar) and this column.
385
390
386
391
Nulls should follow Kleene Logic.
@@ -406,7 +411,7 @@ def __or__(self, other: Self | bool) -> Self:
406
411
"""
407
412
...
408
413
409
- def __add__ (self , other : Self | Scalar ) -> Self :
414
+ def __add__ (self , other : Self | AnyScalar ) -> Self :
410
415
"""Add `other` column or scalar to this column.
411
416
412
417
Parameters
@@ -427,7 +432,7 @@ def __add__(self, other: Self | Scalar) -> Self:
427
432
"""
428
433
...
429
434
430
- def __sub__ (self , other : Self | Scalar ) -> Self :
435
+ def __sub__ (self , other : Self | AnyScalar ) -> Self :
431
436
"""Subtract `other` column or scalar from this column.
432
437
433
438
Parameters
@@ -448,7 +453,7 @@ def __sub__(self, other: Self | Scalar) -> Self:
448
453
"""
449
454
...
450
455
451
- def __mul__ (self , other : Self | Scalar ) -> Self :
456
+ def __mul__ (self , other : Self | AnyScalar ) -> Self :
452
457
"""Multiply `other` column or scalar with this column.
453
458
454
459
Parameters
@@ -469,7 +474,7 @@ def __mul__(self, other: Self | Scalar) -> Self:
469
474
"""
470
475
...
471
476
472
- def __truediv__ (self , other : Self | Scalar ) -> Self :
477
+ def __truediv__ (self , other : Self | AnyScalar ) -> Self :
473
478
"""Divide this column by `other` column or scalar. True division, returns floats.
474
479
475
480
Parameters
@@ -490,7 +495,7 @@ def __truediv__(self, other: Self | Scalar) -> Self:
490
495
"""
491
496
...
492
497
493
- def __floordiv__ (self , other : Self | Scalar ) -> Self :
498
+ def __floordiv__ (self , other : Self | AnyScalar ) -> Self :
494
499
"""Floor-divide `other` column or scalar to this column.
495
500
496
501
Parameters
@@ -511,7 +516,7 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
511
516
"""
512
517
...
513
518
514
- def __pow__ (self , other : Self | Scalar ) -> Self :
519
+ def __pow__ (self , other : Self | AnyScalar ) -> Self :
515
520
"""Raise this column to the power of `other`.
516
521
517
522
Integer dtype to the power of non-negative integer dtype is integer dtype.
@@ -536,7 +541,7 @@ def __pow__(self, other: Self | Scalar) -> Self:
536
541
"""
537
542
...
538
543
539
- def __mod__ (self , other : Self | Scalar ) -> Self :
544
+ def __mod__ (self , other : Self | AnyScalar ) -> Self :
540
545
"""Return modulus of this column by `other` (`%` operator).
541
546
542
547
Parameters
@@ -557,7 +562,7 @@ def __mod__(self, other: Self | Scalar) -> Self:
557
562
"""
558
563
...
559
564
560
- def __divmod__ (self , other : Self | Scalar ) -> tuple [Column , Column ]:
565
+ def __divmod__ (self , other : Self | AnyScalar ) -> tuple [Column , Column ]:
561
566
"""Return quotient and remainder of integer division. See `divmod` builtin.
562
567
563
568
Parameters
@@ -578,16 +583,16 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
578
583
"""
579
584
...
580
585
581
def __radd__(self, other: Self | AnyScalar) -> Self:
    """Reflected addition, i.e. ``other + self``.

    Python falls back to this method when the left-hand operand does not
    handle the addition itself. It is the mirror image of ``__add__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand of the addition.

    Returns
    -------
    Self
    """
    ...
583
588
584
def __rsub__(self, other: Self | AnyScalar) -> Self:
    """Reflected subtraction, i.e. ``other - self``.

    Python falls back to this method when the left-hand operand does not
    handle the subtraction itself. It is the mirror image of ``__sub__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand (the value this column is subtracted from).

    Returns
    -------
    Self
    """
    ...
586
591
587
def __rmul__(self, other: Self | AnyScalar) -> Self:
    """Reflected multiplication, i.e. ``other * self``.

    Python falls back to this method when the left-hand operand does not
    handle the multiplication itself. It is the mirror image of ``__mul__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand of the multiplication.

    Returns
    -------
    Self
    """
    ...
589
594
590
def __rtruediv__(self, other: Self | AnyScalar) -> Self:
    """Reflected true division, i.e. ``other / self``.

    Python falls back to this method when the left-hand operand does not
    handle the division itself. It is the mirror image of ``__truediv__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand (the dividend).

    Returns
    -------
    Self
    """
    ...
592
597
593
598
def __rand__ (self , other : Self | bool ) -> Self :
@@ -596,13 +601,13 @@ def __rand__(self, other: Self | bool) -> Self:
596
601
def __ror__(self, other: Self | bool | Scalar) -> Self:
    """Reflected logical 'or', i.e. ``other | self``.

    Python falls back to this method when the left-hand operand does not
    handle ``|`` itself. It accepts the same operand types as ``__or__``,
    so a ``Scalar`` on the left-hand side is typed consistently with one on
    the right-hand side.

    Parameters
    ----------
    other : Self, bool or Scalar
        Left-hand operand.

    Returns
    -------
    Self
    """
    ...
598
603
599
def __rfloordiv__(self, other: Self | AnyScalar) -> Self:
    """Reflected floor division, i.e. ``other // self``.

    Python falls back to this method when the left-hand operand does not
    handle the floor division itself. It is the mirror image of
    ``__floordiv__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand (the dividend).

    Returns
    -------
    Self
    """
    ...
601
606
602
def __rpow__(self, other: Self | AnyScalar) -> Self:
    """Reflected exponentiation, i.e. ``other ** self``.

    Python falls back to this method when the left-hand operand does not
    handle the exponentiation itself. It is the mirror image of ``__pow__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand (the base).

    Returns
    -------
    Self
    """
    ...
604
609
605
def __rmod__(self, other: Self | AnyScalar) -> Self:
    """Reflected modulus, i.e. ``other % self``.

    Python falls back to this method when the left-hand operand does not
    handle ``%`` itself. It is the mirror image of ``__mod__``.

    Parameters
    ----------
    other : Self or AnyScalar
        Left-hand operand (the dividend).

    Returns
    -------
    Self
    """
    ...
607
612
608
613
def __invert__ (self ) -> Self :
@@ -615,7 +620,7 @@ def __invert__(self) -> Self:
615
620
"""
616
621
...
617
622
618
- def any (self , * , skip_nulls : bool = True ) -> bool | NullType :
623
+ def any (self , * , skip_nulls : bool | Scalar = True ) -> Scalar :
619
624
"""Reduction returns a bool.
620
625
621
626
Raises
@@ -625,7 +630,7 @@ def any(self, *, skip_nulls: bool = True) -> bool | NullType:
625
630
"""
626
631
...
627
632
628
- def all (self , * , skip_nulls : bool = True ) -> bool | NullType :
633
+ def all (self , * , skip_nulls : bool | Scalar = True ) -> Scalar :
629
634
"""Reduction returns a bool.
630
635
631
636
Raises
@@ -635,23 +640,23 @@ def all(self, *, skip_nulls: bool = True) -> bool | NullType:
635
640
"""
636
641
...
637
642
638
def min(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
    """Reduction returns a scalar.

    Any data type that supports comparisons must be supported.

    Returns
    -------
    Scalar
        The returned value has the same dtype as the column.
    """
    ...
645
650
646
def max(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
    """Reduction returns a scalar.

    Any data type that supports comparisons must be supported.

    Returns
    -------
    Scalar
        The returned value has the same dtype as the column.
    """
    ...
653
658
654
- def sum (self , * , skip_nulls : bool = True ) -> Scalar | NullType :
659
+ def sum (self , * , skip_nulls : bool | Scalar = True ) -> Scalar :
655
660
"""Reduction returns a scalar.
656
661
657
662
Must be supported for numerical and
@@ -660,15 +665,15 @@ def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType:
660
665
"""
661
666
...
662
667
663
def prod(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
    """Reduction returns a scalar.

    Must be supported for numerical data types.

    Returns
    -------
    Scalar
        The returned value has the same dtype as the column.
    """
    ...
670
675
671
- def median (self , * , skip_nulls : bool = True ) -> Scalar | NullType :
676
+ def median (self , * , skip_nulls : bool | Scalar = True ) -> Scalar :
672
677
"""Reduction returns a scalar.
673
678
674
679
Must be supported for numerical and
@@ -678,7 +683,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar | NullType:
678
683
"""
679
684
...
680
685
681
- def mean (self , * , skip_nulls : bool = True ) -> Scalar | NullType :
686
+ def mean (self , * , skip_nulls : bool | Scalar = True ) -> Scalar :
682
687
"""Reduction returns a scalar.
683
688
684
689
Must be supported for numerical and
@@ -691,9 +696,9 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType:
691
696
def std (
692
697
self ,
693
698
* ,
694
- correction : int | float = 1 ,
695
- skip_nulls : bool = True ,
696
- ) -> Scalar | NullType :
699
+ correction : float = 1 ,
700
+ skip_nulls : bool | Scalar = True ,
701
+ ) -> Scalar :
697
702
"""Reduction returns a scalar.
698
703
699
704
Must be supported for numerical and
@@ -724,9 +729,9 @@ def std(
724
729
def var (
725
730
self ,
726
731
* ,
727
- correction : int | float = 1 ,
728
- skip_nulls : bool = True ,
729
- ) -> Scalar | NullType :
732
+ correction : float | Scalar = 1 ,
733
+ skip_nulls : bool | Scalar = True ,
734
+ ) -> Scalar :
730
735
"""Reduction returns a scalar.
731
736
732
737
Must be supported for numerical and
@@ -835,7 +840,7 @@ def is_in(self, values: Self) -> Self:
835
840
"""
836
841
...
837
842
838
- def unique_indices (self , * , skip_nulls : bool = True ) -> Self :
843
+ def unique_indices (self , * , skip_nulls : bool | Scalar = True ) -> Self :
839
844
"""Return indices corresponding to unique values in Column.
840
845
841
846
Returns
@@ -855,7 +860,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Self:
855
860
"""
856
861
...
857
862
858
- def fill_nan (self , value : float | NullType , / ) -> Self :
863
+ def fill_nan (self , value : float | NullType | Scalar , / ) -> Self :
859
864
"""Fill floating point ``nan`` values with the given fill value.
860
865
861
866
Parameters
@@ -868,7 +873,7 @@ def fill_nan(self, value: float | NullType, /) -> Self:
868
873
"""
869
874
...
870
875
871
- def fill_null (self , value : Scalar , / ) -> Self :
876
+ def fill_null (self , value : AnyScalar , / ) -> Self :
872
877
"""Fill null values with the given fill value.
873
878
874
879
Parameters
@@ -914,7 +919,7 @@ def to_array(self) -> Any:
914
919
"""
915
920
...
916
921
917
- def rename (self , name : str ) -> Self :
922
+ def rename (self , name : str | Scalar ) -> Self :
918
923
"""Rename column.
919
924
920
925
Parameters
@@ -929,17 +934,17 @@ def rename(self, name: str) -> Self:
929
934
"""
930
935
...
931
936
932
def shift(self, offset: int | Scalar) -> Self:
    """Shift values by `offset` positions, filling missing values with `null`.

    For example, if the original column contains values `[1, 4, 2]`, then:

    - `.shift(1)` will return `[null, 1, 4]`,
    - `.shift(-1)` will return `[4, 2, null]`.

    Parameters
    ----------
    offset : int or Scalar
        How many positions to shift by. As the examples above show, a
        positive offset moves values towards the end of the column and a
        negative offset moves them towards the start.
    """
    ...
@@ -1020,7 +1025,7 @@ def iso_weekday(self) -> Self:
1020
1025
"""
1021
1026
...
1022
1027
1023
- def unix_timestamp (self , * , time_unit : Literal [ "s" , "ms" , "us" ] = "s" ) -> Self :
1028
+ def unix_timestamp (self , * , time_unit : str | Scalar = "s" ) -> Self :
1024
1029
"""Return number of seconds / milliseconds / microseconds since the Unix epoch.
1025
1030
1026
1031
The Unix epoch is 00:00:00 UTC on 1 January 1970.
@@ -1039,3 +1044,16 @@ def unix_timestamp(self, *, time_unit: Literal["s", "ms", "us"] = "s") -> Self:
1039
1044
discarded.
1040
1045
"""
1041
1046
...
1047
+
1048
def persist(self) -> Self:
    """Hint that computation prior to this point should not be repeated.

    This is intended as a hint, rather than as a directive. Implementations
    which do not separate lazy vs eager execution may ignore this method and
    treat it as a no-op.

    .. note::
        This method may trigger execution. If necessary, it should be called
        at most once per dataframe, and as late as possible in the pipeline.

    Returns
    -------
    Self
    """
    ...
0 commit comments