Skip to content

Commit 2cb2e40

Browse files
committed
test fixes
1 parent 771fe48 commit 2cb2e40

File tree

1 file changed

+46
-55
lines changed

1 file changed

+46
-55
lines changed

pandas/_libs/new_vector.cpp

Lines changed: 46 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@ namespace nb = nanobind;
2222
/// and support arbitrary types
2323
///
2424
template <typename T> struct PandasHashFunction {
25-
auto operator()(const T &value) const { return std::hash<T>()(value); }
25+
constexpr auto operator()(const T &value) const {
26+
return std::hash<T>()(value);
27+
}
2628
};
2729

2830
template <>
29-
auto PandasHashFunction<float>::operator()(const float &value) const {
31+
constexpr auto PandasHashFunction<float>::operator()(const float &value) const {
3032
if (std::isnan(value)) {
3133
return static_cast<decltype(std::hash<float>()(value))>(0);
3234
}
@@ -35,7 +37,8 @@ auto PandasHashFunction<float>::operator()(const float &value) const {
3537
}
3638

3739
template <>
38-
auto PandasHashFunction<double>::operator()(const double &value) const {
40+
constexpr auto
41+
PandasHashFunction<double>::operator()(const double &value) const {
3942
if (std::isnan(value)) {
4043
return static_cast<decltype(std::hash<double>()(value))>(0);
4144
}
@@ -67,6 +70,15 @@ auto PandasHashEquality<double>::operator()(const double &lhs,
6770
return lhs == rhs;
6871
}
6972

73+
template <typename T> auto PandasIsNA(bool mask_value, T &scalar_value) {
74+
// TODO: should NaN / pd.NA always be treated the same?
75+
if constexpr (std::is_floating_point_v<T>) {
76+
return mask_value || std::isnan(scalar_value);
77+
} else {
78+
return mask_value;
79+
}
80+
}
81+
7082
template <typename T> class PandasVector {
7183
public:
7284
explicit PandasVector<T>() : external_view_exists_(false) {}
@@ -119,8 +131,14 @@ template <typename T, bool IsMasked> class PandasHashTable {
119131
uint64_t>::type;
120132
explicit PandasHashTable<T, IsMasked>() = default;
121133
explicit PandasHashTable<T, IsMasked>(HashValueT new_size) {
134+
#if __APPLE__
135+
// macOS cannot resolve size_t to uint32_t or uint64_t that khash needs
136+
hash_map_.resize(static_cast<uint64_t>(new_size));
137+
hash_set_.resize(static_cast<uint64_t>(new_size));
138+
#else
122139
hash_map_.resize(new_size);
123140
hash_set_.resize(new_size);
141+
#endif
124142
}
125143

126144
auto __len__() const noexcept { return hash_map_.size(); }
@@ -309,15 +327,18 @@ template <typename T, bool IsMasked> class PandasHashTable {
309327
mask_vector = UniqueWithResultMask(values, uniques, mask);
310328

311329
return nb::make_tuple(uniques.ToNdArray(), mask_vector.ToNdArray());
330+
} else {
331+
UniquesOnly(values, uniques);
332+
const auto out_array = uniques.ToNdArray();
333+
return nb::cast(out_array);
312334
}
313-
UniquesOnly(values, uniques, mask);
314-
const auto out_array = uniques.ToNdArray();
315-
return nb::cast(out_array);
335+
336+
throw std::runtime_error("Should not hit this");
316337
}
317338

318339
auto Factorize(const nb::ndarray<const T, nb::ndim<1>> &values,
319340
Py_ssize_t na_sentinel = -1, nb::object na_value = nb::none(),
320-
nb::object mask = nb::none(), bool ignore_na = true)
341+
nb::object mask = nb::none(), bool ignore_na = false)
321342
-> nb::object {
322343
PandasVector<T> uniques;
323344

@@ -444,14 +465,14 @@ template <typename T, bool IsMasked> class PandasHashTable {
444465
const auto mask_v = mask.view();
445466

446467
for (auto i = decltype(n){0}; i < n; i++) {
468+
const auto val = values_v(i);
447469
if constexpr (IgnoreNA) {
448-
if (mask_v(i)) {
470+
if (PandasIsNA(mask_v(i), val)) {
449471
labels[i] = na_sentinel;
450472
continue;
451473
}
452474
}
453475

454-
const auto val = values_v(i);
455476
auto k = hash_map_.get(val);
456477
if (k == hash_map_.end()) {
457478
int dummy;
@@ -472,7 +493,7 @@ template <typename T, bool IsMasked> class PandasHashTable {
472493
const auto val = values_v(i);
473494

474495
if constexpr (IgnoreNA) {
475-
if constexpr (std::is_same_v<T, float> || std::is_same_v<T, double>) {
496+
if constexpr (std::is_floating_point_v<T>) {
476497
if (std::isnan(val)) {
477498
labels[i] = na_sentinel;
478499
continue;
@@ -534,17 +555,12 @@ template <typename T, bool IsMasked> class PandasHashTable {
534555
for (auto i = decltype(n){0}; i < n; i++) {
535556
const auto val = values_v(i);
536557

537-
bool should_append_na;
538-
// NaN / pd.NA are treated the same? hmmm
539-
if constexpr (std::is_floating_point_v<T>) {
540-
should_append_na = !seen_na && (mask_v(i) || std::isnan(val));
541-
} else {
542-
should_append_na = !seen_na && mask_v(i);
543-
}
544-
if (should_append_na) {
545-
seen_na = true;
546-
uniques.Append(val);
547-
result.Append(1);
558+
if (PandasIsNA(mask_v(i), val)) {
559+
if (!seen_na) {
560+
uniques.Append(val);
561+
result.Append(1);
562+
seen_na = true;
563+
}
548564
continue;
549565
}
550566

@@ -572,44 +588,19 @@ template <typename T, bool IsMasked> class PandasHashTable {
572588
}
573589

574590
auto UniquesOnly(const nb::ndarray<const T, nb::ndim<1>> &values,
575-
PandasVector<T> &uniques,
576-
[[maybe_unused]] nb::object mask_obj = nb::none()) -> void {
577-
if constexpr (IsMasked) {
578-
if (mask_obj.is_none()) {
579-
throw std::invalid_argument("mask must not be None!");
580-
}
581-
}
591+
PandasVector<T> &uniques) -> void {
582592

583593
const auto values_v = values.view();
584594
const auto n = values.shape(0);
585595

586-
if constexpr (IsMasked) {
587-
using MaskT = nb::ndarray<const uint8_t, nb::ndim<1>>;
588-
MaskT mask;
589-
if (!nb::try_cast<MaskT>(mask_obj, mask, false)) {
590-
throw std::invalid_argument("Could not convert mask to uint8_t array!");
591-
}
592-
nb::call_guard<nb::gil_scoped_release>();
593-
594-
for (auto i = decltype(n){0}; i < n; i++) {
595-
const auto val = values_v(i);
596-
auto k = hash_map_.get(val);
597-
if (k == hash_map_.end()) {
598-
int dummy;
599-
k = hash_map_.put(val, &dummy);
600-
uniques.Append(val);
601-
}
602-
}
603-
} else {
604-
nb::call_guard<nb::gil_scoped_release>();
605-
for (auto i = decltype(n){0}; i < n; i++) {
606-
const auto val = values_v(i);
607-
auto k = hash_map_.get(val);
608-
if (k == hash_map_.end()) {
609-
int dummy;
610-
k = hash_map_.put(val, &dummy);
611-
uniques.Append(val);
612-
}
596+
nb::call_guard<nb::gil_scoped_release>();
597+
for (auto i = decltype(n){0}; i < n; i++) {
598+
const auto val = values_v(i);
599+
auto k = hash_map_.get(val);
600+
if (k == hash_map_.end()) {
601+
int dummy;
602+
k = hash_map_.put(val, &dummy);
603+
uniques.Append(val);
613604
}
614605
}
615606

0 commit comments

Comments
 (0)