Skip to content

Commit f5042c7

Browse files
committed
hash
Signed-off-by: coldWater <forsaken628@gmail.com>
1 parent 70dab60 commit f5042c7

File tree

8 files changed

+689
-28
lines changed

8 files changed

+689
-28
lines changed

src/query/expression/src/function.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,11 +186,11 @@ pub struct FunctionRegistry {
186186

187187
impl Function {
188188
pub fn passthrough_nullable(self) -> Self {
189-
debug_assert!(!self
189+
debug_assert!(self
190190
.signature
191191
.args_type
192192
.iter()
193-
.any(|ty| ty.is_nullable_or_null()));
193+
.all(|ty| !ty.is_nullable_or_null()));
194194

195195
let (calc_domain, eval) = self.eval.into_scalar().unwrap();
196196

src/query/expression/src/types/decimal.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use std::ops::Mul;
2424
use std::ops::MulAssign;
2525
use std::ops::Neg;
2626
use std::ops::Range;
27+
use std::ops::Rem;
2728
use std::ops::Sub;
2829
use std::ops::SubAssign;
2930

@@ -2363,6 +2364,14 @@ impl Div for i256 {
23632364
}
23642365
}
23652366

2367+
impl Rem for i256 {
2368+
type Output = Self;
2369+
2370+
fn rem(self, rhs: Self) -> Self::Output {
2371+
Self(self.0 % rhs.0)
2372+
}
2373+
}
2374+
23662375
macro_rules! impl_from {
23672376
($($t:ty),* $(,)?) => {$(
23682377
impl From<$t> for i256 {

src/query/expression/src/values.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ impl<T: AccessType> Value<T> {
283283
}
284284
}
285285

286-
impl<T: ArgType> Value<T> {
286+
impl<T: ReturnType> Value<T> {
287287
pub fn upcast(self) -> Value<AnyType> {
288288
match self {
289289
Value::Scalar(scalar) => Value::Scalar(T::upcast_scalar(scalar)),
Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
// Copyright 2021 Datafuse Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::hash::Hash;
16+
use std::hash::Hasher;
17+
use std::sync::Arc;
18+
19+
use databend_common_expression::types::AccessType;
20+
use databend_common_expression::types::AnyType;
21+
use databend_common_expression::types::ArgType;
22+
use databend_common_expression::types::DataType;
23+
use databend_common_expression::types::Decimal;
24+
use databend_common_expression::types::Decimal128As256Type;
25+
use databend_common_expression::types::Decimal128Type;
26+
use databend_common_expression::types::Decimal256As128Type;
27+
use databend_common_expression::types::Decimal256Type;
28+
use databend_common_expression::types::DecimalDataType;
29+
use databend_common_expression::types::DecimalSize;
30+
use databend_common_expression::types::NumberDataType;
31+
use databend_common_expression::types::NumberType;
32+
use databend_common_expression::types::ReturnType;
33+
use databend_common_expression::types::UInt32Type;
34+
use databend_common_expression::types::UInt64Type;
35+
use databend_common_expression::types::F32;
36+
use databend_common_expression::types::F64;
37+
use databend_common_expression::vectorize_with_builder_1_arg;
38+
use databend_common_expression::vectorize_with_builder_2_arg;
39+
use databend_common_expression::with_integer_mapped_type;
40+
use databend_common_expression::EvalContext;
41+
use databend_common_expression::Function;
42+
use databend_common_expression::FunctionDomain;
43+
use databend_common_expression::FunctionEval;
44+
use databend_common_expression::FunctionRegistry;
45+
use databend_common_expression::FunctionSignature;
46+
use databend_common_expression::Value;
47+
48+
pub fn register_decimal_hash<H: HashFunction>(registry: &mut FunctionRegistry) {
49+
registry.register_function_factory(H::name(), |_, args_type| {
50+
decimal_hash_factory_1_arg::<H>(args_type)
51+
});
52+
}
53+
54+
pub fn register_decimal_hash_with_seed<H: HashFunctionWithSeed>(registry: &mut FunctionRegistry) {
55+
registry.register_function_factory(H::name(), |_, args_type| {
56+
decimal_hash_factory_2_arg::<H>(args_type)
57+
});
58+
}
59+
60+
/// A seedless hash algorithm that can be registered as a decimal hash function.
pub trait HashFunction {
    /// Hasher state; a fresh `Default` instance is created for each hashed row.
    type Hasher: Hasher + Default;

    /// When `true`, the registered function returns `UInt32` and the `u64`
    /// digest is narrowed to 32 bits; the default (`false`) returns `UInt64`.
    const IS_HASH_32: bool = false;

    /// Name under which the function is registered.
    fn name() -> &'static str;
}
65+
66+
/// A hasher that is constructed from a `u64` seed and can be registered as a
/// two-argument (decimal, seed) hash function.
pub trait HashFunctionWithSeed: Hasher {
    /// Name under which the function is registered.
    fn name() -> &'static str;

    /// Creates a new hasher state initialised from `seed`.
    fn with_seed(seed: u64) -> Self;
}
71+
72+
fn decimal_hash_factory_1_arg<H: HashFunction>(args_type: &[DataType]) -> Option<Arc<Function>> {
73+
let (nullable, size) = match args_type {
74+
[DataType::Null] => (true, DecimalSize::default_128()),
75+
[DataType::Nullable(box DataType::Decimal(size))] => (true, *size),
76+
[DataType::Decimal(size)] => (false, *size),
77+
_ => return None,
78+
};
79+
80+
let function = Function {
81+
signature: FunctionSignature {
82+
name: H::name().to_string(),
83+
args_type: [DataType::Decimal(size)].into(),
84+
return_type: if H::IS_HASH_32 {
85+
UInt32Type::data_type()
86+
} else {
87+
UInt64Type::data_type()
88+
},
89+
},
90+
eval: FunctionEval::Scalar {
91+
calc_domain: Box::new(|_, _| FunctionDomain::Full),
92+
eval: Box::new(move |args, ctx| {
93+
let arg = args[0].clone();
94+
if H::IS_HASH_32 {
95+
decimal_hash::<H::Hasher, UInt32Type>(arg, ctx, |res| res.try_into().unwrap())
96+
} else {
97+
decimal_hash::<H::Hasher, UInt64Type>(arg, ctx, |res| res)
98+
}
99+
}),
100+
},
101+
};
102+
103+
if nullable {
104+
Some(Arc::new(function.passthrough_nullable()))
105+
} else {
106+
Some(Arc::new(function))
107+
}
108+
}
109+
110+
fn decimal_hash_factory_2_arg<H: HashFunctionWithSeed>(
111+
args_type: &[DataType],
112+
) -> Option<Arc<Function>> {
113+
let (nullable, size, seed_type) = match args_type {
114+
[DataType::Null, DataType::Number(number)] => (true, DecimalSize::default_128(), *number),
115+
[DataType::Nullable(box DataType::Decimal(size)), DataType::Number(number)] => {
116+
(true, *size, *number)
117+
}
118+
[DataType::Decimal(size), DataType::Number(number)] => (false, *size, *number),
119+
[DataType::Null, DataType::Nullable(box DataType::Number(number))] => {
120+
(true, DecimalSize::default_128(), *number)
121+
}
122+
[DataType::Nullable(box DataType::Decimal(size)), DataType::Nullable(box DataType::Number(number))] => {
123+
(true, *size, *number)
124+
}
125+
[DataType::Decimal(size), DataType::Nullable(box DataType::Number(number))] => {
126+
(true, *size, *number)
127+
}
128+
_ => return None,
129+
};
130+
131+
let function = Function {
132+
signature: FunctionSignature {
133+
name: H::name().to_string(),
134+
args_type: [DataType::Decimal(size), DataType::Number(seed_type)].into(),
135+
return_type: UInt64Type::data_type(),
136+
},
137+
eval: FunctionEval::Scalar {
138+
calc_domain: Box::new(|_, _| FunctionDomain::Full),
139+
eval: Box::new(move |args, ctx| {
140+
let arg = args[0].clone();
141+
let seed = args[1].clone();
142+
with_integer_mapped_type!(|NUM| match seed_type {
143+
NumberDataType::NUM =>
144+
decimal_hash_with_seed::<H, NumberType<NUM>>(arg, seed, ctx, |s| s as _),
145+
NumberDataType::Float32 =>
146+
decimal_hash_with_seed::<H, NumberType<F32>>(arg, seed, ctx, |s| s.0 as _),
147+
NumberDataType::Float64 =>
148+
decimal_hash_with_seed::<H, NumberType<F64>>(arg, seed, ctx, |s| s.0 as _),
149+
})
150+
}),
151+
},
152+
};
153+
154+
if nullable {
155+
Some(Arc::new(function.passthrough_nullable()))
156+
} else {
157+
Some(Arc::new(function))
158+
}
159+
}
160+
161+
fn decimal_hash_typed<H, R, D, T>(
162+
arg: Value<D>,
163+
ctx: &mut EvalContext,
164+
scale: u8,
165+
cast: fn(u64) -> R::Scalar,
166+
) -> Value<AnyType>
167+
where
168+
T: Decimal + Hash,
169+
H: Hasher + Default,
170+
R: ReturnType,
171+
D: for<'a> AccessType<ScalarRef<'a> = T>,
172+
{
173+
vectorize_with_builder_1_arg::<D, R>(|arg, output, _| {
174+
let mut state = H::default();
175+
scale.hash(&mut state);
176+
arg.hash(&mut state);
177+
R::push_item(output, R::to_scalar_ref(&cast(state.finish())));
178+
})(arg, ctx)
179+
.upcast()
180+
}
181+
182+
fn decimal_hash<H, R>(
183+
arg: Value<AnyType>,
184+
ctx: &mut EvalContext,
185+
cast: fn(u64) -> R::Scalar,
186+
) -> Value<AnyType>
187+
where
188+
H: Hasher + Default,
189+
R: ReturnType,
190+
{
191+
let (decimal_type, _) = DecimalDataType::from_value(&arg).unwrap();
192+
let size = decimal_type.size();
193+
if size.is_128() {
194+
match decimal_type {
195+
DecimalDataType::Decimal128(_) => {
196+
let arg: Value<Decimal128Type> = arg.try_downcast().unwrap();
197+
decimal_hash_typed::<H, R, _, _>(arg, ctx, size.scale(), cast)
198+
}
199+
DecimalDataType::Decimal256(_) => {
200+
let arg = arg.try_downcast::<Decimal256As128Type>().unwrap();
201+
decimal_hash_typed::<H, R, _, _>(arg, ctx, size.scale(), cast)
202+
}
203+
}
204+
} else {
205+
match decimal_type {
206+
DecimalDataType::Decimal128(_) => {
207+
let arg = arg.try_downcast::<Decimal128As256Type>().unwrap();
208+
decimal_hash_typed::<H, R, _, _>(arg, ctx, size.scale(), cast)
209+
}
210+
DecimalDataType::Decimal256(_) => {
211+
let arg = arg.try_downcast::<Decimal256Type>().unwrap();
212+
decimal_hash_typed::<H, R, _, _>(arg, ctx, size.scale(), cast)
213+
}
214+
}
215+
}
216+
}
217+
218+
fn decimal_hash_typed_with_seed<H, S, T, D>(
219+
arg: Value<D>,
220+
seed: Value<S>,
221+
ctx: &mut EvalContext,
222+
scale: u8,
223+
cast: fn(S::ScalarRef<'_>) -> u64,
224+
) -> Value<AnyType>
225+
where
226+
H: HashFunctionWithSeed,
227+
S: AccessType,
228+
T: Decimal + Hash,
229+
D: for<'a> AccessType<ScalarRef<'a> = T>,
230+
{
231+
vectorize_with_builder_2_arg::<D, S, UInt64Type>(|arg, seed, output, _| {
232+
let mut state = H::with_seed(cast(seed));
233+
scale.hash(&mut state);
234+
arg.hash(&mut state);
235+
output.push(state.finish());
236+
})(arg, seed, ctx)
237+
.upcast()
238+
}
239+
240+
fn decimal_hash_with_seed<H, S>(
241+
arg: Value<AnyType>,
242+
seed: Value<AnyType>,
243+
ctx: &mut EvalContext,
244+
cast: fn(S::ScalarRef<'_>) -> u64,
245+
) -> Value<AnyType>
246+
where
247+
H: HashFunctionWithSeed,
248+
S: AccessType,
249+
{
250+
let (decimal_type, _) = DecimalDataType::from_value(&arg).unwrap();
251+
let size = decimal_type.size();
252+
let scale = size.scale();
253+
if size.is_128() {
254+
match decimal_type {
255+
DecimalDataType::Decimal128(_) => {
256+
let arg: Value<Decimal128Type> = arg.try_downcast().unwrap();
257+
let seed: Value<S> = seed.try_downcast().unwrap();
258+
decimal_hash_typed_with_seed::<H, S, _, _>(arg, seed, ctx, scale, cast)
259+
}
260+
DecimalDataType::Decimal256(_) => {
261+
let arg = arg.try_downcast::<Decimal256As128Type>().unwrap();
262+
let seed: Value<S> = seed.try_downcast().unwrap();
263+
decimal_hash_typed_with_seed::<H, S, _, _>(arg, seed, ctx, size.scale(), cast)
264+
}
265+
}
266+
} else {
267+
match decimal_type {
268+
DecimalDataType::Decimal128(_) => {
269+
let arg = arg.try_downcast::<Decimal128As256Type>().unwrap();
270+
let seed: Value<S> = seed.try_downcast().unwrap();
271+
decimal_hash_typed_with_seed::<H, S, _, _>(arg, seed, ctx, size.scale(), cast)
272+
}
273+
DecimalDataType::Decimal256(_) => {
274+
let arg = arg.try_downcast::<Decimal256Type>().unwrap();
275+
let seed: Value<S> = seed.try_downcast().unwrap();
276+
decimal_hash_typed_with_seed::<H, S, _, _>(arg, seed, ctx, size.scale(), cast)
277+
}
278+
}
279+
}
280+
}

src/query/functions/src/scalars/decimal/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
mod arithmetic;
2828
mod cast;
2929
mod comparison;
30+
mod hash;
3031
mod math;
3132
mod uuid;
3233

@@ -38,5 +39,6 @@ pub use cast::register_decimal_to_int;
3839
pub use cast::register_decimal_to_string;
3940
pub use cast::register_to_decimal;
4041
pub use comparison::register_decimal_compare;
42+
pub use hash::*;
4143
pub use math::register_decimal_math;
4244
pub use uuid::register_decimal_to_uuid;

0 commit comments

Comments
 (0)