Skip to content

Commit 7efc1bf

Browse files
committed
add tpch q5 example
1 parent e9adcc8 commit 7efc1bf

File tree

1 file changed

+57
-0
lines changed
  • spec/API_specification/examples/tpch

1 file changed

+57
-0
lines changed
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING, Any
4+
5+
if TYPE_CHECKING:
6+
from dataframe_api.typing import DataFrame, SupportsDataFrameAPI
7+
8+
9+
def query(
    customer_raw: SupportsDataFrameAPI,
    orders_raw: SupportsDataFrameAPI,
    lineitem_raw: SupportsDataFrameAPI,
    supplier_raw: SupportsDataFrameAPI,
    nation_raw: SupportsDataFrameAPI,
    region_raw: SupportsDataFrameAPI,
) -> Any:
    """Sum revenue per nation for the TPC-H Q5 example.

    Each ``*_raw`` argument is converted with
    ``__dataframe_consortium_standard__()``. The tables are then inner-joined
    (region -> nation -> customer -> orders -> lineitem -> supplier), filtered,
    and aggregated.

    Rows are kept when all of the following hold:

    * ``c_nationkey`` equals ``s_nationkey``;
    * ``r_name`` equals ``"ASIA"``;
    * ``o_orderdate`` is on or after 1994-01-01 and before 1995-01-01.

    A ``revenue`` column is computed as
    ``l_extendedprice * (l_discount * -1 + 1)`` and summed per ``n_name``.

    Returns:
        The ``.dataframe`` attribute of the grouped result, holding the
        columns ``n_name`` and ``revenue``. No ordering is applied here.
    """
    # Convert every input to its standard-compliant counterpart.
    customer = customer_raw.__dataframe_consortium_standard__()
    orders = orders_raw.__dataframe_consortium_standard__()
    lineitem = lineitem_raw.__dataframe_consortium_standard__()
    supplier = supplier_raw.__dataframe_consortium_standard__()
    nation = nation_raw.__dataframe_consortium_standard__()
    region = region_raw.__dataframe_consortium_standard__()

    # The namespace supplies the date constructor used in the filter below.
    namespace = customer.__dataframe_namespace__()

    # Build the wide table one inner join at a time.
    joined = region.join(
        nation, how="inner", left_on="r_regionkey", right_on="n_regionkey"
    )
    joined = joined.join(
        customer, how="inner", left_on="n_nationkey", right_on="c_nationkey"
    )
    joined = joined.join(
        orders, how="inner", left_on="c_custkey", right_on="o_custkey"
    )
    joined = joined.join(
        lineitem, how="inner", left_on="o_orderkey", right_on="l_orderkey"
    )
    joined = joined.join(
        supplier,
        how="inner",
        left_on=["l_suppkey", "n_nationkey"],
        right_on=["s_suppkey", "s_nationkey"],
    )

    # Individual predicates, combined in the same left-to-right order.
    same_nation = joined.get_column_by_name(
        "c_nationkey"
    ) == joined.get_column_by_name("s_nationkey")
    in_asia = joined.get_column_by_name("r_name") == "ASIA"
    from_start = joined.get_column_by_name("o_orderdate") >= namespace.date(1994, 1, 1)  # type: ignore
    before_end = joined.get_column_by_name("o_orderdate") < namespace.date(1995, 1, 1)  # type: ignore
    filtered = joined.filter(same_nation & in_asia & from_start & before_end)

    # revenue = extended price * (1 - discount), written as ``* -1 + 1`` so the
    # column stays on the left-hand side of every operator.
    price = filtered.get_column_by_name("l_extendedprice")
    net_factor = filtered.get_column_by_name("l_discount") * -1 + 1
    revenue = (price * net_factor).rename("revenue")

    with_revenue = filtered.assign(revenue)
    trimmed = with_revenue.select(["revenue", "n_name"])
    totals = trimmed.group_by("n_name").sum()

    return totals.dataframe

0 commit comments

Comments
 (0)