From fce715a778339a5eaa3248b36d76dc8863a39776 Mon Sep 17 00:00:00 2001 From: yanglbme Date: Tue, 20 May 2025 20:56:38 +0800 Subject: [PATCH] feat: add solutions to lc problem: No.3554 No.3554.Find Category Recommendation Pairs --- .../README.md | 62 ++++++++++++++++++- .../README_EN.md | 62 ++++++++++++++++++- .../Solution.py | 28 +++++++++ .../Solution.sql | 24 +++++++ 4 files changed, 174 insertions(+), 2 deletions(-) create mode 100644 solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.py create mode 100644 solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.sql diff --git a/solution/3500-3599/3554.Find Category Recommendation Pairs/README.md b/solution/3500-3599/3554.Find Category Recommendation Pairs/README.md index 4d6311b2874fa..8dd8e676337db 100644 --- a/solution/3500-3599/3554.Find Category Recommendation Pairs/README.md +++ b/solution/3500-3599/3554.Find Category Recommendation Pairs/README.md @@ -171,14 +171,74 @@ product_id 是这张表的唯一主键。 -### 方法一 +### 方法一:连接 + 分组聚合 + +我们先将表 `ProductPurchases` 和表 `ProductInfo` 按照 `product_id` 进行连接,得到由 `user_id` 和 `category` 组成的表 `user_category`。接着,我们在 `user_category` 表中自连接,得到每个用户购买的所有类别对。然后,我们对这些类别对进行分组,统计每个类别对的用户数量,并筛选出用户数量大于等于 3 的类别对。 + +最后,我们按照用户数量降序、`category1` 升序、`category2` 升序的顺序进行排序,得到最终结果。 #### MySQL ```sql +# Write your MySQL query statement below +WITH + user_category AS ( + SELECT DISTINCT + user_id, + category + FROM + ProductPurchases + JOIN ProductInfo USING (product_id) + ), + pair_per_user AS ( + SELECT + a.user_id, + a.category AS category1, + b.category AS category2 + FROM + user_category AS a + JOIN user_category AS b ON a.user_id = b.user_id AND a.category < b.category + ) +SELECT category1, category2, COUNT(DISTINCT user_id) AS customer_count +FROM pair_per_user +GROUP BY 1, 2 +HAVING customer_count >= 3 +ORDER BY 3 DESC, 1, 2; +``` +#### Pandas + +```python +import pandas as pd + + +def find_category_recommendation_pairs( + product_purchases: pd.DataFrame, product_info: 
pd.DataFrame +) -> pd.DataFrame: + df = product_purchases[["user_id", "product_id"]].merge( + product_info[["product_id", "category"]], on="product_id", how="inner" + ) + user_category = df.drop_duplicates(subset=["user_id", "category"]) + pair_per_user = ( + user_category.merge(user_category, on="user_id") + .query("category_x < category_y") + .rename(columns={"category_x": "category1", "category_y": "category2"}) + ) + pair_counts = ( + pair_per_user.groupby(["category1", "category2"])["user_id"] + .nunique() + .reset_index(name="customer_count") + ) + result = ( + pair_counts.query("customer_count >= 3") + .sort_values( + ["customer_count", "category1", "category2"], ascending=[False, True, True] + ) + .reset_index(drop=True) + ) + return result ``` diff --git a/solution/3500-3599/3554.Find Category Recommendation Pairs/README_EN.md b/solution/3500-3599/3554.Find Category Recommendation Pairs/README_EN.md index ce82e1fb6f5e3..353159e7ec516 100644 --- a/solution/3500-3599/3554.Find Category Recommendation Pairs/README_EN.md +++ b/solution/3500-3599/3554.Find Category Recommendation Pairs/README_EN.md @@ -170,14 +170,74 @@ Each row assigns a category and price to a product. -### Solution 1 +### Solution 1: Join + Group Aggregation + +First, we join the `ProductPurchases` table and the `ProductInfo` table on `product_id` to obtain a `user_category` table consisting of `user_id` and `category`. Next, we self-join the `user_category` table to get all category pairs purchased by each user. Then, we group these category pairs, count the number of distinct users for each pair, and keep only the pairs with at least 3 users. + +Finally, we sort the result by customer count in descending order, then by `category1` in ascending order, and then by `category2` in ascending order. 
#### MySQL ```sql +# Write your MySQL query statement below +WITH + user_category AS ( + SELECT DISTINCT + user_id, + category + FROM + ProductPurchases + JOIN ProductInfo USING (product_id) + ), + pair_per_user AS ( + SELECT + a.user_id, + a.category AS category1, + b.category AS category2 + FROM + user_category AS a + JOIN user_category AS b ON a.user_id = b.user_id AND a.category < b.category + ) +SELECT category1, category2, COUNT(DISTINCT user_id) AS customer_count +FROM pair_per_user +GROUP BY 1, 2 +HAVING customer_count >= 3 +ORDER BY 3 DESC, 1, 2; +``` +#### Pandas + +```python +import pandas as pd + + +def find_category_recommendation_pairs( + product_purchases: pd.DataFrame, product_info: pd.DataFrame +) -> pd.DataFrame: + df = product_purchases[["user_id", "product_id"]].merge( + product_info[["product_id", "category"]], on="product_id", how="inner" + ) + user_category = df.drop_duplicates(subset=["user_id", "category"]) + pair_per_user = ( + user_category.merge(user_category, on="user_id") + .query("category_x < category_y") + .rename(columns={"category_x": "category1", "category_y": "category2"}) + ) + pair_counts = ( + pair_per_user.groupby(["category1", "category2"])["user_id"] + .nunique() + .reset_index(name="customer_count") + ) + result = ( + pair_counts.query("customer_count >= 3") + .sort_values( + ["customer_count", "category1", "category2"], ascending=[False, True, True] + ) + .reset_index(drop=True) + ) + return result ``` diff --git a/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.py b/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.py new file mode 100644 index 0000000000000..7c0b53858c7a3 --- /dev/null +++ b/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.py @@ -0,0 +1,28 @@ +import pandas as pd + + +def find_category_recommendation_pairs( + product_purchases: pd.DataFrame, product_info: pd.DataFrame +) -> pd.DataFrame: + df = product_purchases[["user_id", "product_id"]].merge( 
+ product_info[["product_id", "category"]], on="product_id", how="inner" + ) + user_category = df.drop_duplicates(subset=["user_id", "category"]) + pair_per_user = ( + user_category.merge(user_category, on="user_id") + .query("category_x < category_y") + .rename(columns={"category_x": "category1", "category_y": "category2"}) + ) + pair_counts = ( + pair_per_user.groupby(["category1", "category2"])["user_id"] + .nunique() + .reset_index(name="customer_count") + ) + result = ( + pair_counts.query("customer_count >= 3") + .sort_values( + ["customer_count", "category1", "category2"], ascending=[False, True, True] + ) + .reset_index(drop=True) + ) + return result diff --git a/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.sql b/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.sql new file mode 100644 index 0000000000000..d6b38789818a6 --- /dev/null +++ b/solution/3500-3599/3554.Find Category Recommendation Pairs/Solution.sql @@ -0,0 +1,24 @@ +# Write your MySQL query statement below +WITH + user_category AS ( + SELECT DISTINCT + user_id, + category + FROM + ProductPurchases + JOIN ProductInfo USING (product_id) + ), + pair_per_user AS ( + SELECT + a.user_id, + a.category AS category1, + b.category AS category2 + FROM + user_category AS a + JOIN user_category AS b ON a.user_id = b.user_id AND a.category < b.category + ) +SELECT category1, category2, COUNT(DISTINCT user_id) AS customer_count +FROM pair_per_user +GROUP BY 1, 2 +HAVING customer_count >= 3 +ORDER BY 3 DESC, 1, 2;