Skip to content

Commit 393353c

Browse files
authored
Improved tasks 2882, 2883
1 parent 54b28d4 commit 393353c

File tree

2 files changed

+166
-0
lines changed

2 files changed

+166
-0
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import unittest
2+
import pandas as pd
3+
from pandas.testing import assert_frame_equal
4+
5+
def dropDuplicateEmails(customers: pd.DataFrame) -> pd.DataFrame:
    """Return ``customers`` with rows that repeat an ``email`` removed.

    For every distinct email only the first row (in the frame's existing row
    order) is kept. The input frame is left untouched: a new DataFrame is
    returned, and the surviving rows keep their original index labels.

    Args:
        customers: Frame that contains an ``email`` column.

    Returns:
        A new DataFrame holding at most one row per email value.
    """
    # Deliberately not ``inplace=True``: callers should not have their own
    # DataFrame silently shortened as a side effect of asking for a result.
    return customers.drop_duplicates(subset='email', keep='first')
8+
9+
class TestDropDuplicateEmails(unittest.TestCase):
    """Unit tests for ``dropDuplicateEmails``."""

    def _check(self, customers, expected):
        """Run ``dropDuplicateEmails`` on ``customers`` and compare to ``expected``.

        Both frames are given a fresh positional index before the comparison,
        so only row contents and row order are checked, not index labels.
        """
        actual = dropDuplicateEmails(customers)
        assert_frame_equal(
            actual.reset_index(drop=True),
            expected.reset_index(drop=True),
        )

    def test_no_duplicates(self):
        # Every email is unique, so the frame must come back unchanged.
        records = {
            'customer_id': [1, 2, 3],
            'name': ['Ella', 'David', 'Zachary'],
            'email': ['emily@example.com', 'michael@example.com', 'sarah@example.com'],
        }
        # Two independent frames are built from the same records so the
        # expectation does not share an object with the input.
        self._check(pd.DataFrame(records), pd.DataFrame(records))

    def test_with_duplicates(self):
        # 'john@example.com' appears twice; only the first occurrence (id 4)
        # is expected to survive.
        records = {
            'customer_id': [1, 2, 3, 4, 5, 6],
            'name': ['Ella', 'David', 'Zachary', 'Alice', 'Finn', 'Violet'],
            'email': [
                'emily@example.com', 'michael@example.com', 'sarah@example.com',
                'john@example.com', 'john@example.com', 'alice@example.com',
            ],
        }
        deduplicated = {
            'customer_id': [1, 2, 3, 4, 6],
            'name': ['Ella', 'David', 'Zachary', 'Alice', 'Violet'],
            'email': [
                'emily@example.com', 'michael@example.com', 'sarah@example.com',
                'john@example.com', 'alice@example.com',
            ],
        }
        self._check(pd.DataFrame(records), pd.DataFrame(deduplicated))

    def test_empty_dataframe(self):
        # A frame with columns but no rows must stay empty.
        customers = pd.DataFrame(columns=['customer_id', 'name', 'email'])
        # The copy is taken before the call so it is independent of the input.
        self._check(customers, customers.copy())

    def test_single_row(self):
        # A single row cannot contain a duplicate.
        records = {
            'customer_id': [1],
            'name': ['Ella'],
            'email': ['emily@example.com'],
        }
        self._check(pd.DataFrame(records), pd.DataFrame(records))
71+
# Run the test suite only when this file is executed directly (not on import).
if __name__ == '__main__':
    unittest.main()
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import unittest
2+
import pandas as pd
3+
from pandas.testing import assert_frame_equal
4+
5+
def dropMissingData(students: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of ``students`` whose ``name`` value is present.

    The input is first wrapped in a new ``DataFrame`` object, and the rows
    with a missing ``name`` are dropped from that wrapper; the surviving
    rows keep their original index labels.

    Args:
        students: Frame that contains a ``name`` column.

    Returns:
        A DataFrame without the rows that have a missing ``name``.
    """
    return pd.DataFrame(students).dropna(subset='name')
9+
10+
class TestDropMissingData(unittest.TestCase):
    """Unit tests for ``dropMissingData``."""

    def _check(self, students, expected, check_dtype=True):
        """Run ``dropMissingData`` on ``students`` and compare to ``expected``.

        Both frames are given a fresh positional index before the comparison,
        so only row contents and row order are checked, not index labels.
        ``check_dtype`` is forwarded to ``assert_frame_equal``.
        """
        actual = dropMissingData(students)
        assert_frame_equal(
            actual.reset_index(drop=True),
            expected.reset_index(drop=True),
            check_dtype=check_dtype,
        )

    def test_no_missing_data(self):
        # No missing values in the 'name' column: nothing should be dropped.
        records = {
            'student_id': [32, 779, 849],
            'name': ['Piper', 'Georgia', 'Willow'],
            'age': [5, 20, 14],
        }
        self._check(pd.DataFrame(records), pd.DataFrame(records))

    def test_with_missing_data(self):
        # One row (student 217) has a missing 'name' and must be removed.
        records = {
            'student_id': [32, 217, 779, 849],
            'name': ['Piper', None, 'Georgia', 'Willow'],
            'age': [5, 19, 20, 14],
        }
        remaining = {
            'student_id': [32, 779, 849],
            'name': ['Piper', 'Georgia', 'Willow'],
            'age': [5, 20, 14],
        }
        self._check(pd.DataFrame(records), pd.DataFrame(remaining))

    def test_empty_dataframe(self):
        # A frame with columns but no rows must stay empty.
        students = pd.DataFrame(columns=['student_id', 'name', 'age'])
        # The copy is taken before the call so it is independent of the input.
        self._check(students, students.copy())

    def test_all_missing_data(self):
        # Every 'name' value is missing, so no row should survive.
        records = {
            'student_id': [217, 301],
            'name': [None, None],
            'age': [19, 21],
        }
        no_rows = pd.DataFrame(columns=['student_id', 'name', 'age'])
        # Dtypes are not compared: the expected frame is built from column
        # names alone, without any typed data.
        self._check(pd.DataFrame(records), no_rows, check_dtype=False)

    def test_single_row_with_missing_name(self):
        # The only row has a missing 'name', so the result must be empty.
        records = {
            'student_id': [217],
            'name': [None],
            'age': [19],
        }
        no_rows = pd.DataFrame(columns=['student_id', 'name', 'age'])
        # Dtypes are not compared: the expected frame is built from column
        # names alone, without any typed data.
        self._check(pd.DataFrame(records), no_rows, check_dtype=False)
92+
93+
# Run the test suite only when this file is executed directly (not on import).
if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)