From 89dd0ec3b03bd11d6361c7d0f083af8fae720280 Mon Sep 17 00:00:00 2001 From: Atughara2021 Date: Mon, 8 Apr 2024 10:32:45 +0600 Subject: [PATCH 1/4] new txt file added --- A_John.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 A_John.txt diff --git a/A_John.txt b/A_John.txt new file mode 100644 index 0000000..dd813da --- /dev/null +++ b/A_John.txt @@ -0,0 +1,2 @@ +Believe in yourself and all that you are. +Know that there is something inside you that is greater than any obstacle. \ No newline at end of file From 759c083bc2345c995376e57d88d11566389c0843 Mon Sep 17 00:00:00 2001 From: Atughara2021 Date: Tue, 9 Apr 2024 10:32:45 +0600 Subject: [PATCH 2/4] readme Update --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 3076250..f553470 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,6 @@ $ Plagiarism-checker-Python-> python3 app.py ('fatma.txt', 'juma.txt', 0.18643448370323362) ``` - ## A Python Library? Would you like to use a Python library instead to help you compare strings and documents without spending time writing the vectorizers by yourself, then take a look at [Pysimilar](https://github.com/Kalebu/pysimilar). @@ -57,7 +56,7 @@ you can raise an issue. ## Pull Requests -If you have something to add, I welcome pull requests on improvement; your helpful contribution will be merged as soon as possible. +If you have something to add, I welcome pull requests on improvement; your helpful contribution will be merged as soon as possible. ## Give it a Star From ef26efaa7e41399fc3034398f171757a19d0000f Mon Sep 17 00:00:00 2001 From: Atughara2021 Date: Fri, 12 Apr 2024 11:32:45 +0600 Subject: [PATCH 3/4] adding threshold for the similarity score --- app.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 7a6f452..b4f2915 100644 --- a/app.py +++ b/app.py @@ -15,8 +15,8 @@ def similarity(doc1, doc2): return cosine_similarity([doc1, doc2]) s_vectors = list(zip(student_files, vectors)) plagiarism_results = set() - -def check_plagiarism(): +# threshold for the similarity score +def check_plagiarism(threshold=0.8): global s_vectors for student_a, text_vector_a in s_vectors: new_vectors = s_vectors.copy() @@ -24,11 +24,9 @@ def check_plagiarism(): del new_vectors[current_index] for student_b, text_vector_b in new_vectors: sim_score = similarity(text_vector_a, text_vector_b)[0][1] - student_pair = sorted((student_a, student_b)) - score = (student_pair[0], student_pair[1], sim_score) - plagiarism_results.add(score) + if sim_score > threshold: + student_pair = sorted((student_a, student_b)) + score = (student_pair[0], student_pair[1], sim_score) + plagiarism_results.add(score) return plagiarism_results - -for data in check_plagiarism(): - print(data) From 78a4ae8c6ab34336e9c2270f05d58ff1c4f7b53a Mon Sep 17 00:00:00 2001 From: Atughara2021 Date: Sat, 13 Apr 2024 11:32:45 +0600 Subject: [PATCH 4/4] print data --- app.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/app.py b/app.py index b4f2915..6f51cb2 100644 --- a/app.py +++ b/app.py @@ -16,6 +16,7 @@ def similarity(doc1, doc2): return cosine_similarity([doc1, doc2]) plagiarism_results = set() # threshold for the similarity score + def check_plagiarism(threshold=0.8): global s_vectors for student_a, text_vector_a in s_vectors: @@ -30,3 +31,5 @@ def check_plagiarism(threshold=0.8): plagiarism_results.add(score) return plagiarism_results +for data in check_plagiarism(): + print(data)