From 89dd0ec3b03bd11d6361c7d0f083af8fae720280 Mon Sep 17 00:00:00 2001
From: Atughara2021 <atugharajohn@gmail.com>
Date: Mon, 8 Apr 2024 10:32:45 +0600
Subject: [PATCH 1/4] Add A_John.txt

---
 A_John.txt | 2 ++
 1 file changed, 2 insertions(+)
 create mode 100644 A_John.txt

diff --git a/A_John.txt b/A_John.txt
new file mode 100644
index 0000000..dd813da
--- /dev/null
+++ b/A_John.txt
@@ -0,0 +1,2 @@
+Believe in yourself and all that you are. 
+Know that there is something inside you that is greater than any obstacle.
\ No newline at end of file

From 759c083bc2345c995376e57d88d11566389c0843 Mon Sep 17 00:00:00 2001
From: Atughara2021 <atugharajohn@gmail.com>
Date: Tue, 9 Apr 2024 10:32:45 +0600
Subject: [PATCH 2/4] Update README

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 3076250..f553470 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,6 @@ $ Plagiarism-checker-Python-> python3 app.py
 ('fatma.txt', 'juma.txt', 0.18643448370323362)
 
 ```
-
 ## A Python Library?
 
 Would you like to use a Python library instead to help you compare strings and documents without spending time writing the vectorizers by yourself, then take a look at [Pysimilar](https://github.com/Kalebu/pysimilar).
@@ -57,7 +56,7 @@ you can raise an issue.
 
 ## Pull Requests
 
-If you have something to add, I welcome pull requests on improvement; your helpful contribution will be merged as soon as possible.
+If you have something to add, I welcome pull requests on improvement; your helpful contribution will be merged as soon as possible. 
 
 ## Give it a Star
 

From ef26efaa7e41399fc3034398f171757a19d0000f Mon Sep 17 00:00:00 2001
From: Atughara2021 <atugharajohn@gmail.com>
Date: Fri, 12 Apr 2024 11:32:45 +0600
Subject: [PATCH 3/4] Add threshold for the similarity score

---
 app.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/app.py b/app.py
index 7a6f452..b4f2915 100644
--- a/app.py
+++ b/app.py
@@ -15,8 +15,8 @@ def similarity(doc1, doc2): return cosine_similarity([doc1, doc2])
 s_vectors = list(zip(student_files, vectors))
 plagiarism_results = set()
 
-
-def check_plagiarism():
+# threshold for the similarity score
+def check_plagiarism(threshold=0.8):
     global s_vectors
     for student_a, text_vector_a in s_vectors:
         new_vectors = s_vectors.copy()
@@ -24,11 +24,9 @@ def check_plagiarism():
         del new_vectors[current_index]
         for student_b, text_vector_b in new_vectors:
             sim_score = similarity(text_vector_a, text_vector_b)[0][1]
-            student_pair = sorted((student_a, student_b))
-            score = (student_pair[0], student_pair[1], sim_score)
-            plagiarism_results.add(score)
+            if sim_score > threshold:
+                student_pair = sorted((student_a, student_b))
+                score = (student_pair[0], student_pair[1], sim_score)
+                plagiarism_results.add(score)
     return plagiarism_results
 
-
-for data in check_plagiarism():
-    print(data)

From 78a4ae8c6ab34336e9c2270f05d58ff1c4f7b53a Mon Sep 17 00:00:00 2001
From: Atughara2021 <atugharajohn@gmail.com>
Date: Sat, 13 Apr 2024 11:32:45 +0600
Subject: [PATCH 4/4] Print plagiarism results

---
 app.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app.py b/app.py
index b4f2915..6f51cb2 100644
--- a/app.py
+++ b/app.py
@@ -16,6 +16,7 @@ def similarity(doc1, doc2): return cosine_similarity([doc1, doc2])
 plagiarism_results = set()
 
 # threshold for the similarity score
+
 def check_plagiarism(threshold=0.8):
     global s_vectors
     for student_a, text_vector_a in s_vectors:
@@ -30,3 +31,5 @@ def check_plagiarism(threshold=0.8):
                 plagiarism_results.add(score)
     return plagiarism_results
 
+for data in check_plagiarism():
+    print(data)