From 9604c0bf385aaf354291f20156a94c841eee66c2 Mon Sep 17 00:00:00 2001
From: UdovenkoVolodymyr <uvv333@hotmail.com>
Date: Wed, 12 Dec 2018 19:05:09 +0200
Subject: [PATCH 1/4] homework done

---
 sourse_udovenko.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 sourse_udovenko.py

diff --git a/sourse_udovenko.py b/sourse_udovenko.py
new file mode 100644
index 0000000..82224b1
--- /dev/null
+++ b/sourse_udovenko.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+import json
+import re
+from nltk.wsd import lesk
+
+
+class TextProcessor:
+
+    def processing_text_to_json(self, input_file, output_file):
+
+        with open(input_file, encoding='utf-8') as data_file:
+            json_dict = dict()
+
+            # list of patterns for regular expressions
+            tags_patt = re.compile(r",\[.*\]")
+            clr_tags_patt = re.compile(r"\w+")
+            body_patt = re.compile(r"#\w+|@\w+")
+            clr_body_patt = re.compile(r"\w+")
+            url_patt = re.compile(r"https?://[A-Za-z0-9./]+")
+            sign_patt = re.compile(r"$\w+")
+            token_skip_patt = re.compile(r"#\w+|@\w+|\$\w+|https?://[A-Za-z0-9./]+|")
+            token_clr_patt = re.compile(r"[A-Za-z0-9.\-:]+")
+
+            # main loop for processing source file rows
+            for row in data_file:
+                # instances filled in loops
+                row_dict = dict()
+                body = str()
+                tokens_clr = list()
+                orphan_tokens = list()
+
+                # task 1 and 2  for the tags array || and task 3 for the text
+                sign_result = re.findall(sign_patt, row)
+                tags_result = re.findall(tags_patt, row)
+                tags_clr = re.findall(clr_tags_patt, str(tags_result))
+                url_result = re.findall(url_patt, row)
+                metadata = tags_clr + url_result
+
+                # task 1 for the text and processing text
+                for tag in tags_result:
+                    body = row.replace(tag, '')
+
+                clr_row = body.replace("&amp;", "&", ) \
+                    .replace("&#39;", "'").replace("&quot;", '"').replace('&rsquo;', '\'')
+
+                for sign in sign_result:
+                    clr_row = clr_row.replace(sign, '')
+
+                # task 2 for the text
+                body_tags = re.findall(body_patt, clr_row)
+                body_tags = re.findall(clr_body_patt, str(body_tags))
+
+                # task 4 for text
+                clr_row = clr_row.split()
+
+                for token in clr_row:
+                    token_skip = re.findall(token_skip_patt, token)
+                    if len(max(token_skip, key=len)) == 0:
+                        tokens_clr += re.findall(token_clr_patt, token)
+
+                for token in tokens_clr:
+                    if lesk(tokens_clr, token) is None:
+                        orphan_tokens.append(token)
+
+                # overall data save
+                row_dict["body"] = body
+                row_dict["body_tags"] = body_tags
+                row_dict["metadata"] = metadata
+                row_dict["orphan_tokens"] = orphan_tokens
+
+                json_dict.setdefault("records", []).append(row_dict)
+
+        with open(output_file, 'w') as out_file:
+            parsed = json.loads(json.dumps(json_dict))
+            json.dump(parsed, out_file, indent=2)
+
+
+if __name__ == "__main__":
+    t = TextProcessor()
+    t.processing_text_to_json(input_file='input.txt', output_file='output.json')

From 43b792212f79b65387de71db75fc72ec1c51321e Mon Sep 17 00:00:00 2001
From: UdovenkoVolodymyr <uvv333@hotmail.com>
Date: Wed, 12 Dec 2018 19:35:40 +0200
Subject: [PATCH 2/4] Homework is done master branch

---
 ...de-style-and-composition-course-python.iml |  11 ++
 .idea/encodings.xml                           |   4 +
 .idea/misc.xml                                |   4 +
 .idea/modules.xml                             |   8 +
 .idea/vcs.xml                                 |   6 +
 .idea/workspace.xml                           | 140 ++++++++++++++++++
 sourse_udovenko.py                            |   1 +
 7 files changed, 174 insertions(+)
 create mode 100644 .idea/code-style-and-composition-course-python.iml
 create mode 100644 .idea/encodings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/vcs.xml
 create mode 100644 .idea/workspace.xml

diff --git a/.idea/code-style-and-composition-course-python.iml b/.idea/code-style-and-composition-course-python.iml
new file mode 100644
index 0000000..6711606
--- /dev/null
+++ b/.idea/code-style-and-composition-course-python.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+  <component name="TestRunnerService">
+    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/encodings.xml b/.idea/encodings.xml
new file mode 100644
index 0000000..15a15b2
--- /dev/null
+++ b/.idea/encodings.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Encoding" addBOMForNewFiles="with NO BOM" />
+</project>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..65531ca
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..27d5593
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/code-style-and-composition-course-python.iml" filepath="$PROJECT_DIR$/.idea/code-style-and-composition-course-python.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
new file mode 100644
index 0000000..59fd762
--- /dev/null
+++ b/.idea/workspace.xml
@@ -0,0 +1,140 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ChangeListManager">
+    <list default="true" id="30a89fb3-1ca9-4152-8998-53606947a8d7" name="Default Changelist" comment="Homework is done" />
+    <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileEditorManager">
+    <leaf>
+      <file pinned="false" current-in-tab="true">
+        <entry file="file://$PROJECT_DIR$/sourse_udovenko.py">
+          <provider selected="true" editor-type-id="text-editor">
+            <state relative-caret-position="30">
+              <caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
+            </state>
+          </provider>
+        </entry>
+      </file>
+    </leaf>
+  </component>
+  <component name="Git.Settings">
+    <option name="UPDATE_TYPE" value="MERGE" />
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="IdeDocumentHistory">
+    <option name="CHANGED_PATHS">
+      <list>
+        <option value="$PROJECT_DIR$/sourse_udovenko.py" />
+      </list>
+    </option>
+  </component>
+  <component name="ProjectFrameBounds" extendedState="6">
+    <option name="x" value="269" />
+    <option name="y" value="132" />
+    <option name="width" value="917" />
+    <option name="height" value="789" />
+  </component>
+  <component name="ProjectView">
+    <navigator proportions="" version="1">
+      <foldersAlwaysOnTop value="true" />
+    </navigator>
+    <panes>
+      <pane id="Scope" />
+      <pane id="ProjectPane">
+        <subPane>
+          <expand>
+            <path>
+              <item name="code-style-and-composition-course-python" type="b2602c69:ProjectViewProjectNode" />
+              <item name="code-style-and-composition-course-python" type="462c0819:PsiDirectoryNode" />
+            </path>
+          </expand>
+          <select />
+        </subPane>
+      </pane>
+    </panes>
+  </component>
+  <component name="PropertiesComponent">
+    <property name="last_opened_file_path" value="$PROJECT_DIR$" />
+    <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
+  </component>
+  <component name="RunDashboard">
+    <option name="ruleStates">
+      <list>
+        <RuleState>
+          <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
+        </RuleState>
+        <RuleState>
+          <option name="name" value="StatusDashboardGroupingRule" />
+        </RuleState>
+      </list>
+    </option>
+  </component>
+  <component name="SvnConfiguration">
+    <configuration />
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="30a89fb3-1ca9-4152-8998-53606947a8d7" name="Default Changelist" comment="" />
+      <created>1544634968644</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1544634968644</updated>
+    </task>
+    <task id="LOCAL-00001" summary="Homework is done">
+      <created>1544635096161</created>
+      <option name="number" value="00001" />
+      <option name="presentableId" value="LOCAL-00001" />
+      <option name="project" value="LOCAL" />
+      <updated>1544635096163</updated>
+    </task>
+    <task id="LOCAL-00002" summary="Homework is done">
+      <created>1544635184072</created>
+      <option name="number" value="00002" />
+      <option name="presentableId" value="LOCAL-00002" />
+      <option name="project" value="LOCAL" />
+      <updated>1544635184072</updated>
+    </task>
+    <option name="localTasksCounter" value="3" />
+    <servers />
+  </component>
+  <component name="ToolWindowManager">
+    <frame x="67" y="25" width="1853" height="1055" extended-state="6" />
+    <editor active="true" />
+    <layout>
+      <window_info id="Favorites" side_tool="true" />
+      <window_info active="true" content_ui="combo" id="Project" order="0" visible="true" weight="0.12076503" />
+      <window_info id="Structure" order="1" side_tool="true" weight="0.25" />
+      <window_info anchor="bottom" id="Version Control" />
+      <window_info anchor="bottom" id="Python Console" />
+      <window_info anchor="bottom" id="Terminal" />
+      <window_info anchor="bottom" id="Event Log" side_tool="true" />
+      <window_info anchor="bottom" id="Message" order="0" />
+      <window_info anchor="bottom" id="Find" order="1" />
+      <window_info anchor="bottom" id="Run" order="2" />
+      <window_info anchor="bottom" id="Debug" order="3" weight="0.4" />
+      <window_info anchor="bottom" id="Cvs" order="4" weight="0.25" />
+      <window_info anchor="bottom" id="Inspection" order="5" weight="0.4" />
+      <window_info anchor="bottom" id="TODO" order="6" />
+      <window_info anchor="right" id="Commander" internal_type="SLIDING" order="0" type="SLIDING" weight="0.4" />
+      <window_info anchor="right" id="Ant Build" order="1" weight="0.25" />
+      <window_info anchor="right" content_ui="combo" id="Hierarchy" order="2" weight="0.25" />
+    </layout>
+  </component>
+  <component name="VcsManagerConfiguration">
+    <MESSAGE value="Homework is done" />
+    <option name="LAST_COMMIT_MESSAGE" value="Homework is done" />
+  </component>
+  <component name="editorHistoryManager">
+    <entry file="file://$PROJECT_DIR$/sourse_udovenko.py">
+      <provider selected="true" editor-type-id="text-editor">
+        <state relative-caret-position="30">
+          <caret line="4" lean-forward="true" selection-start-line="4" selection-end-line="4" />
+        </state>
+      </provider>
+    </entry>
+  </component>
+</project>
\ No newline at end of file
diff --git a/sourse_udovenko.py b/sourse_udovenko.py
index 82224b1..4fb2eb1 100644
--- a/sourse_udovenko.py
+++ b/sourse_udovenko.py
@@ -76,5 +76,6 @@ def processing_text_to_json(self, input_file, output_file):
 
 
 if __name__ == "__main__":
+
     t = TextProcessor()
     t.processing_text_to_json(input_file='input.txt', output_file='output.json')

From acf4aba9d4bc6656b6f0d9e1c6ca851824c36a3f Mon Sep 17 00:00:00 2001
From: UdovenkoVolodymyr <uvv333@hotmail.com>
Date: Thu, 13 Dec 2018 15:33:53 +0200
Subject: [PATCH 3/4] refactored class methods

---
 main.py | 101 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 95 insertions(+), 6 deletions(-)

diff --git a/main.py b/main.py
index 99f91a7..14070ad 100644
--- a/main.py
+++ b/main.py
@@ -1,13 +1,102 @@
 #!/usr/bin/env python
+import re
+from nltk.wsd import lesk
 
-# Your classes or methods go here:
 
+class JsonParsedWriter:
 
-class Tool:
-    def my_method(self):
-        print("my_method called!")
+    @staticmethod
+    def write(data, output_name, indent=2):
+        """Dump *data* as JSON, indented by *indent* spaces, into *output_name*."""
+        import json
+        with open(output_name, 'w', encoding='utf-8') as out_file:
+            json.dump(data, out_file, indent=indent)
+
+
+class TweetCleaner:
+
+    def __init__(self, input_file_name):
+        """Store the input file name and precompile the per-row regexes."""
+        self.name = input_file_name
+
+        # precompiled patterns; the trailing '|' in token_skip_patt keeps findall() non-empty
+        self.tags_patt = re.compile(r",\[.*\]")
+        self.clr_tags_patt = re.compile(r"\w+")
+        self.body_patt = re.compile(r"#\w+|@\w+")
+        self.clr_body_patt = re.compile(r"\w+")
+        self.url_patt = re.compile(r"https?://[A-Za-z0-9./]+")
+        self.sign_patt = re.compile(r"\$\w+")  # '$' escaped: match literal $WORD, not end-of-line
+        self.token_skip_patt = re.compile(r"#\w+|@\w+|\$\w+|https?://[A-Za-z0-9./]+|")
+        self.token_clr_patt = re.compile(r"[A-Za-z0-9.\-:]+")
+
+    def __clr_tags_to_metadata(self, row):
+        # task 1 and 2  for the tags array || and task 3 for the text
+        self.sign_result = re.findall(self.sign_patt, row)
+        self.tags_result = re.findall(self.tags_patt, row)
+        tags_clr = re.findall(self.clr_tags_patt, str(self.tags_result))
+        url_result = re.findall(self.url_patt, row)
+        self.metadata = tags_clr + url_result
+
+    def __remove_dollar_sign_words(self, row, tag_results):
+        # task 1 for the text: strip the tag array; a row without one keeps its text
+        self.body = row
+        for tag in tag_results:
+            self.body = self.body.replace(tag, '')
+
+        self.clr_row = self.body.replace("&amp;", "&", ) \
+            .replace("&#39;", "'").replace("&quot;", '"').replace('&rsquo;', '\'')
+
+        for sign in self.sign_result:
+            self.clr_row = self.clr_row.replace(sign, '')
+
+    def __place_tags_wrd_to_body_tags(self, clr_row):
+        # task 2 for the text
+        self.body_tags = re.findall(self.body_patt, clr_row)
+        self.body_tags = re.findall(self.clr_body_patt, str(self.body_tags))
+
+    def __tokenize_add_orphan_tokens(self, clr_row):
+        # task 4 for text
+        self.orphan_tokens = list()
+        tokens_clr = list()
+        clr_row = clr_row.split()
+
+        for token in clr_row:
+            token_skip = re.findall(self.token_skip_patt, token)
+            if len(max(token_skip, key=len)) == 0:
+                tokens_clr += re.findall(self.token_clr_patt, token)
+
+        for token in tokens_clr:
+            if lesk(tokens_clr, token) is None:
+                self.orphan_tokens.append(token)
+
+    def processing_text(self):
+
+        with open(self.name, encoding='utf-8') as data_file:
+            json_dict = dict()
+
+            # main loop for processing source file rows
+            for row in data_file:
+                row_dict = dict()
+
+                self.__clr_tags_to_metadata(row=row)
+                self.__remove_dollar_sign_words(row=row, tag_results=self.tags_result)
+                self.__place_tags_wrd_to_body_tags(clr_row=self.clr_row)
+                self.__tokenize_add_orphan_tokens(clr_row=self.clr_row)
+
+                # overall data save
+                row_dict["body"] = self.body
+                row_dict["body_tags"] = self.body_tags
+                row_dict["metadata"] = self.metadata
+                row_dict["orphan_tokens"] = self.orphan_tokens
+
+                json_dict.setdefault("records", []).append(row_dict)
+
+        return json_dict
 
 
 if __name__ == "__main__":
-    t = Tool()
-    t.my_method()
+
+    tc = TweetCleaner(input_file_name='input.txt')
+    processed_text = tc.processing_text()
+    t = JsonParsedWriter()
+    t.write(processed_text, 'result.json')

From 6091bd7b5fedc553e0949eb2c1477e5d50cdb584 Mon Sep 17 00:00:00 2001
From: UdovenkoVolodymyr <uvv333@hotmail.com>
Date: Thu, 13 Dec 2018 15:42:55 +0200
Subject: [PATCH 4/4] refactored class methods

---
 sourse_udovenko.py | 132 ++++++++++++++++++++++++++-------------------
 1 file changed, 76 insertions(+), 56 deletions(-)

diff --git a/sourse_udovenko.py b/sourse_udovenko.py
index 4fb2eb1..77d0c41 100644
--- a/sourse_udovenko.py
+++ b/sourse_udovenko.py
@@ -1,81 +1,101 @@
 #!/usr/bin/env python
-import json
 import re
 from nltk.wsd import lesk
 
 
-class TextProcessor:
+class JsonParsedWriter:
 
-    def processing_text_to_json(self, input_file, output_file):
+    @staticmethod
+    def write(data, output_name, indent=2):
+        """Dump *data* as JSON, indented by *indent* spaces, into *output_name*."""
+        import json
+        with open(output_name, 'w', encoding='utf-8') as out_file:
+            json.dump(data, out_file, indent=indent)
 
-        with open(input_file, encoding='utf-8') as data_file:
-            json_dict = dict()
 
-            # list of patterns for regular expressions
-            tags_patt = re.compile(r",\[.*\]")
-            clr_tags_patt = re.compile(r"\w+")
-            body_patt = re.compile(r"#\w+|@\w+")
-            clr_body_patt = re.compile(r"\w+")
-            url_patt = re.compile(r"https?://[A-Za-z0-9./]+")
-            sign_patt = re.compile(r"$\w+")
-            token_skip_patt = re.compile(r"#\w+|@\w+|\$\w+|https?://[A-Za-z0-9./]+|")
-            token_clr_patt = re.compile(r"[A-Za-z0-9.\-:]+")
+class TweetCleaner:
 
-            # main loop for processing source file rows
-            for row in data_file:
-                # instances filled in loops
-                row_dict = dict()
-                body = str()
-                tokens_clr = list()
-                orphan_tokens = list()
+    def __init__(self, input_file_name):
 
-                # task 1 and 2  for the tags array || and task 3 for the text
-                sign_result = re.findall(sign_patt, row)
-                tags_result = re.findall(tags_patt, row)
-                tags_clr = re.findall(clr_tags_patt, str(tags_result))
-                url_result = re.findall(url_patt, row)
-                metadata = tags_clr + url_result
+        self.name = input_file_name
 
-                # task 1 for the text and processing text
-                for tag in tags_result:
-                    body = row.replace(tag, '')
+        # precompiled patterns; the trailing '|' in token_skip_patt keeps findall() non-empty
+        self.tags_patt = re.compile(r",\[.*\]")
+        self.clr_tags_patt = re.compile(r"\w+")
+        self.body_patt = re.compile(r"#\w+|@\w+")
+        self.clr_body_patt = re.compile(r"\w+")
+        self.url_patt = re.compile(r"https?://[A-Za-z0-9./]+")
+        self.sign_patt = re.compile(r"\$\w+")  # '$' escaped: match literal $WORD, not end-of-line
+        self.token_skip_patt = re.compile(r"#\w+|@\w+|\$\w+|https?://[A-Za-z0-9./]+|")
+        self.token_clr_patt = re.compile(r"[A-Za-z0-9.\-:]+")
 
-                clr_row = body.replace("&amp;", "&", ) \
-                    .replace("&#39;", "'").replace("&quot;", '"').replace('&rsquo;', '\'')
+    def __clr_tags_to_metadata(self, row):
+        # task 1 and 2  for the tags array || and task 3 for the text
+        self.sign_result = re.findall(self.sign_patt, row)
+        self.tags_result = re.findall(self.tags_patt, row)
+        tags_clr = re.findall(self.clr_tags_patt, str(self.tags_result))
+        url_result = re.findall(self.url_patt, row)
+        self.metadata = tags_clr + url_result
 
-                for sign in sign_result:
-                    clr_row = clr_row.replace(sign, '')
+    def __remove_dollar_sign_words(self, row, tag_results):
+        # task 1 for the text: strip the tag array; a row without one keeps its text
+        self.body = row
+        for tag in tag_results:
+            self.body = self.body.replace(tag, '')
 
-                # task 2 for the text
-                body_tags = re.findall(body_patt, clr_row)
-                body_tags = re.findall(clr_body_patt, str(body_tags))
+        self.clr_row = self.body.replace("&amp;", "&", ) \
+            .replace("&#39;", "'").replace("&quot;", '"').replace('&rsquo;', '\'')
 
-                # task 4 for text
-                clr_row = clr_row.split()
+        for sign in self.sign_result:
+            self.clr_row = self.clr_row.replace(sign, '')
 
-                for token in clr_row:
-                    token_skip = re.findall(token_skip_patt, token)
-                    if len(max(token_skip, key=len)) == 0:
-                        tokens_clr += re.findall(token_clr_patt, token)
+    def __place_tags_wrd_to_body_tags(self, clr_row):
+        # task 2 for the text
+        self.body_tags = re.findall(self.body_patt, clr_row)
+        self.body_tags = re.findall(self.clr_body_patt, str(self.body_tags))
 
-                for token in tokens_clr:
-                    if lesk(tokens_clr, token) is None:
-                        orphan_tokens.append(token)
+    def __tokenize_add_orphan_tokens(self, clr_row):
+        # task 4 for text
+        self.orphan_tokens = list()
+        tokens_clr = list()
+        clr_row = clr_row.split()
+
+        for token in clr_row:
+            token_skip = re.findall(self.token_skip_patt, token)
+            if len(max(token_skip, key=len)) == 0:
+                tokens_clr += re.findall(self.token_clr_patt, token)
+
+        for token in tokens_clr:
+            if lesk(tokens_clr, token) is None:
+                self.orphan_tokens.append(token)
+
+    def processing_text(self):
+
+        with open(self.name, encoding='utf-8') as data_file:
+            json_dict = dict()
+
+            # main loop for processing source file rows
+            for row in data_file:
+                row_dict = dict()
+
+                self.__clr_tags_to_metadata(row=row)
+                self.__remove_dollar_sign_words(row=row, tag_results=self.tags_result)
+                self.__place_tags_wrd_to_body_tags(clr_row=self.clr_row)
+                self.__tokenize_add_orphan_tokens(clr_row=self.clr_row)
 
                 # overall data save
-                row_dict["body"] = body
-                row_dict["body_tags"] = body_tags
-                row_dict["metadata"] = metadata
-                row_dict["orphan_tokens"] = orphan_tokens
+                row_dict["body"] = self.body
+                row_dict["body_tags"] = self.body_tags
+                row_dict["metadata"] = self.metadata
+                row_dict["orphan_tokens"] = self.orphan_tokens
 
                 json_dict.setdefault("records", []).append(row_dict)
 
-        with open(output_file, 'w') as out_file:
-            parsed = json.loads(json.dumps(json_dict))
-            json.dump(parsed, out_file, indent=2)
+        return json_dict
 
 
 if __name__ == "__main__":
-
-    t = TextProcessor()
-    t.processing_text_to_json(input_file='input.txt', output_file='output.json')
+    tc = TweetCleaner(input_file_name='input.txt')
+    processed_text = tc.processing_text()
+    t = JsonParsedWriter()
+    t.write(processed_text, 'result.json')