refactor:

* fix conflict
* update Dockerfile
* move formatter to class
sahajsoft · Oct 9, 2024 · d798393 · d798393
1 parent 6ded60e
commit d798393
Show file tree

Hide file tree

Showing 4 changed files with 49 additions and 41 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -2,6 +2,7 @@ FROM python:3.11
 
 WORKDIR /usr/src/app
 RUN pip install poetry==1.8.3
+RUN apt-get update && apt-get install -y tesseract-ocr
 ENV POETRY_NO_INTERACTION=1
 ENV POETRY_VIRTUALENVS_IN_PROJECT=1
 ENV POETRY_VIRTUALENVS_CREATE=1

diff --git a/src/cli.py b/src/cli.py
@@ -9,6 +9,7 @@
 from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
 from presidio_anonymizer import AnonymizerEngine, BatchAnonymizerEngine
 from config.nlp_engine_config import FlairNLPEngine
+from utils.formatter import Formatter
 from operators.vault import Vault
 from PIL import Image
 from presidio_image_redactor import ImageRedactorEngine
@@ -25,21 +26,22 @@
 
 def analyze(args):
     analyzer_results = None
+    input_buffer = sys.stdin.buffer.read()
     text = None
     image = None
     if args.img:
-        image = Image.open(io.BytesIO(sys.stdin.buffer.read()))
+        image = Image.open(io.BytesIO(input_buffer))
         analyzer_results = ImageAnalyzerEngine().analyze(image=image, language=args.language)
     else:
         nlp_engine = FlairNLPEngine(NLP_ENGINE)
         nlp_engine, registry = nlp_engine.create_nlp_engine()
         engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)
-        text = sys.stdin.read()
+        text = input_buffer.decode("utf-8")
         if args.csv:
             engine = CSVAnalyzerEngine(engine)
         analyzer_results = engine.analyze(text=text, language=args.language)
 
-    output = format_output(analyzer_results, text, image)
+    output = Formatter().format_output(analyzer_results, text, image)
     print(json.dumps(output, indent=2))
     return analyzer_results
 
@@ -132,43 +134,5 @@ def main():
     args = parser.parse_args()
     args.func(args)
 
-
-def format_output(analyzer_results, text, image):
-    if image:
-        output = io.BytesIO()
-        image.convert('RGB').save(output, format='JPEG')
-        return {
-            "image": list(output.getvalue()),
-            "analyzer_results": [
-                {
-                    "entity_type": result.entity_type,
-                    "start": result.start,
-                    "end":  result.end,
-                    "score": result.score,
-                    "left" : result.left,
-                    "top" : result.top,
-                    "width" : result.width,
-                    "height" : result.height
-                }
-                for result in analyzer_results
-            ]
-        }
-
-    return {
-        "text": text,
-        "analyzer_results": [
-            {
-                "entity_type": result.entity_type,
-                "start": result.start,
-                "end": result.end,
-                "score": result.score,
-                "analysis_explanation": result.analysis_explanation,
-                "recognition_metadata": result.recognition_metadata,
-            }
-            for result in analyzer_results
-        ],
-    }
-
-
 if __name__ == "__main__":
     main()
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
diff --git a/src/utils/formatter.py b/src/utils/formatter.py
@@ -0,0 +1,43 @@
+import io
+
+
+class Formatter:
+
+    def __init__(self):
+        pass
+
+    def format_output(self ,analyzer_results, text, image):
+        if image:
+            output = io.BytesIO()
+            image.convert('RGB').save(output, format='JPEG')
+            return {
+                "image": list(output.getvalue()),
+                "analyzer_results": [
+                    {
+                        "entity_type": result.entity_type,
+                        "start": result.start,
+                        "end":  result.end,
+                        "score": result.score,
+                        "left" : result.left,
+                        "top" : result.top,
+                        "width" : result.width,
+                        "height" : result.height
+                    }
+                    for result in analyzer_results
+                ]
+            }
+
+        return {
+            "text": text,
+            "analyzer_results": [
+                {
+                    "entity_type": result.entity_type,
+                    "start": result.start,
+                    "end": result.end,
+                    "score": result.score,
+                    "analysis_explanation": result.analysis_explanation,
+                    "recognition_metadata": result.recognition_metadata,
+                }
+                for result in analyzer_results
+            ],
+        }