Skip to content

Commit

Permalink
refactor: * fix conflict
Browse files Browse the repository at this point in the history
          * update Dockerfile
          * move formatter to class
  • Loading branch information
mallikarjun-br committed Oct 9, 2024
1 parent 6ded60e commit d798393
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 41 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ FROM python:3.11

WORKDIR /usr/src/app
RUN pip install poetry==1.8.3
# Install the Tesseract OCR binary (presumably needed by the image analysis
# path of the CLI - confirm against the OCR dependency in use).
# The apt package index is removed in the same layer so it does not end up
# stored in the final image.
RUN apt-get update \
    && apt-get install -y tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=1
ENV POETRY_VIRTUALENVS_CREATE=1
Expand Down
46 changes: 5 additions & 41 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from analyzer_engine.csv_analyzer_engine import CSVAnalyzerEngine
from presidio_anonymizer import AnonymizerEngine, BatchAnonymizerEngine
from config.nlp_engine_config import FlairNLPEngine
from utils.formatter import Formatter
from operators.vault import Vault
from PIL import Image
from presidio_image_redactor import ImageRedactorEngine
Expand All @@ -25,21 +26,22 @@

def analyze(args):
    """Analyze the data piped on stdin and print the findings as JSON.

    Standard input is consumed exactly once, as raw bytes. Depending on the
    parsed options the bytes are either opened as an image or decoded as
    UTF-8 text (optionally treated as CSV) before being analyzed.

    Args:
        args: Parsed command-line namespace. The attributes ``img``, ``csv``
            and ``language`` are read.

    Returns:
        The results produced by the selected analyzer engine.
    """
    # stdin can only be drained once; keep the bytes so that both the image
    # path and the text path work from the same buffer.
    input_buffer = sys.stdin.buffer.read()
    text = None
    image = None

    if args.img:
        image = Image.open(io.BytesIO(input_buffer))
        analyzer_results = ImageAnalyzerEngine().analyze(
            image=image, language=args.language
        )
    else:
        nlp_engine = FlairNLPEngine(NLP_ENGINE)
        nlp_engine, registry = nlp_engine.create_nlp_engine()
        engine = AnalyzerEngine(registry=registry, nlp_engine=nlp_engine)
        text = input_buffer.decode("utf-8")
        if args.csv:
            # Wrap the plain engine so the text is handled as CSV.
            engine = CSVAnalyzerEngine(engine)
        analyzer_results = engine.analyze(text=text, language=args.language)

    output = Formatter().format_output(analyzer_results, text, image)
    print(json.dumps(output, indent=2))
    return analyzer_results

Expand Down Expand Up @@ -132,43 +134,5 @@ def main():
args = parser.parse_args()
args.func(args)


def _base_fields(result):
    """Return the fields every analyzer result contributes to the output."""
    return {
        "entity_type": result.entity_type,
        "start": result.start,
        "end": result.end,
        "score": result.score,
    }


def format_output(analyzer_results, text, image):
    """Build a JSON-serializable summary of an analysis run.

    Args:
        analyzer_results: Iterable of result objects. Every result must
            expose ``entity_type``, ``start``, ``end`` and ``score``; image
            results additionally ``left``, ``top``, ``width`` and ``height``;
            text results ``analysis_explanation`` and
            ``recognition_metadata``.
        text: The analyzed text; only used when ``image`` is ``None``.
        image: An image object offering ``convert(mode).save(fp, format=...)``
            or ``None`` when text was analyzed.

    Returns:
        A dict with an ``"analyzer_results"`` list plus either ``"image"``
        (the JPEG-encoded image as a list of byte values) or ``"text"``.
    """
    # Compare against None explicitly: whether an image was supplied must
    # not depend on how the image object happens to evaluate as a boolean.
    if image is not None:
        encoded = io.BytesIO()
        image.convert("RGB").save(encoded, format="JPEG")
        return {
            "image": list(encoded.getvalue()),
            "analyzer_results": [
                {
                    **_base_fields(result),
                    "left": result.left,
                    "top": result.top,
                    "width": result.width,
                    "height": result.height,
                }
                for result in analyzer_results
            ],
        }

    return {
        "text": text,
        "analyzer_results": [
            {
                **_base_fields(result),
                "analysis_explanation": result.analysis_explanation,
                "recognition_metadata": result.recognition_metadata,
            }
            for result in analyzer_results
        ],
    }


if __name__ == "__main__":
main()
Empty file added src/utils/__init__.py
Empty file.
43 changes: 43 additions & 0 deletions src/utils/formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import io


class Formatter:
    """Turn analyzer results into plain dictionaries suitable for JSON."""

    # Attributes copied from every result, whatever kind of input was analyzed.
    _COMMON_FIELDS = ("entity_type", "start", "end", "score")
    # Bounding-box attributes reported only for image results.
    _IMAGE_FIELDS = ("left", "top", "width", "height")
    # Extra attributes reported only for text results.
    _TEXT_FIELDS = ("analysis_explanation", "recognition_metadata")

    def format_output(self, analyzer_results, text, image):
        """Build a JSON-serializable summary of an analysis run.

        Args:
            analyzer_results: Iterable of result objects exposing the
                attributes named in the field tuples of this class.
            text: The analyzed text; only used when ``image`` is ``None``.
            image: An image object offering
                ``convert(mode).save(fp, format=...)`` or ``None`` when text
                was analyzed.

        Returns:
            A dict with an ``"analyzer_results"`` list plus either
            ``"image"`` (the JPEG-encoded image as a list of byte values) or
            ``"text"``.
        """
        # Compare against None explicitly: whether an image was supplied
        # must not depend on how the image object evaluates as a boolean.
        if image is not None:
            encoded = io.BytesIO()
            image.convert("RGB").save(encoded, format="JPEG")
            return {
                "image": list(encoded.getvalue()),
                "analyzer_results": self._serialize(
                    analyzer_results,
                    self._COMMON_FIELDS + self._IMAGE_FIELDS,
                ),
            }

        return {
            "text": text,
            "analyzer_results": self._serialize(
                analyzer_results,
                self._COMMON_FIELDS + self._TEXT_FIELDS,
            ),
        }

    @staticmethod
    def _serialize(analyzer_results, field_names):
        """Return one dict per result holding ``field_names`` in order."""
        return [
            {name: getattr(result, name) for name in field_names}
            for result in analyzer_results
        ]

0 comments on commit d798393

Please sign in to comment.