Add to_pdf

This commit is contained in:
František Kmječ 2021-01-23 10:54:25 +01:00
parent 5107eefe8a
commit 6d29d7e191
3 changed files with 33 additions and 3 deletions

View File

@ -1,8 +1,10 @@
certifi==2020.12.5 certifi==2020.12.5
chardet==4.0.0 chardet==4.0.0
click==7.1.2 click==7.1.2
docx2pdf==0.1.7
docx2txt==0.8 docx2txt==0.8
Flask==1.1.2 Flask==1.1.2
Flask-Cors==3.0.10
geneea-nlp-client==1.2.0 geneea-nlp-client==1.2.0
idna==2.10 idna==2.10
itsdangerous==1.1.0 itsdangerous==1.1.0
@ -10,6 +12,7 @@ Jinja2==2.11.2
joblib==1.0.0 joblib==1.0.0
MarkupSafe==1.1.1 MarkupSafe==1.1.1
nltk==3.5 nltk==3.5
pdf2image==1.14.0
pdftotext==2.1.5 pdftotext==2.1.5
Pillow==8.1.0 Pillow==8.1.0
pytesseract==0.3.7 pytesseract==0.3.7
@ -20,4 +23,3 @@ six==1.15.0
tqdm==4.56.0 tqdm==4.56.0
urllib3==1.26.2 urllib3==1.26.2
Werkzeug==1.0.1 Werkzeug==1.0.1
flask-cors==3.0.10

View File

@ -1,6 +1,7 @@
from flask import Flask, request, jsonify from flask import Flask, request, jsonify, send_file
from flask_cors import CORS from flask_cors import CORS
import validator import validator
from docx2pdf import convert
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
@ -14,5 +15,17 @@ def validate():
validation_result = validator.validate(text_content) validation_result = validator.validate(text_content)
return jsonify(validation_result) return jsonify(validation_result)
@app.route('/to_pdf', methods=['POST'])
def convert_to_pdf():
    """Return the uploaded document as a PDF.

    Expects a multipart POST with the upload stored under the ``file``
    form field. A ``.docx`` upload is converted with docx2pdf, a ``.pdf``
    upload is sent back unchanged, and any other extension is rejected.

    Returns:
        A Flask response carrying the PDF bytes, or the tuple
        ``("Bad file format", 400)`` for unsupported extensions.
    """
    # Imported locally so this handler is self-contained; the module's
    # import block does not provide these names.
    import io
    import os
    import tempfile

    # The route only accepts POST, so no method check is needed here.
    raw_file = request.files['file']

    # Previously read ``f.filename``, but no ``f`` exists in this scope.
    _, ext = os.path.splitext(raw_file.filename or "")
    ext = ext.lower()  # accept ".DOCX" / ".PDF" as well

    if ext == ".pdf":
        # send_file cannot infer a MIME type from a bare stream, so it
        # has to be stated explicitly.
        return send_file(raw_file.stream, mimetype="application/pdf")

    if ext == ".docx":
        # docx2pdf's convert() works on filesystem paths and returns None,
        # so the upload goes through a scratch directory and the resulting
        # PDF is read back into memory before the directory is removed.
        with tempfile.TemporaryDirectory() as workdir:
            docx_path = os.path.join(workdir, "upload.docx")
            pdf_path = os.path.join(workdir, "upload.pdf")
            raw_file.save(docx_path)
            convert(docx_path, pdf_path)
            with open(pdf_path, "rb") as pdf_handle:
                payload = io.BytesIO(pdf_handle.read())
        return send_file(payload, mimetype="application/pdf")

    return "Bad file format", 400
app.run() app.run()

View File

@ -6,6 +6,7 @@ from PIL import Image
import regex as re import regex as re
from typing import Union, Tuple, Literal from typing import Union, Tuple, Literal
from pdf2image import convert_from_bytes from pdf2image import convert_from_bytes
from geneeanlpclient import g3
PDF_CHARACTER_THRESHOLD = 10 PDF_CHARACTER_THRESHOLD = 10
@ -56,7 +57,21 @@ def validate_court(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
return match.span() return match.span()
def validate_accuser(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Look for the accuser in *text_content* via the Geneea NLP service.

    Work in progress: the text is sent for analysis and the returned
    entities and relations are printed for inspection, but no span is
    derived from them yet, so the function currently always reports
    ``False``.

    Args:
        text_content: Document text to analyse.

    Returns:
        ``False`` (no accuser span is produced by the current code).
    """
    request_builder = g3.Request.Builder(analyses=[g3.AnalysisType.ALL])
    # NOTE(review): the API key is hard-coded in source control; it should
    # be moved to configuration or an environment variable and rotated.
    with g3.Client.create(userKey='4330765d043bfd5366b04a20c18b2dc0') as analyzer:
        result = analyzer.analyze(
            request_builder.build(id=str(1), text=text_content)
        )

        # Debug output of everything the service recognised.
        for entity in result.entities:
            print(f'\t{entity.type}: {entity.stdForm}')
        for relation in result.relations:
            print(relation)

        for entity in result.entities:
            if entity.stdForm == "žalobce":
                # TODO: turn the matched entity into a (start, end) span.
                pass

    # Keep the annotated contract: without this the function would fall
    # off the end and return None where it previously returned False.
    return False
# hard to implement # hard to implement
def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]: def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]: