Add to_pdf
This commit is contained in:
parent
5107eefe8a
commit
6d29d7e191
@ -1,8 +1,10 @@
|
|||||||
certifi==2020.12.5
|
certifi==2020.12.5
|
||||||
chardet==4.0.0
|
chardet==4.0.0
|
||||||
click==7.1.2
|
click==7.1.2
|
||||||
|
docx2pdf==0.1.7
|
||||||
docx2txt==0.8
|
docx2txt==0.8
|
||||||
Flask==1.1.2
|
Flask==1.1.2
|
||||||
|
Flask-Cors==3.0.10
|
||||||
geneea-nlp-client==1.2.0
|
geneea-nlp-client==1.2.0
|
||||||
idna==2.10
|
idna==2.10
|
||||||
itsdangerous==1.1.0
|
itsdangerous==1.1.0
|
||||||
@ -10,6 +12,7 @@ Jinja2==2.11.2
|
|||||||
joblib==1.0.0
|
joblib==1.0.0
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
nltk==3.5
|
nltk==3.5
|
||||||
|
pdf2image==1.14.0
|
||||||
pdftotext==2.1.5
|
pdftotext==2.1.5
|
||||||
Pillow==8.1.0
|
Pillow==8.1.0
|
||||||
pytesseract==0.3.7
|
pytesseract==0.3.7
|
||||||
@ -20,4 +23,3 @@ six==1.15.0
|
|||||||
tqdm==4.56.0
|
tqdm==4.56.0
|
||||||
urllib3==1.26.2
|
urllib3==1.26.2
|
||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
flask-cors==3.0.10
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
from flask import Flask, request, jsonify
|
from flask import Flask, request, jsonify, send_file
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
import validator
|
import validator
|
||||||
|
from docx2pdf import convert
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
CORS(app)
|
CORS(app)
|
||||||
@ -14,5 +15,17 @@ def validate():
|
|||||||
validation_result = validator.validate(text_content)
|
validation_result = validator.validate(text_content)
|
||||||
return jsonify(validation_result)
|
return jsonify(validation_result)
|
||||||
|
|
||||||
|
@app.route('/to_pdf', methods=['POST'])
def convert_to_pdf():
    """Return the uploaded document as a PDF.

    Expects a multipart upload under the form field ``file``.

    * ``.docx`` uploads are converted with docx2pdf and the resulting PDF
      is sent back.
    * ``.pdf`` uploads are sent back unchanged.
    * Anything else (or a missing upload) yields a ``400`` response.

    Returns:
        A Flask response carrying ``application/pdf`` data, or a
        ``(message, 400)`` tuple on bad input.
    """
    # Imported locally because the module's import block does not provide
    # these standard-library modules.
    import io
    import os
    import tempfile

    raw_file = request.files.get('file')
    if raw_file is None or not raw_file.filename:
        return "No file uploaded", 400

    # Compare case-insensitively so e.g. "REPORT.DOCX" is accepted too.
    _, ext = os.path.splitext(raw_file.filename)
    ext = ext.lower()

    if ext == ".pdf":
        # A bare stream has no name to guess a MIME type from, so state it.
        return send_file(raw_file.stream, mimetype="application/pdf")

    if ext != ".docx":
        return "Bad file format", 400

    # docx2pdf operates on filesystem paths and returns nothing, so the
    # upload is written to a scratch directory, converted there, and the
    # result is read back into memory before the directory is removed.
    with tempfile.TemporaryDirectory() as workdir:
        docx_path = os.path.join(workdir, "upload.docx")
        pdf_path = os.path.join(workdir, "upload.pdf")
        raw_file.save(docx_path)
        convert(docx_path, pdf_path)
        with open(pdf_path, "rb") as pdf_handle:
            payload = io.BytesIO(pdf_handle.read())

    return send_file(payload, mimetype="application/pdf")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Start the development server only when this file is executed
    # directly, so importing the module (tests, WSGI servers) does not
    # block on app.run().
    app.run()
|
||||||
|
@ -6,6 +6,7 @@ from PIL import Image
|
|||||||
import regex as re
|
import regex as re
|
||||||
from typing import Union, Tuple, Literal
|
from typing import Union, Tuple, Literal
|
||||||
from pdf2image import convert_from_bytes
|
from pdf2image import convert_from_bytes
|
||||||
|
from geneeanlpclient import g3
|
||||||
|
|
||||||
PDF_CHARACTER_THRESHOLD = 10
|
PDF_CHARACTER_THRESHOLD = 10
|
||||||
|
|
||||||
@ -56,7 +57,21 @@ def validate_court(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
|
|||||||
return match.span()
|
return match.span()
|
||||||
|
|
||||||
def validate_accuser(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Check whether the text names an accuser ("žalobce").

    Sends ``text_content`` to the Geneea NLP service, logs the returned
    entities and relations at debug level, and looks for an entity whose
    standard form is ``"žalobce"``.

    Locating the accuser's character span is not implemented yet, so this
    currently always returns ``False`` (the value the declared return type
    reserves for "not found").

    Args:
        text_content: Plain text of the document to analyse.

    Returns:
        ``False`` for now; intended to return a ``(start, end)`` span once
        span extraction is implemented.
    """
    # Imported locally because the module's import block does not provide
    # these standard-library modules.
    import logging
    import os

    log = logging.getLogger(__name__)

    request_builder = g3.Request.Builder(analyses=[g3.AnalysisType.ALL])

    # The API key is a secret and must not live in source control; it is
    # taken from the environment instead.
    # NOTE(review): the key previously committed here should be revoked.
    api_key = os.environ.get("GENEEA_API_KEY")

    with g3.Client.create(userKey=api_key) as analyzer:
        result = analyzer.analyze(
            request_builder.build(id=str(1), text=text_content)
        )

    for entity in result.entities:
        log.debug("%s: %s", entity.type, entity.stdForm)

    for relation in result.relations:
        log.debug("%s", relation)

    accuser_found = any(
        entity.stdForm == "žalobce" for entity in result.entities
    )
    log.debug("accuser entity present: %s", accuser_found)

    # TODO: map the matched entity back to a (start, end) span in
    # text_content; until then report "not found" explicitly rather than
    # falling off the end and returning None.
    return False
|
||||||
|
|
||||||
|
|
||||||
# hard to implement
|
# hard to implement
|
||||||
def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
|
Loading…
Reference in New Issue
Block a user