Basic validation form

This commit is contained in:
Vojtěch Káně 2021-01-23 06:29:43 +01:00
parent f55d90400a
commit c02e0b82bc
6 changed files with 43 additions and 41 deletions

View File

@ -37,7 +37,7 @@
};
in
pkgs.mkShell {
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask regex ];
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask flask-cors regex ];
shellHook = ''
read -p 'Insert Geneea API key:' -r -s geneea_api_key
export geneea_api_key

View File

@ -17,7 +17,12 @@ const routes = [
// this generates a separate chunk (about.[hash].js) for this route
// which is lazy-loaded when the route is visited.
component: () => import(/* webpackChunkName: "about" */ '../views/Edit.vue')
}
},
{
path: '/upload',
name: 'Upload',
component: () => import('../views/Upload'),
},
]
const router = new VueRouter({

View File

@ -20,3 +20,4 @@ six==1.15.0
tqdm==4.56.0
urllib3==1.26.2
Werkzeug==1.0.1
flask-cors==3.0.10

View File

@ -1,24 +1,18 @@
from flask import Flask, render_template, request, jsonify
import json
from flask import Flask, request, jsonify
from flask_cors import CORS
import validator
# Create the Flask application object and wrap it with flask-cors' CORS
# extension using that extension's default settings.
# NOTE(review): presumably needed because the Vue front end is served from a
# different origin than this API — confirm, and consider restricting origins.
app = Flask(__name__)
CORS(app)
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    # NOTE(review): the surrounding diff shows this route and its template
    # being removed — confirm whether this handler is still live.
    page = render_template('index.html')
    return page
@app.route('/validator', methods=['POST'])
def validate():
    """Validate an uploaded document and return the findings as JSON.

    Reads the multipart form part named ``file`` from the current request,
    passes it to ``validator.process_file`` to obtain its text, runs
    ``validator.validate`` on that text and returns the result through
    ``jsonify``.
    """
    uploaded = request.files['file']
    extracted_text = validator.process_file(uploaded)
    return jsonify(validator.validate(extracted_text))
# Start Flask's built-in server only when this file is executed as a script,
# so that merely importing the module (tests, a WSGI server, `flask run`)
# does not block inside app.run().
if __name__ == "__main__":
    app.run()

View File

@ -1,9 +0,0 @@
<html>
<body>
<form action = "http://localhost:5000/validator" method = "POST"
enctype = "multipart/form-data">
<input type = "file" name = "file" /> <!-- TODO hláška-->
<input type = "submit"/>
</form>
</body>
</html>

View File

@ -4,6 +4,7 @@ import docx2txt
import pytesseract
from PIL import Image
import regex as re
from typing import Union, Tuple, Literal
def process_jpg(f):
text = pytesseract.image_to_string(Image.open(f), lang="ces")
@ -19,45 +20,55 @@ def process_docx(f):
return docx2txt.process(f)
def process_file(f) -> str:
    """Extract plain text from an uploaded file.

    The handler is chosen from the extension of ``f.filename``, compared
    case-insensitively: ``.jpg`` -> ``process_jpg``, ``.pdf`` ->
    ``process_pdf``, ``.docx`` -> ``process_docx``. Any other (or missing)
    extension is treated as plain text.

    Args:
        f: File-like object exposing ``filename`` and ``read()``
            (presumably a Werkzeug ``FileStorage`` — confirm against caller).

    Returns:
        The text content of the file.
    """
    # TODO proper file format distinguishing, not only by suffix?
    # ``filename`` may be None/empty when the client sent no name.
    _, ext = os.path.splitext(f.filename or '')
    ext = ext.lower()  # treat ".JPG" / ".Pdf" like their lowercase forms
    if ext == '.jpg':
        return process_jpg(f)
    if ext == '.pdf':
        return process_pdf(f)
    if ext == '.docx':
        return process_docx(f)

    raw = f.read()
    if isinstance(raw, (bytes, bytearray)):
        # str(bytes) would produce the "b'...'" repr with escaped non-ASCII
        # bytes, which the text patterns can never match. Decode instead.
        # Assumes UTF-8 uploads; undecodable bytes are replaced, not fatal.
        return raw.decode('utf-8', errors='replace')
    return str(raw)
# Matches a court designation: one of the court-type words (okresní, krajský,
# vrchní, nejvyšší optionally followed by "správní…", ústavní) plus trailing
# word characters, the word "soud…", an optional preposition "v"/"ve", and
# the following run of word/whitespace characters (captured as group 1).
# The "správní" part is optional so that plain "Nejvyšší soud" matches too.
court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)?|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE)


def validate_court(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
    """Locate the first court designation in ``text_content``.

    Args:
        text_content: Text to search.

    Returns:
        The ``(start, end)`` span of the first ``court_pat`` match, or
        ``False`` when the pattern does not occur.
    """
    match = court_pat.search(text_content)
    if match is None:
        return False
    return match.span()
def validate_accuser(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Placeholder for the accuser check on ``text_content``.

    Not implemented yet: the body does nothing, so a call currently returns
    ``None`` rather than a span or ``False``.
    """
    # hard to implement
    return None
def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Placeholder for the topic check on ``text_content``.

    Not implemented yet: the body does nothing, so a call currently returns
    ``None`` rather than a span or ``False``.
    """
    # also hard to implement
    return None
def validate_intent(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Placeholder for the intent check on ``text_content``.

    Not implemented yet: the body does nothing, so a call currently returns
    ``None`` rather than a span or ``False``.
    """
    return None
def validate_signature(text_content) -> Union[Tuple[int, int], Literal[False]]:
    """Placeholder for the signature check on ``text_content``.

    Not implemented yet: the body does nothing, so a call currently returns
    ``None`` rather than a span or ``False``.
    """
    return None
# Matches a place-and-date phrase: the word "v"/"ve", further text on the
# same line, separator characters (whitespace, punctuation, dashes), an
# optional "dne", more separators, and finally digits followed by a dot.
date_and_place_pat = re.compile(r"\bve?\s+[^\n]+(?:\s|[.,\-–—:])+(?:dne)?(?:\s|[.,\-–—:])+\d+\.", flags=re.IGNORECASE)


def validate_date_and_place(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
    """Locate the first place-and-date phrase in ``text_content``.

    Returns the ``(start, end)`` span of the first ``date_and_place_pat``
    match, or ``False`` when there is none.
    """
    found = date_and_place_pat.search(text_content)
    return found.span() if found is not None else False
def validate(text_content: str) -> object:
    """Run the implemented checks on ``text_content``.

    Returns a dict with the keys ``"court"`` and ``"date_and_place"``,
    holding the results of ``validate_court`` and
    ``validate_date_and_place`` respectively.
    """
    court = validate_court(text_content)
    date_and_place = validate_date_and_place(text_content)
    return {"court": court, "date_and_place": date_and_place}
# debug: validate whatever text arrives on standard input and print the result
if __name__ == "__main__":
    import sys

    stdin_text = str(sys.stdin.read())
    print(validate(stdin_text))