Basic validation form
This commit is contained in:
parent
f55d90400a
commit
c02e0b82bc
@ -37,7 +37,7 @@
|
|||||||
};
|
};
|
||||||
in
|
in
|
||||||
pkgs.mkShell {
|
pkgs.mkShell {
|
||||||
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask regex ];
|
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask flask-cors regex ];
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
read -p 'Insert Geneea API key:' -r -s geneea_api_key
|
read -p 'Insert Geneea API key:' -r -s geneea_api_key
|
||||||
export geneea_api_key
|
export geneea_api_key
|
||||||
|
@ -17,7 +17,12 @@ const routes = [
|
|||||||
// this generates a separate chunk (about.[hash].js) for this route
|
// this generates a separate chunk (about.[hash].js) for this route
|
||||||
// which is lazy-loaded when the route is visited.
|
// which is lazy-loaded when the route is visited.
|
||||||
component: () => import(/* webpackChunkName: "about" */ '../views/Edit.vue')
|
component: () => import(/* webpackChunkName: "about" */ '../views/Edit.vue')
|
||||||
}
|
},
|
||||||
|
{
|
||||||
|
path: '/upload',
|
||||||
|
name: 'Upload',
|
||||||
|
component: () => import('../views/Upload'),
|
||||||
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
const router = new VueRouter({
|
const router = new VueRouter({
|
||||||
|
@ -20,3 +20,4 @@ six==1.15.0
|
|||||||
tqdm==4.56.0
|
tqdm==4.56.0
|
||||||
urllib3==1.26.2
|
urllib3==1.26.2
|
||||||
Werkzeug==1.0.1
|
Werkzeug==1.0.1
|
||||||
|
flask-cors==3.0.10
|
||||||
|
@ -1,24 +1,18 @@
|
|||||||
from flask import Flask, render_template, request, jsonify
|
from flask import Flask, request, jsonify
|
||||||
import json
|
from flask_cors import CORS
|
||||||
import validator
|
import validator
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
CORS(app)
|
||||||
|
|
||||||
@app.route('/')
|
|
||||||
def index():
|
|
||||||
return render_template('index.html')
|
|
||||||
|
|
||||||
@app.route('/validator', methods=['GET', 'POST'])
|
@app.route('/validator', methods=['POST'])
|
||||||
def validate():
|
def validate():
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
print(request.files)
|
raw_file = request.files['file']
|
||||||
f = request.files['file']
|
text_content = validator.process_file(raw_file)
|
||||||
|
validation_result = validator.validate(text_content)
|
||||||
text = validator.process_file(f)
|
return jsonify(validation_result)
|
||||||
result = validator.validate(text)
|
|
||||||
return jsonify(result)
|
|
||||||
else:
|
|
||||||
return 'Soubor byl zvalidován. TODO musím ověřit, jak byl zvalidován.' # TODO change
|
|
||||||
|
|
||||||
|
|
||||||
app.run()
|
app.run()
|
@ -1,9 +0,0 @@
|
|||||||
<html>
|
|
||||||
<body>
|
|
||||||
<form action = "http://localhost:5000/validator" method = "POST"
|
|
||||||
enctype = "multipart/form-data">
|
|
||||||
<input type = "file" name = "file" /> <!-- TODO hláška-->
|
|
||||||
<input type = "submit"/>
|
|
||||||
</form>
|
|
||||||
</body>
|
|
||||||
</html>
|
|
@ -4,6 +4,7 @@ import docx2txt
|
|||||||
import pytesseract
|
import pytesseract
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
import regex as re
|
import regex as re
|
||||||
|
from typing import Union, Tuple, Literal
|
||||||
|
|
||||||
def process_jpg(f):
|
def process_jpg(f):
|
||||||
text = pytesseract.image_to_string(Image.open(f), lang="ces")
|
text = pytesseract.image_to_string(Image.open(f), lang="ces")
|
||||||
@ -19,45 +20,55 @@ def process_docx(f):
|
|||||||
return docx2txt.process(f)
|
return docx2txt.process(f)
|
||||||
|
|
||||||
|
|
||||||
def process_file(f):
|
def process_file(f) -> str:
|
||||||
# TODO proper file format distinguishing, not only by suffix?
|
# TODO proper file format distinguishing, not only by suffix?
|
||||||
_, ext = os.path.splitext(f.filename)
|
_, ext = os.path.splitext(f.filename)
|
||||||
print(ext)
|
|
||||||
if ext == '.jpg':
|
if ext == '.jpg':
|
||||||
return process_jpg(f)
|
return process_jpg(f)
|
||||||
elif ext == '.pdf':
|
elif ext == '.pdf':
|
||||||
return process_pdf(f)
|
return process_pdf(f)
|
||||||
elif ext == '.docx':
|
elif ext == '.docx':
|
||||||
return process_docx(f)
|
return process_docx(f)
|
||||||
|
else:
|
||||||
|
return str(f.read())
|
||||||
|
|
||||||
court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE)
|
# Matches a Czech court designation (okresní/krajský/vrchní/nejvyšší/ústavní
# ... soud), an optional "v"/"ve" preposition, and captures the text that
# follows as group 1. The "správní" qualifier after "nejvyšší" is optional so
# that both "Nejvyšší soud" and "Nejvyšší správní soud" are recognised.
court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)?|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE)
|
||||||
def validate_court(lawsuit: str):
|
def validate_court(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
return court_pat.search(lawsuit)
|
match = court_pat.search(text_content)
|
||||||
|
if match is None:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return match.span()
|
||||||
|
|
||||||
def validate_accuser(lawsuit):
|
def validate_accuser(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# hard to implement
|
# hard to implement
|
||||||
def validate_topic():
|
def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# also hard to implement
|
# also hard to implement
|
||||||
def validate_intent():
|
def validate_intent(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def validate_signature():
|
def validate_signature(text_content) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
date_and_place_pat = re.compile(r"\bve?\s+[^\n]+(?:\s|[.,\-–—:])+(?:dne)?(?:\s|[.,\-–—:])+\d+\.", flags=re.IGNORECASE)
|
date_and_place_pat = re.compile(r"\bve?\s+[^\n]+(?:\s|[.,\-–—:])+(?:dne)?(?:\s|[.,\-–—:])+\d+\.", flags=re.IGNORECASE)
|
||||||
def validate_date_and_place(lawsuit: str):
|
def validate_date_and_place(text_content: str) -> Union[Tuple[int, int], Literal[False]]:
|
||||||
return date_and_place_pat.search(lawsuit)
|
match = date_and_place_pat.search(text_content)
|
||||||
|
if match is None:
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
return match.span()
|
||||||
|
|
||||||
def validate(text):
|
def validate(text_content: str) -> object:
|
||||||
pass
|
return {
|
||||||
|
"court": validate_court(text_content),
|
||||||
|
"date_and_place": validate_date_and_place(text_content),
|
||||||
|
}
|
||||||
|
|
||||||
# debug
|
# debug
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
import sys
|
import sys
|
||||||
lawsuit = str(sys.stdin.read())
|
print(validate(str(sys.stdin.read())))
|
||||||
print(validate_court(lawsuit))
|
|
||||||
print(validate_date_and_place(lawsuit))
|
|
||||||
|
Loading…
Reference in New Issue
Block a user