diff --git a/flake.nix b/flake.nix index 88903aa..e92b041 100644 --- a/flake.nix +++ b/flake.nix @@ -37,7 +37,7 @@ }; in pkgs.mkShell { - buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask regex ]; + buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask flask-cors regex ]; shellHook = '' read -p 'Insert Geneea API key:' -r -s geneea_api_key export geneea_api_key diff --git a/form/res-judicata/src/router/index.js b/form/res-judicata/src/router/index.js index 285ef82..a490b79 100644 --- a/form/res-judicata/src/router/index.js +++ b/form/res-judicata/src/router/index.js @@ -17,7 +17,12 @@ const routes = [ // this generates a separate chunk (about.[hash].js) for this route // which is lazy-loaded when the route is visited. component: () => import(/* webpackChunkName: "about" */ '../views/Edit.vue') - } + }, + { + path: '/upload', + name: 'Upload', + component: () => import('../views/Upload'), + }, ] const router = new VueRouter({ diff --git a/validator/requirements.txt b/validator/requirements.txt index cb7a7d2..2a4b212 100644 --- a/validator/requirements.txt +++ b/validator/requirements.txt @@ -20,3 +20,4 @@ six==1.15.0 tqdm==4.56.0 urllib3==1.26.2 Werkzeug==1.0.1 +flask-cors==3.0.10 diff --git a/validator/server.py b/validator/server.py index bad18ef..7a00487 100644 --- a/validator/server.py +++ b/validator/server.py @@ -1,24 +1,18 @@ -from flask import Flask, render_template, request, jsonify -import json +from flask import Flask, request, jsonify +from flask_cors import CORS import validator app = Flask(__name__) +CORS(app) -@app.route('/') -def index(): - return render_template('index.html') -@app.route('/validator', methods=['GET', 'POST']) +@app.route('/validator', methods=['POST']) def validate(): if request.method == 'POST': - print(request.files) - f = request.files['file'] - - text = validator.process_file(f) - result = 
validator.validate(text) - return jsonify(result) - else: - return 'Soubor byl zvalidován. TODO musím ověřit, jak byl zvalidován.' # TODO change + raw_file = request.files['file'] + text_content = validator.process_file(raw_file) + validation_result = validator.validate(text_content) + return jsonify(validation_result) -app.run() \ No newline at end of file +app.run() diff --git a/validator/templates/index.html b/validator/templates/index.html deleted file mode 100644 index aeb843c..0000000 --- a/validator/templates/index.html +++ /dev/null @@ -1,9 +0,0 @@ - - -
- - -
- - \ No newline at end of file diff --git a/validator/validator.py b/validator/validator.py index 3b8d9df..e826e64 100644 --- a/validator/validator.py +++ b/validator/validator.py @@ -4,6 +4,7 @@ import docx2txt import pytesseract from PIL import Image import regex as re +from typing import Union, Tuple, Literal def process_jpg(f): text = pytesseract.image_to_string(Image.open(f), lang="ces") @@ -19,45 +20,55 @@ def process_docx(f): return docx2txt.process(f) -def process_file(f): +def process_file(f) -> str: # TODO proper file format distinguishing, not only by suffix? _, ext = os.path.splitext(f.filename) - print(ext) if ext == '.jpg': return process_jpg(f) elif ext == '.pdf': return process_pdf(f) elif ext == '.docx': return process_docx(f) + else: + return f.read().decode('utf-8', errors='replace') # decode the raw bytes; str() would return the b'...' repr. court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE) -def validate_court(lawsuit: str): - return court_pat.search(lawsuit) +def validate_court(text_content: str) -> Union[Tuple[int, int], Literal[False]]: + match = court_pat.search(text_content) + if match is None: + return False + else: + return match.span() -def validate_accuser(lawsuit): +def validate_accuser(text_content) -> Union[Tuple[int, int], Literal[False]]: pass # hard to implement -def validate_topic(): +def validate_topic(text_content) -> Union[Tuple[int, int], Literal[False]]: pass # also hard to implement -def validate_intent(): +def validate_intent(text_content) -> Union[Tuple[int, int], Literal[False]]: pass -def validate_signature(): +def validate_signature(text_content) -> Union[Tuple[int, int], Literal[False]]: pass date_and_place_pat = re.compile(r"\bve?\s+[^\n]+(?:\s|[.,\-–—:])+(?:dne)?(?:\s|[.,\-–—:])+\d+\.", flags=re.IGNORECASE) -def validate_date_and_place(lawsuit: str): - return date_and_place_pat.search(lawsuit) +def validate_date_and_place(text_content: str) -> Union[Tuple[int, int], Literal[False]]: + match = 
date_and_place_pat.search(text_content) + if match is None: + return False + else: + return match.span() -def validate(text): - pass +def validate(text_content: str) -> object: + return { + "court": validate_court(text_content), + "date_and_place": validate_date_and_place(text_content), + } # debug if __name__ == "__main__": import sys - lawsuit = str(sys.stdin.read()) - print(validate_court(lawsuit)) - print(validate_date_and_place(lawsuit)) + print(validate(str(sys.stdin.read())))