Basic court regex validator
This commit is contained in:
parent
957f5efb0c
commit
d6ad798e96
@ -37,7 +37,7 @@
|
|||||||
};
|
};
|
||||||
in
|
in
|
||||||
pkgs.mkShell {
|
pkgs.mkShell {
|
||||||
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask ];
|
buildInputs = with pkgs.python38Packages; [ pkgs.python38 pip geneea_sdk pytesseract pdftotext pillow docx2txt flask regex ];
|
||||||
shellHook = ''
|
shellHook = ''
|
||||||
read -p 'Insert Geneea API key:' -r -s geneea_api_key
|
read -p 'Insert Geneea API key:' -r -s geneea_api_key
|
||||||
export geneea_api_key
|
export geneea_api_key
|
||||||
|
@ -3,6 +3,7 @@ import pdftotext
|
|||||||
import docx2txt
|
import docx2txt
|
||||||
import pytesseract
|
import pytesseract
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
import regex as re
|
||||||
|
|
||||||
def process_jpg(f):
|
def process_jpg(f):
|
||||||
text = pytesseract.image_to_string(Image.open(f), lang="ces")
|
text = pytesseract.image_to_string(Image.open(f), lang="ces")
|
||||||
@ -29,8 +30,9 @@ def process_file(f):
|
|||||||
elif ext == '.docx':
|
elif ext == '.docx':
|
||||||
return process_docx(f)
|
return process_docx(f)
|
||||||
|
|
||||||
def validate_court(lawsuit):
|
court_pat = re.compile(r"(okresní|krajský|vrchní|nejvyšší(\s+správní\S*)|ústavní)\S*\s+soud\S*(\s+ve?)?\s+(\S+)", flags=re.IGNORECASE)
|
||||||
pass
|
def validate_court(lawsuit: str):
|
||||||
|
return court_pat.match(lawsuit)
|
||||||
|
|
||||||
def validate_accuser(lawsuit):
|
def validate_accuser(lawsuit):
|
||||||
pass
|
pass
|
||||||
@ -51,3 +53,8 @@ def validate_date():
|
|||||||
|
|
||||||
def validate(text):
|
def validate(text):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# debug
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import sys
|
||||||
|
print(validate_court(str(sys.stdin.read())))
|
||||||
|
Loading…
Reference in New Issue
Block a user