From f55d90400abf61f5a903a85acbb8328a0da37cbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vojt=C4=9Bch=20K=C3=A1n=C4=9B?=
Date: Sat, 23 Jan 2021 05:07:28 +0100
Subject: [PATCH] date and place validation regex

---
Reviewer notes (text below the three-dash line is commentary, not part of
the commit message):

* court_pat: the "správní" group after "nejvyšší" is now optional, so
  "Nejvyšší soud ..." is recognised as well as "Nejvyšší správní soud ...".
  Previously the "nejvyšší" branch could only match when "správní" followed.
* date_and_place_pat: the separators before the day number are a single
  character-class run, and "dne" carries its own trailing separators.
  One separator is now enough when "dne" is absent, so "V Praze 1.1.2021"
  matches; everything the earlier pattern matched still matches.
* validate_date() is replaced by validate_date_and_place(lawsuit), which
  returns the re.Match for the first "v/ve <place> ... [dne] <day>." span,
  or None when the text contains no such span.
* The blob id on the "index" line was not regenerated after the two regex
  edits above.

 validator/validator.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/validator/validator.py b/validator/validator.py
index a11a8b0..3b8d9df 100644
--- a/validator/validator.py
+++ b/validator/validator.py
@@ -30,7 +30,7 @@ def process_file(f):
     elif ext == '.docx':
         return process_docx(f)
 
-court_pat = re.compile(r"(okresní|krajský|vrchní|nejvyšší(\s+správní\S*)|ústavní)\S*\s+soud\S*(\s+ve?)?\s+(\S+)", flags=re.IGNORECASE)
+court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)?|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE)
 def validate_court(lawsuit: str):
     return court_pat.search(lawsuit)
 
@@ -48,8 +48,9 @@ def validate_intent():
 def validate_signature():
     pass
 
-def validate_date():
-    pass
+date_and_place_pat = re.compile(r"\bve?\s+[^\n]+[\s.,\-–—:]+(?:dne[\s.,\-–—:]+)?\d+\.", flags=re.IGNORECASE)
+def validate_date_and_place(lawsuit: str):
+    return date_and_place_pat.search(lawsuit)
 
 def validate(text):
     pass
@@ -57,4 +58,6 @@ def validate(text):
 # debug
 if __name__ == "__main__":
     import sys
-    print(validate_court(str(sys.stdin.read())))
+    lawsuit = str(sys.stdin.read())
+    print(validate_court(lawsuit))
+    print(validate_date_and_place(lawsuit))