date and place validation regex

This commit is contained in:
Vojtěch Káně 2021-01-23 05:07:28 +01:00
parent bd66615f61
commit f55d90400a

View File

@@ -30,7 +30,7 @@ def process_file(f):
elif ext == '.docx': elif ext == '.docx':
return process_docx(f) return process_docx(f)
court_pat = re.compile(r"(okresní|krajský|vrchní|nejvyšší(\s+správní\S*)|ústavní)\S*\s+soud\S*(\s+ve?)?\s+(\S+)", flags=re.IGNORECASE) court_pat = re.compile(r"\b(?:okresní|krajský|vrchní|nejvyšší(?:\s+správní\w*)|ústavní)\w*\s+soud\w*(?:\s+ve?)?\s+((?:\w|\s)+)", flags=re.IGNORECASE)
def validate_court(lawsuit: str): def validate_court(lawsuit: str):
return court_pat.search(lawsuit) return court_pat.search(lawsuit)
@@ -48,8 +48,9 @@ def validate_intent():
def validate_signature(): def validate_signature():
pass pass
def validate_date(): date_and_place_pat = re.compile(r"\bve?\s+[^\n]+(?:\s|[.,\-–—:])+(?:dne)?(?:\s|[.,\-–—:])+\d+\.", flags=re.IGNORECASE)
pass def validate_date_and_place(lawsuit: str):
return date_and_place_pat.search(lawsuit)
def validate(text): def validate(text):
pass pass
@@ -57,4 +58,6 @@ def validate(text):
# debug # debug
if __name__ == "__main__": if __name__ == "__main__":
import sys import sys
print(validate_court(str(sys.stdin.read()))) lawsuit = str(sys.stdin.read())
print(validate_court(lawsuit))
print(validate_date_and_place(lawsuit))