diff --git a/validator/requirements.txt b/validator/requirements.txt
index fe30b93..cb7a7d2 100644
--- a/validator/requirements.txt
+++ b/validator/requirements.txt
@@ -1,6 +1,7 @@
 certifi==2020.12.5
 chardet==4.0.0
 click==7.1.2
+docx2txt==0.8
 Flask==1.1.2
 geneea-nlp-client==1.2.0
 idna==2.10
@@ -10,7 +11,8 @@ joblib==1.0.0
 MarkupSafe==1.1.1
 nltk==3.5
 pdftotext==2.1.5
-PyPDF2==1.26.0
+Pillow==8.1.0
+pytesseract==0.3.7
 regex==2020.11.13
 requests==2.25.1
 retrying==1.3.3
diff --git a/validator/validator.py b/validator/validator.py
index c4620c8..0047789 100644
--- a/validator/validator.py
+++ b/validator/validator.py
@@ -1,8 +1,12 @@
 import os
 import pdftotext
+import docx2txt
+import pytesseract
+from PIL import Image
 
 def process_jpg(f):
-    pass
+    with Image.open(f) as image:  # context manager closes the image file once OCR is done
+        return pytesseract.image_to_string(image, lang="ces")  # "ces" = Czech language data
 
 def process_pdf(f):
     pdf = pdftotext.PDF(f)
@@ -10,7 +14,9 @@ def process_pdf(f):
     return aggregate
 
 def process_docx(f):
-    pass
+    # TODO: the text returned by docx2txt contains many stray spaces; clean it up
+    return docx2txt.process(f)
+    
 
 def process_file(f):
     # TODO proper file format distinguishing, not only by suffix?
@@ -21,9 +27,7 @@ def process_file(f):
     elif ext == '.pdf':
         return process_pdf(f)
     elif ext == '.docx':
-        return process_docx(f)
-
-    
+        return process_docx(f)    
 
 def validate_court(lawsuit):
     pass