Add PDF-to-text extraction
This commit is contained in:
parent
3dee27732c
commit
c51386f195
@ -9,6 +9,8 @@ Jinja2==2.11.2
|
|||||||
joblib==1.0.0
|
joblib==1.0.0
|
||||||
MarkupSafe==1.1.1
|
MarkupSafe==1.1.1
|
||||||
nltk==3.5
|
nltk==3.5
|
||||||
|
pdftotext==2.1.5
|
||||||
|
PyPDF2==1.26.0
|
||||||
regex==2020.11.13
|
regex==2020.11.13
|
||||||
requests==2.25.1
|
requests==2.25.1
|
||||||
retrying==1.3.3
|
retrying==1.3.3
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
from flask import Flask, render_template, request
|
from flask import Flask, render_template, request, jsonify
|
||||||
import json
|
import json
|
||||||
import validator
|
import validator
|
||||||
|
|
||||||
|
@ -1,14 +1,29 @@
|
|||||||
|
import os
|
||||||
|
import pdftotext
|
||||||
|
|
||||||
def process_jpg(f):
|
def process_jpg(f):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def process_pdf(f):
|
def process_pdf(f):
|
||||||
pass
|
pdf = pdftotext.PDF(f)
|
||||||
|
aggregate = "\n\n".join(pdf)
|
||||||
|
return aggregate
|
||||||
|
|
||||||
def process_docx(f):
|
def process_docx(f):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def process_file(f):
|
def process_file(f):
|
||||||
pass
|
# TODO: distinguish file formats properly rather than only by suffix?
|
||||||
|
_, ext = os.path.splitext(f.filename)
|
||||||
|
print(ext)
|
||||||
|
if ext == '.jpg':
|
||||||
|
return process_jpg(f)
|
||||||
|
elif ext == '.pdf':
|
||||||
|
return process_pdf(f)
|
||||||
|
elif ext == '.docx':
|
||||||
|
return process_docx(f)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def validate_court(lawsuit):
|
def validate_court(lawsuit):
|
||||||
pass
|
pass
|
||||||
|
Loading…
Reference in New Issue
Block a user