diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3abf6de --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.txt +*.log diff --git a/google-ocr.py b/google-ocr.py index 796478a..665a205 100644 --- a/google-ocr.py +++ b/google-ocr.py @@ -2,13 +2,16 @@ import os +types = ['*.jpg','*.png','*.pdf'] files = [] -for filename in glob.glob('*.jpg'): - files.append(filename) -for image in sorted(files): - print "uploading " + image - command = "gdput.py -t ocr " + image + " > result.log" +for type_ in types: + for filename in glob.glob(type_): + files.append(filename) + +for file in sorted(files): + print "uploading " + file + command = "gdput.py -t ocr " + file + " > result.log" print "running " + command os.system(command) @@ -17,7 +20,7 @@ for line in resultfile: if "id:" in line: fileid = line.split(":")[1].strip() - filename = image.split(".")[0] + ".txt" + filename = file.split(".")[0] + ".txt" get_command = "gdget.py -f txt -s " + filename + " " + fileid print "running "+ get_command os.system(get_command) diff --git a/sample1.jpg b/sample1.jpg new file mode 100644 index 0000000..131d182 Binary files /dev/null and b/sample1.jpg differ diff --git a/sample2.pdf b/sample2.pdf new file mode 100644 index 0000000..2987c1f Binary files /dev/null and b/sample2.pdf differ