FREE PYTHON CODES: July 2016

Friday, 22 July 2016

Python code to convert .docx, .doc and .pdf files to .txt files

from os import chdir, getcwd, listdir, path

from time import strftime

from win32com import client

import os

import pyPdf



def count_files(filetype):
    ''' (str) -> int

    Return how many entries in the module-level ``folder`` have a name
    ending with ``filetype`` (case-sensitive suffix match).
    '''
    matching = [entry for entry in listdir(folder) if entry.endswith(filetype)]
    return len(matching)



def check_path(prompt):
    ''' (str) -> str

    Ask the user for a path, showing ``prompt``, and keep asking until the
    entered path exists on disk. Return the first existing path entered.
    Only existence is checked; the path is not required to be absolute.
    '''
    while True:
        abs_path = raw_input(prompt)
        if path.exists(abs_path):
            return abs_path
        # Single-argument print(...) behaves the same as the print statement.
        print("\nThe specified path does not exist.\n")

   

# Visual separation before the prompt.
# (Single-argument print(...) produces the same output as the Python 2
# print statement, so the console text is unchanged.)
print("\n")

folder = check_path("Provide absolute path for the folder: ")

# Work inside the chosen folder so later listings are relative to it.
chdir(folder)

# Per-extension tallies of the files sitting directly in the folder.
num_docx = count_files(".docx")
num_doc = count_files(".doc")
num_pdf = count_files(".pdf")

if num_docx + num_doc + num_pdf != 0:
    print("%s %s %s" % ("\nNumber of doc,docx and pdf files: ",
                        num_docx + num_doc + num_pdf, "\n"))
    print("%s %s" % (strftime("%H:%M:%S"), "Starting to convert files ...\n"))
else:
    # Nothing to do: tell the user and stop the script here.
    print("\nThe specified folder does not contain docx,doc or pdf files.\n")
    print("%s %s" % (strftime("%H:%M:%S"),
                     "There are no files to convert. BYE, BYE!."))
    exit()



# Convert every .docx / .doc file in the working directory to plain text by
# driving Microsoft Word through COM.

# Stays None when Word cannot be started, so the cleanup below does not
# raise a NameError that would hide the original error.
word = None

try:
    word = client.DispatchEx("Word.Application")

    for files in listdir(getcwd()):
        if files.endswith(".docx"):
            extension = ".docx"
        elif files.endswith(".doc"):
            extension = ".doc"
        else:
            continue

        # Swap only the trailing extension; str.replace would also rewrite
        # a ".doc" that appears earlier in the file name.
        new_name = files[:-len(extension)] + ".txt"
        in_file = path.abspath(path.join(folder, files))
        new_file = path.abspath(path.join(folder, new_name))

        doc = word.Documents.Open(in_file)
        try:
            # Pads the label to 4 characters: " docx -> txt " / " doc  -> txt ".
            print("%s %s %s" % (strftime("%H:%M:%S"),
                                " %-4s -> txt " % extension[1:],
                                path.relpath(new_file)))
            # FileFormat 2 is wdFormatText (plain text) in Word's
            # WdSaveFormat enumeration.
            doc.SaveAs(new_file, FileFormat=2)
        finally:
            # Release the document even if saving failed.
            doc.Close()

except Exception as e:
    # Best effort: report the problem and continue with the rest of the
    # script rather than aborting.
    print(e)

finally:
    if word is not None:
        word.Quit()


# Collect every PDF below the chosen folder, then write the extracted text
# of each one to a .txt file next to it.
pdf_paths = []

directory = folder

for root, dirs, files in os.walk(directory):
    for filename in files:
        if filename.endswith('.pdf'):
            # Join with root (the directory currently being walked), not the
            # top-level folder, so PDFs in sub-folders get a path that exists.
            pdf_paths.append(os.path.join(root, filename))

# Iterate the paths directly. The previous index-based while loop ran one
# past the end of the list and shared its counter with the page loop, so it
# could raise IndexError or never terminate.
for pdf_path in pdf_paths:
    # Replace only the trailing extension and keep the same directory.
    name = os.path.splitext(pdf_path)[0] + ".txt"

    # The with-block closes the PDF handle once the text has been read.
    with open(pdf_path, "rb") as pdf_file:
        # Load PDF into pyPDF
        pdf = pyPdf.PdfFileReader(pdf_file)

        # One chunk of text per page, each followed by a newline.
        content = "".join(
            pdf.getPage(page_number).extractText() + "\n"
            for page_number in range(pdf.getNumPages())
        )

    print("%s %s" % (strftime("%H:%M:%S"), " pdf  -> txt "))

    # The with-block closes the output file; the old code referenced
    # f.close without calling it.
    with open(name, 'w') as f:
        f.write(content.encode("UTF-8"))


# Final status line: a blank line, then the timestamp and the message.
print("\n%s %s" % (strftime("%H:%M:%S"),
                   "Finished converting .doc .docx and .pdf files."))

# Count the number of txt files now present in the chosen folder.
num_txt = count_files(".txt")

print("%s %s" % ("\nNumber of txt files: ", num_txt))

Thursday, 7 July 2016

Python code to convert all pdf files to .txt files in a folder

 import os

from os import chdir, getcwd, listdir, path

import pyPdf

from time import strftime


def check_path(prompt):
    ''' (str) -> str

    Ask the user for a path, showing ``prompt``, and keep asking until the
    entered path exists on disk. Return the first existing path entered.
    Only existence is checked; the path is not required to be absolute.
    '''
    while True:
        abs_path = raw_input(prompt)
        if path.exists(abs_path):
            return abs_path
        # Single-argument print(...) behaves the same as the print statement.
        print("\nThe specified path does not exist.\n")

   

# Ask for a folder, then write the extracted text of every PDF below it to a
# .txt file next to the PDF.
print("\n")

folder = check_path("Provide absolute path for the folder: ")

pdf_paths = []

directory = folder

for root, dirs, files in os.walk(directory):
    for filename in files:
        if filename.endswith('.pdf'):
            # Join with root (the directory currently being walked), not the
            # top-level folder, so PDFs in sub-folders get a path that exists.
            pdf_paths.append(os.path.join(root, filename))

# Iterate the paths directly. The previous index-based while loop ran one
# past the end of the list and shared its counter with the page loop, so it
# could raise IndexError or never terminate.
for pdf_path in pdf_paths:
    # Replace only the trailing extension and keep the same directory.
    name = os.path.splitext(pdf_path)[0] + ".txt"

    # The with-block closes the PDF handle once the text has been read.
    with open(pdf_path, "rb") as pdf_file:
        # Load PDF into pyPDF
        pdf = pyPdf.PdfFileReader(pdf_file)

        # One chunk of text per page, each followed by a newline.
        content = "".join(
            pdf.getPage(page_number).extractText() + "\n"
            for page_number in range(pdf.getNumPages())
        )

    print("%s %s" % (strftime("%H:%M:%S"), " pdf  -> txt "))

    # The with-block closes the output file; the old code referenced
    # f.close without calling it.
    with open(name, 'w') as f:
        f.write(content.encode("UTF-8"))

Simple animation loop in python

Click here to download the python file

Build A Salesforce To Sell For Your Business With Monkey Business

Python 3 users should use the code given below.
For the code to work, you first have to install PyPDF2
by running: pip install pypdf2

For converting html files to text files Click here

import os

from os import chdir, getcwd, listdir, path

import PyPDF2

from time import strftime


def check_path(prompt):
    ''' (str) -> str

    Ask the user for a path, showing ``prompt``, and keep asking until the
    entered path exists on disk. Return the first existing path entered.
    Only existence is checked; the path is not required to be absolute.
    '''
    while True:
        abs_path = input(prompt)
        if path.exists(abs_path):
            return abs_path
        print ("\nThe specified path does not exist.\n")

   

# Ask for a folder, then write the extracted text of every PDF below it to a
# .txt file next to the PDF.
print("\n")

folder = check_path("Provide absolute path for the folder: ")

pdf_paths = []

directory = folder

for root, dirs, files in os.walk(directory):
    for filename in files:
        if filename.endswith('.pdf'):
            # Join with root (the directory currently being walked), not the
            # top-level folder, so PDFs in sub-folders get a path that exists.
            pdf_paths.append(os.path.join(root, filename))

for pdf_path in pdf_paths:
    # Replace only the trailing extension and keep the same directory.
    name = os.path.splitext(pdf_path)[0] + ".txt"

    # Open the PDF explicitly in binary mode. The old call passed "rb" as
    # PdfFileReader's second positional argument, which is not a file mode.
    # NOTE(review): newer PyPDF2 releases rename PdfFileReader / getNumPages /
    # getPage / extractText -- confirm against the installed version.
    with open(pdf_path, "rb") as pdf_file:
        pdf = PyPDF2.PdfFileReader(pdf_file)

        # One chunk of text per page, each followed by a newline.
        content = "".join(
            pdf.getPage(page_number).extractText() + "\n"
            for page_number in range(pdf.getNumPages())
        )

    print(strftime("%H:%M:%S"), " pdf  -> txt ")

    # 'w' instead of 'a' so re-running the script does not append a second
    # copy of the text; UTF-8 avoids encode errors from the platform default.
    with open(name, 'w', encoding="utf-8") as out:
        out.write(content)

FREE PYTHON CODES

a

Friday, 22 July 2016

Thursday, 7 July 2016

Blog Archive

About Me