# -*- coding: utf-8 -*-
"""Build a single HTML file from the SVG output of the pdf2vec converter."""
import logging
import ntpath
import os
import shutil
import subprocess

logger = logging.getLogger(__name__)

# Not used anywhere in this module; kept so that code importing the name
# from here keeps working.
htmlstr = ""

# Default location of the external converter executable.
PDF2VEC_EXE = "E:\\WorkSpace\\docxconvert\\say365parser\\pdf2vec.exe"


def pdf2html(srcpdf, outdir="C:\\AppData\\say365\\htmp", converter=None):
    """Run pdf2vec on *srcpdf* and merge the SVG files in *outdir* into HTML.

    The converter is invoked as
    ``<converter> -useunicode 1 <srcpdf> <outdir>/<name>.svg -winfont2``.
    Afterwards every regular ``*.svg`` file found in *outdir* is appended,
    byte for byte and in sorted name order, to ``<outdir>/<name>.html``.

    NOTE(review): all SVG files in *outdir* are merged, not only the ones
    produced for *srcpdf* (the converter's output naming is not visible
    here) -- use a dedicated *outdir* per document.

    Args:
        srcpdf: Path of the source PDF. Both ``\\`` and ``/`` separators are
            understood when deriving the output file name.
        outdir: Directory receiving the SVG and HTML files; created when
            missing.
        converter: Path of the pdf2vec executable. ``None`` selects
            ``PDF2VEC_EXE``.

    Returns:
        Path of the HTML file that was written.

    A converter that cannot be started or exits with a non-zero status is
    logged as a warning; the HTML file is still written from whatever SVG
    files are present, so no exception is raised for converter failures.
    """
    if converter is None:
        converter = PDF2VEC_EXE

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # ntpath splits on both "\\" and "/" regardless of the host platform.
    pdf_name = ntpath.basename(srcpdf)
    stem, ext = ntpath.splitext(pdf_name)
    if ext.lower() != ".pdf":
        # Only a real ".pdf" suffix (any case) is stripped from the name.
        stem = pdf_name
    outsvg = os.path.join(outdir, stem + ".svg")
    tmphtml = os.path.join(outdir, stem + ".html")

    # An argument list avoids shell parsing, so paths containing spaces or
    # shell metacharacters are passed to the converter unchanged.
    cmd = [converter, "-useunicode", "1", srcpdf, outsvg, "-winfont2"]
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        logger.warning("could not start %s: %s", converter, err)
    else:
        if status != 0:
            logger.warning("%s exited with status %s", converter, status)

    # Snapshot the SVG inputs first; the HTML output (and any other non-SVG
    # file living in outdir) must not be fed back into itself.
    svg_paths = [
        os.path.join(outdir, entry)
        for entry in sorted(os.listdir(outdir))
        if entry.lower().endswith(".svg")
    ]

    # Binary mode copies the SVG bytes verbatim, independent of the
    # platform's default text encoding.
    with open(tmphtml, "wb") as html_file:
        for svg_path in svg_paths:
            if not os.path.isfile(svg_path):
                continue
            with open(svg_path, "rb") as svg_file:
                shutil.copyfileobj(svg_file, html_file)
    return tmphtml


if __name__ == "__main__":
    srcpdf = "E:\\WorkSpace\\docxconvert\\ecb6c73b-78dc-4b66-83a0-ebf3803645b8.pdf"
    outdir = "C:\\AppData\\say365\\htmp"
    pdf2html(srcpdf, outdir)