123456789101112131415161718192021222324252627 |
- #-*-coding:utf-8 -*-
import os
import subprocess
# Opening HTML boilerplate, up to and including the <body> tag.
htmlstr = "<html><head></head><body>"
def pdf2html(srcpdf, outdir="C:\\AppData\\say365\\htmp",
             converter="E:\\WorkSpace\\docxconvert\\say365parser\\pdf2vec.exe"):
    """Run the pdf2vec converter on a PDF and merge the SVG output into one HTML file.

    The converter is invoked as
    ``<converter> -useunicode 1 <srcpdf> <outdir>/<stem>.svg -winfont2``.
    Afterwards every ``*.svg`` file found in ``outdir`` is concatenated, in
    sorted file-name order, into ``<outdir>/<stem>.html``. The SVG markup is
    written as-is, without an ``<html>``/``<body>`` wrapper.

    Args:
        srcpdf: Path of the source PDF. Both ``\\`` and ``/`` separators are
            accepted when deriving the output file name.
        outdir: Directory that receives the SVG and HTML output; created if
            it does not exist.
        converter: Path of the pdf2vec executable.

    Returns:
        The path of the generated HTML file.

    Raises:
        OSError: If the converter executable cannot be started, or if an
            output file cannot be read or written.
    """
    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    # Normalise separators first so the base name is found for either path style.
    pdfname = os.path.basename(srcpdf.replace("\\", "/"))
    # splitext only touches the real extension, never a ".pdf" inside outdir.
    stem = os.path.splitext(pdfname)[0]
    outsvg = os.path.join(outdir, stem + ".svg")
    tmphtml = os.path.join(outdir, stem + ".html")

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact. The exit status is ignored, as before.
    # NOTE(review): pdf2vec's exit-code convention is not visible here --
    # confirm before turning a non-zero status into an error.
    subprocess.call([converter, "-useunicode", "1", srcpdf, outsvg, "-winfont2"])

    # Only merge SVG files: this skips the HTML file being written as well as
    # unrelated files. Sorting makes the merge order deterministic.
    # NOTE(review): SVGs left in outdir by earlier conversions are still
    # merged; the converter's output naming is not visible here, so no
    # stricter per-document filter is applied -- confirm and tighten.
    svgnames = sorted(
        name for name in os.listdir(outdir)
        if name.lower().endswith(".svg")
        and os.path.isfile(os.path.join(outdir, name))
    )

    # Copy bytes verbatim so the result does not depend on the locale encoding.
    with open(tmphtml, "wb") as out:
        for name in svgnames:
            with open(os.path.join(outdir, name), "rb") as svg:
                out.write(svg.read())
    return tmphtml
-
-
if __name__ == "__main__":
    # Ad-hoc run: convert one hard-coded sample PDF into the work directory.
    srcpdf = "E:\\WorkSpace\\docxconvert\\ecb6c73b-78dc-4b66-83a0-ebf3803645b8.pdf"
    outdir = "C:\\AppData\\say365\\htmp"
    pdf2html(srcpdf, outdir)
|