1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374 |
- #-*-coding:utf-8 -*-
- import re,os
- import json,uuid
- from bs4 import BeautifulSoup
- from win32com.client import Dispatch
- import pythoncom
- from upload_to_oss import TedOSS
- import threading
- import shutil
- #from win32com.client.dynamic import Dispatch, ERRORS_BAD_CONTEXT
class DocxConverter(object):
    """Convert a Word document to PDF by automating Word over COM.

    The instance also holds a ``TedOSS`` client (``self.oss``) that
    ``upload_imgfiles`` uses to push a ``<uuid>.files`` directory found next
    to the document.
    """

    # WdSaveFormat.wdFormatPDF in the Word object model.
    WD_FORMAT_PDF = 17
    # WdSaveOptions.wdDoNotSaveChanges: close without a "save changes?" prompt.
    WD_DO_NOT_SAVE_CHANGES = 0

    def __init__(self, docpath="test4.docx"):
        """Remember the document path and create the upload client.

        Args:
            docpath: Path of the document to convert.
        """
        self.docpath = docpath
        self.oss = TedOSS()

    def upload_imgfiles(self, uuidhtml):
        """Upload every file in ``<uuidhtml>.files`` next to the document.

        Each file is handed to ``self.oss.upload_from_local`` with the remote
        name ``<uuidhtml>.files/<filename>``. Nothing happens when the
        directory does not exist. The local directory is left in place.

        Args:
            uuidhtml: Base name (without the ``.files`` suffix) of the
                directory to upload.
        """
        remote_prefix = uuidhtml + ".files"
        imgdir = os.path.join(os.path.dirname(self.docpath), remote_prefix)
        if not os.path.isdir(imgdir):
            return
        for name in os.listdir(imgdir):
            self.oss.upload_from_local(
                os.path.join(imgdir, name), remote_prefix + "/" + name
            )
        # COM is no longer uninitialized here: this method does no COM work,
        # and docx2pdf now balances its own CoInitialize/CoUninitialize pair.

    def docx2pdf(self):
        """Save the document as ``<uuid4>.pdf`` in the same directory.

        The source document is deleted once the conversion has succeeded.
        ``self.uuidhtml`` is set to the generated UUID string. ``self.word``
        and ``self.doc`` are reset to ``None`` when the method finishes,
        because the COM objects they referred to have been closed.

        Returns:
            The absolute path of the PDF that was written.

        Raises:
            Any error raised by the COM calls or by ``os.remove``. Word is
            shut down and COM is uninitialized before the error propagates,
            and the source document is kept.
        """
        # Word resolves relative names against its own working directory, not
        # Python's, so always hand it an absolute path.
        source = os.path.abspath(self.docpath)
        self.uuidhtml = str(uuid.uuid4())
        pdffile = os.path.join(os.path.dirname(source), self.uuidhtml + ".pdf")

        pythoncom.CoInitialize()
        try:
            self.word = Dispatch("Word.Application")
            try:
                self.word.Visible = 0
                self.doc = self.word.Documents.Open(source)
                try:
                    self.doc.SaveAs(pdffile, self.WD_FORMAT_PDF)
                finally:
                    self.doc.Close(self.WD_DO_NOT_SAVE_CHANGES)
            finally:
                # Always quit, otherwise a failed conversion leaves a hidden
                # Word process running.
                self.word.Quit()
        finally:
            # Drop the COM references before tearing COM down on this thread.
            self.doc = None
            self.word = None
            pythoncom.CoUninitialize()

        # Only reached on success, after Word has released the file.
        os.remove(source)
        return pdffile
if __name__ == "__main__":
    # Manual smoke run against a local sample document.
    docpath = "E:\\WorkSpace\\docxconvert\\lasttest.docx"
    docxconverter = DocxConverter(docpath)
    # The class has no docx2html method; docx2pdf is the conversion entry point.
    docxconverter.docx2pdf()
|