# -*- coding: utf-8 -*-
"""Convert a Word document to PDF by driving Microsoft Word over COM."""
import json
import logging
import os
import re
import shutil
import threading
import uuid

import pythoncom
from bs4 import BeautifulSoup
from win32com.client import Dispatch

from upload_to_oss import TedOSS

logger = logging.getLogger(__name__)

# Value of WdSaveFormat.wdFormatPDF, passed to Document.SaveAs.
WD_FORMAT_PDF = 17


class DocxConverter(object):
    """Convert one Word document to PDF and upload companion image files.

    Attributes set by this class:
        docpath: path of the source document, as given to the constructor.
        oss: ``TedOSS`` client used by :meth:`upload_imgfiles`.
        uuidhtml: UUID string generated by :meth:`docx2pdf`; it is the base
            name of the produced PDF.
        word, doc: COM proxies used while :meth:`docx2pdf` runs; both are
            reset to ``None`` before it returns.
    """

    def __init__(self, docpath="test4.docx"):
        """Remember the source document path and create the OSS client.

        Args:
            docpath: path of the Word document to convert.
        """
        self.docpath = docpath
        self.oss = TedOSS()

    def upload_imgfiles(self, uuidhtml):
        """Upload every file in ``<uuidhtml>.files`` next to the document.

        The directory is looked up beside ``self.docpath``. If it does not
        exist, nothing is uploaded. Each file is sent with
        ``self.oss.upload_from_local(local_path, "<uuidhtml>.files/<name>")``.
        The local directory is left in place.

        Args:
            uuidhtml: base name (without extension) of the export whose
                ``.files`` directory should be uploaded.
        """
        # NOTE(review): this method no longer calls pythoncom.CoUninitialize();
        # it performs no COM work, and docx2pdf now balances its own
        # CoInitialize/CoUninitialize pair.
        imgdir = os.path.join(os.path.dirname(self.docpath), uuidhtml + ".files")
        if not os.path.exists(imgdir):
            return
        for name in os.listdir(imgdir):
            local_path = os.path.join(imgdir, name)
            # The remote name always uses "/" regardless of the local OS.
            remote_name = uuidhtml + ".files/" + name
            self.oss.upload_from_local(local_path, remote_name)

    def docx2pdf(self):
        """Save the document as ``<uuid>.pdf`` beside it and delete the source.

        Word is started hidden, the document is opened and saved in PDF
        format, then the document is closed and Word is quit. Cleanup runs
        even when opening or saving fails, so no hidden Word instance is
        left behind. The source document is removed only after a successful
        conversion.

        Returns:
            Absolute path of the generated PDF file.

        Raises:
            Whatever the COM layer raises if Word cannot be started or the
            document cannot be opened or saved; ``OSError`` if the source
            document cannot be removed afterwards.
        """
        # Word resolves relative paths against its own working directory,
        # not this process's, so hand it absolute paths only.
        source = os.path.abspath(self.docpath)
        self.uuidhtml = str(uuid.uuid4())
        pdffile = os.path.join(os.path.dirname(source), self.uuidhtml + ".pdf")
        logger.debug("converting %s -> %s", source, pdffile)

        self.word = None
        self.doc = None
        pythoncom.CoInitialize()
        try:
            self.word = Dispatch("Word.Application")
            try:
                self.word.Visible = 0
                self.doc = self.word.Documents.Open(source)
                try:
                    self.doc.SaveAs(pdffile, WD_FORMAT_PDF)
                finally:
                    self.doc.Close()
            finally:
                self.word.Quit()
        finally:
            # Drop the COM proxies before uninitialising COM on this thread.
            self.doc = None
            self.word = None
            pythoncom.CoUninitialize()

        # NOTE: an earlier HTML export path (SaveAs format 10, Acrobat
        # "com.adobe.acrobat.html") was disabled in this file; upload_imgfiles
        # presumably served that path -- TODO confirm before removing it.
        os.remove(source)
        return pdffile


if __name__ == "__main__":
    docpath = "E:\\WorkSpace\\docxconvert\\lasttest.docx"
    docxconverter = DocxConverter(docpath)
    # The class defines docx2pdf; there is no docx2html method.
    docxconverter.docx2pdf()