docconvert.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. #-*-coding:utf-8 -*-
  2. import re,os
  3. import json,uuid
  4. from bs4 import BeautifulSoup
  5. from win32com.client import Dispatch
  6. import pythoncom
  7. from upload_to_oss import TedOSS
  8. import threading
  9. import shutil
  10. #from win32com.client.dynamic import Dispatch, ERRORS_BAD_CONTEXT
  11. class DocxConverter(object):
  12. """
  13. """
  14. def __init__(self,docpath="test4.docx"):
  15. """
  16. """
  17. self.docpath = docpath
  18. self.oss = TedOSS()
  19. def upload_imgfiles(self,uuidhtml):
  20. """
  21. """
  22. imgroot = os.path.join(os.path.dirname(self.docpath))
  23. imgdir = os.path.join(imgroot,uuidhtml+".files")
  24. if os.path.exists(imgdir):
  25. for file in os.listdir(imgdir):
  26. imgfile = os.path.join(imgdir,file)
  27. ossfile = uuidhtml+".files/"+file
  28. self.oss.upload_from_local(imgfile,ossfile)
  29. #shutil.rmtree(imgdir)
  30. pythoncom.CoUninitialize()
  31. def docx2pdf(self):
  32. """
  33. """
  34. pythoncom.CoInitialize()
  35. self.word = Dispatch("Word.Application")
  36. self.word.Visible = 0
  37. print(self.docpath,33333333333)
  38. self.doc = self.word.Documents.Open(self.docpath)
  39. self.uuidhtml = str(uuid.uuid4())
  40. #html = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".html")
  41. #html = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".html")
  42. #self.doc.SaveAs(html,10)
  43. pdffile = os.path.join(os.path.dirname(self.docpath),self.uuidhtml+".pdf")
  44. print(pdffile,1111111111111)
  45. self.doc.SaveAs(pdffile,17)
  46. #AvDoc = Dispatch("AcroExch.AVDoc")
  47. #try:
  48. # if AvDoc.Open(pdffile,""):
  49. # pdDoc = AvDoc.GetPDDoc()
  50. # jsObject = pdDoc.GetJSObject()
  51. # jsObject.SaveAs(html,"com.adobe.acrobat.html")
  52. #except Exception as e:
  53. # import traceback
  54. # traceback.print_exc()
  55. # AvDoc.close(True)
  56. #finally:
  57. # AvDoc.Close(True)
  58. self.doc.Close()
  59. self.word.Quit()
  60. os.remove(self.docpath)
  61. return pdffile
  62. if __name__ == "__main__":
  63. docpath = "E:\\WorkSpace\\docxconvert\\lasttest.docx"
  64. docxconverter = DocxConverter(docpath)
  65. docxconverter.docx2html()