微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

如何从许多Word文件中将标题提取到Excel以使用VBA建立数据库

如何解决如何从许多Word文件中将标题提取到Excel以使用VBA建立数据库

我有数百个Word文件(docx),每个文件都有不同的标题,分别定义为标题1,标题2,标题3等。这些文件中的每个文件都有一个标题相对应的目录。我要做的是从每个文件提取每个标题,并将它们填充到Excel工作簿中以建立数据库。我的问题是我对VBA非常无能,尤其是在Word中。

我的第一个尝试是将标题仅从单个Word文档中提取到Excel工作簿中。我设法以某种方式在线找到了将Word的标题提取到Outlook的代码,并且还分离了将Word的标题提取到新的Word文件代码。对于我一生,我一直无法适应这两段代码来设法将Word的标题提取到Excel。上帝知道我已经尝试过。

如果有人可以帮助我,至少在步骤1中,我将不胜感激(将标题从单个Word文件提取到Excel-然后,我将尝试制定进一步的步骤)。这是我提到的这两段代码,如果它们相关的话:

Word to Outlook

Sub copyheadingsIntoOutlookMail()
    Dim objOutlookApp,objMail As Object
    Dim objMailDocument As Word.Document
    Dim objMailRange As Word.Range
    Dim varheadings As Variant
    Dim i As Long
    Dim strText As String
    Dim nLongDiff As Integer

    'Create a new Outlook email
    Set objOutlookApp = CreateObject("outlook.application")
    Set objMail = objOutlookApp.CreateItem(olMailItem)
    objMail.display
    Set objMailDocument = objMail.GetInspector.WordEditor
    Set objMailRange = objMailDocument.Range(0,0)
 
    'Get the headings of the current Word document
    varheadings = ActiveDocument.GetCrossReferenceItems(wdRefTypeheading)

    For i = LBound(varheadings) To UBound(varheadings)
        strText = Trim(varheadings(i))
 
        'Get the heading level
        nLongDiff = Len(RTrim$(CStr(varheadings(i)))) - Len(Trim(CStr(varheadings(i))))
        nheadingLevel = (nLongDiff / 2) + 1
 
        'Insert the heading into the Outlook mail
        With objMailRange
             .InsertAfter strText & vbNewLine
             .Style = "heading " & nheadingLevel
             .Collapse wdCollapseEnd
        End With
    Next i
End Sub

词对词


Public Sub CreateOutline()
    Dim docOutline As Word.Document
    Dim docSource As Word.Document
    Dim rng As Word.Range
    
    Dim astrheadings As Variant
    Dim strText As String
    Dim intLevel As Integer
    Dim intItem As Integer
        
    Set docSource = ActiveDocument
    Set docOutline = Documents.Add
    
    ' Content returns only the
    ' main body of the document,not
    ' the headers and footer.
    Set rng = docOutline.Content
    astrheadings = _
     docSource.GetCrossReferenceItems(wdRefTypeheading)
    
    For intItem = LBound(astrheadings) To UBound(astrheadings)
        ' Get the text and the level.
        strText = Trim$(astrheadings(intItem))
        intLevel = GetLevel(CStr(astrheadings(intItem)))
        
        ' Add the text to the document.
        rng.InsertAfter strText & vbNewLine
        
        ' Set the style of the selected range and
        ' then collapse the range for the next entry.
        rng.Style = "heading " & intLevel
        rng.Collapse wdCollapseEnd
    Next intItem
End Sub

Private Function GetLevel(strItem As String) As Integer
    ' Return the heading level of a header from the
    ' array returned by Word.
    
    ' The number of leading spaces indicates the
    ' outline level (2 spaces per level: H1 has
    ' 0 spaces,H2 has 2 spaces,H3 has 4 spaces.
        
    Dim strTemp As String
    Dim strOriginal As String
    Dim intDiff As Integer
    
    ' Get rid of all trailing spaces.
    strOriginal = RTrim$(strItem)
    
    ' Trim leading spaces,and then compare with
    ' the original.
    strTemp = LTrim$(strOriginal)
    
    ' Subtract to find the number of
    ' leading spaces in the original string.
    intDiff = Len(strOriginal) - Len(strTemp)
    GetLevel = (intDiff / 2) + 1
End Function

解决方法

尝试以下Excel宏。运行它时,只需选择要处理的文件夹即可。

Sub GetTOCHeadings()
'Note: this code requires a reference to the Word object model.
'See under the VBE's Tools|References.
Application.ScreenUpdating = False
Dim wdDoc As Word.Document,wdRng As Word.Range,wdPara As Word.Paragraph
Dim strFolder As String,strFile As String
Dim WkSht As Worksheet,i As Long,j As Long
strFolder = GetFolder
If strFolder = "" Then Exit Sub
Dim wdApp As New Word.Application
Set WkSht = ActiveSheet
i = WkSht.Cells(WkSht.Rows.Count,1).End(xlUp).Row
wdApp.WordBasic.DisableAutoMacros
wdApp.DisplayAlerts = wdAlertsNone
strFile = Dir(strFolder & "\*.doc",vbNormal)
While strFile <> ""
  i = i + 1
  Set wdDoc = wdApp.Documents.Open(Filename:=strFolder & "\" & strFile,AddToRecentFiles:=False,Visible:=False)
  With wdDoc
    j = 1: WkSht.Cells(i,j) = strFile
    If .TablesOfContents.Count > 0 Then
      With .TablesOfContents(1)
        .IncludePageNumbers = False
        .Update
        Set wdRng = .Range
      End With
      With wdRng
        .Fields(1).Unlink
        For Each wdPara In .Paragraphs
          j = j + 1
          WkSht.Cells(i,j).Value = Replace(wdPara.Range.Text,vbTab," ")
        Next
      End With
    End If
    .Close SaveChanges:=False
  End With
  strFile = Dir()
Wend
wdApp.DisplayAlerts = wdAlertsAll
wdApp.Quit
Set wdDoc = Nothing: Set wdApp = Nothing: Set WkSht = Nothing
Application.ScreenUpdating = True
End Sub
 
Function GetFolder() As String
    Dim oFolder As Object
    GetFolder = ""
    Set oFolder = CreateObject("Shell.Application").BrowseForFolder(0,"Choose a folder",0)
    If (Not oFolder Is Nothing) Then GetFolder = oFolder.Items.Item.Path
    Set oFolder = Nothing
End Function

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。