微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

确定一个词是否在匹配项的 125 个词以内

如何解决确定一个词是否在匹配项的 125 个词以内

我想遍历一个文档,对于每个单词,查看 250 个单词(后面 125 个,前面 125 个)内是否有匹配项。

如果有匹配项,请将其突出显示。某些词被排除在外。这些存储在字典中。

为了测试我正在使用的循环,

For Each para In ActiveDocument.Paragraphs
    For Each wrd In para.Range.Words
        Debug.Print wrd & "----" & wrd.Start
    Next wrd
Next para

问题:

“我讨厌到精神世界旅行”这句话中的“世界”,当我在寻找7时打印32。

我想做类似的事情:

If wrd < 125 Then
    Set wrdRng = ActiveDocument.Range(Start:=wrd - 125,End:=ActiveDocument.Words(wrd + 125).End)
Else 
    Set wrdRng = ActiveDocument.Range(Start:=0,End:=ActiveDocument.Words(250 - wrd).End)
End if

编辑:

我使用的当前代码在大约 13 分钟内完成了 50,000 字文档的循环。这完全太长了。有人有更好的选择吗?

Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String


StartTime = Timer

For Each Para In ActiveDocument.Paragraphs
For Each wrd In Para.Range.Words
With ActiveDocument.Range
  With .Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Text = "<(McKnight)*\1>"
    .Replacement.Text = ""
    .Forward = True
    .Wrap = wdFindStop
    .Format = False
    .MatchCase = False
    .MatchWildcards = True
  End With
  do while .Find.Execute
    If .ComputeStatistics(wdStatisticWords) < 100 Then
      i = i + 1
      .Words.First.HighlightColorIndex = wdBrightGreen
      .Words.Last.HighlightColorIndex = wdBrightGreen
    End If
    .End = .End - Len(.Words.Last)
    .Collapse wdCollapseEnd
  Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
Debug.Print wrd
    Next wrd
Next Para
MinutesElapsed = Format((Timer - StartTime) / 86400,"hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes",vbinformation

End Sub

编辑:

Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String


StartTime = Timer

For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
With ActiveDocument.Range
  With .Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Text = "<(wrd)*\1>"
    .Replacement.Text = ""
    .Forward = True
    .Wrap = wdFindStop
    .Format = False
    .MatchCase = False
    .MatchWildcards = True
  End With
  do while .Find.Execute
    If .ComputeStatistics(wdStatisticWords) < 100 Then
      i = i + 1
      .Words.First.HighlightColorIndex = wdBrightGreen
      .Words.Last.HighlightColorIndex = wdBrightGreen
    End If
    .End = .End - Len(.Words.Last)
    .Collapse wdCollapseEnd
  Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
'Debug.Print wrd
    Next wrd
Next para
MinutesElapsed = Format((Timer - StartTime) / 86400,vbinformation

End Sub

解决方法

尝试以下操作。除其他外,它允许您指定要忽略的单词(例如介词、冠词等)。此外,不同的突出显示用于识别给定单词的所有“命中”。状态栏上会给出进度报告。在我的笔记本电脑上,一份 50,000 字的“lorem”文档大约需要 6:40。

Option Explicit
Dim ArrOut() As String

Sub Demo()
' Turn Off Screen Updating
Application.ScreenUpdating = False
Dim eTime As Single
' Start Timing
eTime = Timer
Dim wdDoc As Document,StrFnd As String,StrTmp As String,Rng As Range
Dim SBar As Boolean,bTrk As Boolean,h As Long,i As Long,j As Long
' Store current Status Bar status,then switch on
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set wdDoc = ActiveDocument
With wdDoc
  ' Store current Track Changes status,then switch off
  bTrk = .TrackRevisions: .TrackRevisions = False
  'Display status
   Application.StatusBar = "Building word list"
  'Compile the Find list
  Call BuildWordList(.Range.Text)
  With .Range.Find
    .ClearFormatting
    .Replacement.ClearFormatting
    .Text = ""
    .Replacement.Text = ""
    .Format = False
    .MatchCase = False
    .MatchWholeWord = True
    .Wrap = wdFindStop
    .Execute
  End With
End With
'Process all words in the concordance
For i = 0 To UBound(ArrOut())
  StrFnd = ArrOut(i)
  h = i Mod 14
  If h < 6 Then
    h = h + 2
  Else
    h = h + 3
  End If
  'Display current word
  Application.StatusBar = "Processing: " & StrFnd
  'Use wildcards,if possible,for extra speed
  If Len(StrFnd) < 4 Then
    StrTmp = ""
    For j = 1 To Len(StrFnd)
      StrTmp = StrTmp & "[" & UCase(Mid(StrFnd,j,1)) & Mid(StrFnd,1) & "]"
    Next
    StrFnd = StrTmp
    With wdDoc.Range
      With .Find
        .MatchWildcards = True
        .Text = "<(" & StrFnd & ")>*<(" & StrFnd & ")>"
        .Forward = True
        .Wrap = wdFindStop
      End With
      Do While .Find.Execute
        If .ComputeStatistics(wdStatisticWords) < 100 Then
          If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
          .Words.Last.HighlightColorIndex = h
        End If
        .End = .End - Len(.Words.Last)
        .Collapse wdCollapseEnd
      Loop
    End With
  Else
    With wdDoc.Range
      With .Find
        .MatchWildcards = False
        .Text = StrFnd
        .Forward = True
        .Wrap = wdFindStop
        .Execute
      End With
      Set Rng = .Duplicate
      Do While .Find.Execute
        Rng.End = .Duplicate.End
        With Rng
          If .ComputeStatistics(wdStatisticWords) < 100 Then
            If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
            .Words.Last.HighlightColorIndex = h
          End If
        End With
        Set Rng = .Duplicate
        .Collapse wdCollapseEnd
      Loop
    End With
  End If
  DoEvents
Next
' Restore original Track Changes status
wdDoc.TrackRevisions = bTrk
' Clear the Status Bar
Application.StatusBar = False
' Restore original Status Bar status
Application.DisplayStatusBar = SBar
' Restore Screen Updating
Application.ScreenUpdating = True
' Calculate elapsed time
eTime = (Timer - eTime + 86400) Mod 86400 ' Just in case execution time spans midnight
MsgBox "Execution took " & Format(eTime / 86400,"hh:mm:ss") & " to process"
End Sub

Sub BuildWordList(StrIn As String)
Dim StrFnd As String,j As Long,k As Long
'Define the exlusions list
Const StrExcl As String = "a,am,and,are,as,at,be,but,by,can,cm,did,do,does,eg," & _
          "en,eq,etc,for,get,go,got,has,have,he,her,him,how,i,ie,if,in,into,is," & _
          "it,its,me,mi,mm,my,na,nb,no,not,of,off,ok,on,one,or,our,out,re,she," & _
          "so,the,their,them,they,t,to,was,we,were,who,will,would,yd,you,your"
 'Strip out unwanted characters
For i = 1 To 255
  Select Case i
    Case 1 To 31,33 To 64,91 To 96,123 To 144,147 To 191,247
    Do While InStr(StrIn,Chr(i)) > 0
      StrIn = Replace(StrIn,Chr(i)," ")
    Loop
  End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn,Chr(145),"'"),Chr(146),"' "," ")," '"," ")
'Convert to lowercase
StrIn = " " & LCase(StrIn) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl,","))
  StrFnd = " " & Split(StrExcl,")(i) & " "
  Do While InStr(StrIn,StrFnd) > 0
    StrIn = Replace(StrIn," " & Split(StrExcl,")(i) & " "," ")
  Loop
Next
'Clean up any duplicate spaces
Do While InStr(StrIn,"  ") > 0
  StrIn = Replace(StrIn,"  "," ")
Loop
i = 0
Do While UBound(Split(StrIn," ")) > 1
  StrFnd = " " & Split(StrIn," ")(1) & " ": j = Len(StrIn)
  'Find how many occurences of each word there are in the document
  StrIn = Replace(StrIn,StrFnd," ")
  k = (j - Len(StrIn)) / (Len(StrFnd) - 1)
  'If there's more than one occurence,add the word to our Find list
  If k > 1 Then
    ReDim Preserve ArrOut(i)
    ArrOut(i) = Trim(StrFnd)
    i = i + 1
  End If
Loop
WordBasic.SortArray ArrOut()
End Sub

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。