如何解决确定一个词是否在匹配项的 125 个词以内
我想遍历一个文档,对于每个单词,查看 250 个单词(后面 125 个,前面 125 个)内是否有匹配项。
如果有匹配项,请将其突出显示。某些词被排除在外。这些存储在字典中。
为了测试我正在使用的循环,
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
Debug.Print wrd & "----" & wrd.Start
Next wrd
Next para
问题:
“我讨厌到精神世界旅行”这句话中的“世界”,当我在寻找7时打印32。
我想做类似的事情:
If wrd < 125 Then
Set wrdRng = ActiveDocument.Range(Start:=wrd - 125,End:=ActiveDocument.Words(wrd + 125).End)
Else
Set wrdRng = ActiveDocument.Range(Start:=0,End:=ActiveDocument.Words(250 - wrd).End)
End if
编辑:
我使用的当前代码在大约 13 分钟内完成了 50,000 字文档的循环。这完全太长了。有人有更好的选择吗?
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each Para In ActiveDocument.Paragraphs
For Each wrd In Para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(McKnight)*\1>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
do while .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
Debug.Print wrd
Next wrd
Next Para
MinutesElapsed = Format((Timer - StartTime) / 86400,"hh:mm:ss")
MsgBox "This code ran successfully in " & MinutesElapsed & " minutes",vbinformation
End Sub
编辑:
Sub Demo()
Application.ScreenUpdating = False
Dim i As Long
Dim StartTime As Double
Dim MinutesElapsed As String
StartTime = Timer
For Each para In ActiveDocument.Paragraphs
For Each wrd In para.Range.Words
With ActiveDocument.Range
With .Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = "<(wrd)*\1>"
.Replacement.Text = ""
.Forward = True
.Wrap = wdFindStop
.Format = False
.MatchCase = False
.MatchWildcards = True
End With
do while .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
i = i + 1
.Words.First.HighlightColorIndex = wdBrightGreen
.Words.Last.HighlightColorIndex = wdBrightGreen
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Application.ScreenUpdating = True
'MsgBox i & " instances found."
'Debug.Print wrd
Next wrd
Next para
MinutesElapsed = Format((Timer - StartTime) / 86400,vbinformation
End Sub
解决方法
尝试以下操作。除其他外,它允许您指定要忽略的单词(例如介词、冠词等)。此外,不同的突出显示用于识别给定单词的所有“命中”。状态栏上会给出进度报告。在我的笔记本电脑上,一份 50,000 字的“lorem”文档大约需要 6:40。
Option Explicit
Dim ArrOut() As String
Sub Demo()
' Turn Off Screen Updating
Application.ScreenUpdating = False
Dim eTime As Single
' Start Timing
eTime = Timer
Dim wdDoc As Document,StrFnd As String,StrTmp As String,Rng As Range
Dim SBar As Boolean,bTrk As Boolean,h As Long,i As Long,j As Long
' Store current Status Bar status,then switch on
SBar = Application.DisplayStatusBar
Application.DisplayStatusBar = True
Set wdDoc = ActiveDocument
With wdDoc
' Store current Track Changes status,then switch off
bTrk = .TrackRevisions: .TrackRevisions = False
'Display status
Application.StatusBar = "Building word list"
'Compile the Find list
Call BuildWordList(.Range.Text)
With .Range.Find
.ClearFormatting
.Replacement.ClearFormatting
.Text = ""
.Replacement.Text = ""
.Format = False
.MatchCase = False
.MatchWholeWord = True
.Wrap = wdFindStop
.Execute
End With
End With
'Process all words in the concordance
For i = 0 To UBound(ArrOut())
StrFnd = ArrOut(i)
h = i Mod 14
If h < 6 Then
h = h + 2
Else
h = h + 3
End If
'Display current word
Application.StatusBar = "Processing: " & StrFnd
'Use wildcards,if possible,for extra speed
If Len(StrFnd) < 4 Then
StrTmp = ""
For j = 1 To Len(StrFnd)
StrTmp = StrTmp & "[" & UCase(Mid(StrFnd,j,1)) & Mid(StrFnd,1) & "]"
Next
StrFnd = StrTmp
With wdDoc.Range
With .Find
.MatchWildcards = True
.Text = "<(" & StrFnd & ")>*<(" & StrFnd & ")>"
.Forward = True
.Wrap = wdFindStop
End With
Do While .Find.Execute
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
.End = .End - Len(.Words.Last)
.Collapse wdCollapseEnd
Loop
End With
Else
With wdDoc.Range
With .Find
.MatchWildcards = False
.Text = StrFnd
.Forward = True
.Wrap = wdFindStop
.Execute
End With
Set Rng = .Duplicate
Do While .Find.Execute
Rng.End = .Duplicate.End
With Rng
If .ComputeStatistics(wdStatisticWords) < 100 Then
If .Words.First.HighlightColorIndex <> h Then .Words.First.HighlightColorIndex = h
.Words.Last.HighlightColorIndex = h
End If
End With
Set Rng = .Duplicate
.Collapse wdCollapseEnd
Loop
End With
End If
DoEvents
Next
' Restore original Track Changes status
wdDoc.TrackRevisions = bTrk
' Clear the Status Bar
Application.StatusBar = False
' Restore original Status Bar status
Application.DisplayStatusBar = SBar
' Restore Screen Updating
Application.ScreenUpdating = True
' Calculate elapsed time
eTime = (Timer - eTime + 86400) Mod 86400 ' Just in case execution time spans midnight
MsgBox "Execution took " & Format(eTime / 86400,"hh:mm:ss") & " to process"
End Sub
Sub BuildWordList(StrIn As String)
Dim StrFnd As String,j As Long,k As Long
'Define the exlusions list
Const StrExcl As String = "a,am,and,are,as,at,be,but,by,can,cm,did,do,does,eg," & _
"en,eq,etc,for,get,go,got,has,have,he,her,him,how,i,ie,if,in,into,is," & _
"it,its,me,mi,mm,my,na,nb,no,not,of,off,ok,on,one,or,our,out,re,she," & _
"so,the,their,them,they,t,to,was,we,were,who,will,would,yd,you,your"
'Strip out unwanted characters
For i = 1 To 255
Select Case i
Case 1 To 31,33 To 64,91 To 96,123 To 144,147 To 191,247
Do While InStr(StrIn,Chr(i)) > 0
StrIn = Replace(StrIn,Chr(i)," ")
Loop
End Select
Next
'Convert smart single quotes to plain single quotes & delete any at the start/end of a word
StrIn = Replace(Replace(Replace(Replace(StrIn,Chr(145),"'"),Chr(146),"' "," ")," '"," ")
'Convert to lowercase
StrIn = " " & LCase(StrIn) & " "
'Process the exclusions list
For i = 0 To UBound(Split(StrExcl,","))
StrFnd = " " & Split(StrExcl,")(i) & " "
Do While InStr(StrIn,StrFnd) > 0
StrIn = Replace(StrIn," " & Split(StrExcl,")(i) & " "," ")
Loop
Next
'Clean up any duplicate spaces
Do While InStr(StrIn," ") > 0
StrIn = Replace(StrIn," "," ")
Loop
i = 0
Do While UBound(Split(StrIn," ")) > 1
StrFnd = " " & Split(StrIn," ")(1) & " ": j = Len(StrIn)
'Find how many occurences of each word there are in the document
StrIn = Replace(StrIn,StrFnd," ")
k = (j - Len(StrIn)) / (Len(StrFnd) - 1)
'If there's more than one occurence,add the word to our Find list
If k > 1 Then
ReDim Preserve ArrOut(i)
ArrOut(i) = Trim(StrFnd)
i = i + 1
End If
Loop
WordBasic.SortArray ArrOut()
End Sub
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。