如何解决 Sub 和 Function 各自独立运行正常、但组合使用时却不工作的问题
这个问题是我发布的一个小系列的一部分,用于尝试抓取 https://echa.europa.eu/information-on-chemicals 的 brief profiles
该代码使用公共函数 GetUrl() 来检索所需简要资料(brief profile)的网址,然后由子程序 GetContents() 使用该网址来抓取所需的物理和化学特性数据。
令人困惑的是,我收到了一个运行时错误 91。这很奇怪,因为 GetContents() 和 GetUrl() 在彼此独立时都可以工作。
如果有人愿意帮忙看一下,那就太好了。
' Requests the page whose address GetUrl() returns and prints, for every <dt>
' element found under the selected container, the element's text followed by
' the text of its next sibling to the Immediate window.
' This is the failing variant discussed in the question (run-time error 91).
Sub GetContents()
Dim xmlReq As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim SubSectList As MSHTML.IHTMLElement
Dim SubSects As MSHTML.IHTMLElementCollection
Dim SubSect As MSHTML.IHTMLElement
' Url is not declared in this procedure (implicit Variant unless it is
' declared at module level).
Url = GetUrl()
' Synchronous request: the third argument (async) is False.
xmlReq.Open "Get",Url,False
xmlReq.send
' Anything other than HTTP 200 is reported to the user and aborts the routine.
If xmlReq.Status <> 200 Then
MsgBox "Problem" & vbNewLine & xmlReq.Status & " - " & xmlReq.statusText
Exit Sub
End If
' Parse the response by loading it into an in-memory HTML document.
HTMLDoc.body.innerHTML = xmlReq.responseText
' Takes the item at index 1 of the elements carrying these class names.
' NOTE(review): presumably this yields Nothing when the page returned for Url
' has fewer matching elements - confirm against the actual response.
Set SubSectList = HTMLDoc.getElementsByClassName("col-xs-12 col-lg-10 MainContent")(1)
' The question reports this as the line that breaks when Url comes from GetUrl().
Set SubSects = SubSectList.getElementsByTagName("dt")
For Each SubSect In SubSects
' The semicolon prints the sibling's text directly after the " : " separator.
Debug.Print SubSect.innerText & " : "; SubSect.NextSibling.innerText
Next SubSect
End Sub
' Posts a keyword search to the ECHA "search-for-chemicals" form and returns the
' href of the first substance-name link found in the results table.
'
' Returns: the href attribute of the first element matching
'          "table.table > tbody > tr > td > a.substanceNameLink".
' Raises:  a run-time error when the response contains no such element, because
'          the result of querySelector is dereferenced without a check.
Public Function GetUrl() As String
    Const Url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
    Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object, I&, R&
    Dim DictKey As Variant, payload$, searchKeyword$, Ws As Worksheet

    Set oHtml = New HTMLDocument
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    Set MyDict = CreateObject("Scripting.Dictionary")
    Set Ws = ThisWorkbook.Worksheets("Sheet1")

    'Keyword can Be any chemical usually set to a cell value i.e. Range("a1").Value
    searchKeyword = "Acetone" '

    ' Form fields sent with the search request.
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
    MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
    MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckBox") = "on"

    ' Build the URL-encoded form body.
    ' NOTE: Len(DictKey) is never 0 for these keys, so the second branch always
    ' runs and the payload begins with "&". Left as the author wrote it.
    ' A line continuation needs a space before the underscore.
    payload = ""
    For Each DictKey In MyDict
        payload = IIf(Len(DictKey) = 0, WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)), _
            payload & "&" & WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey)))
    Next DictKey

    With oHttp
        ' Open takes (method, url, async); the url argument was missing here.
        .Open "POST", Url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send (payload)
        oHtml.body.innerHTML = .responseText
    End With

    Debug.Print oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
    GetUrl = oHtml.querySelector("table.table > tbody > tr > td > a.substanceNameLink").getAttribute("href")
End Function
参考文献:
更新:特别奇怪的是,当在代码中给出一个确切的 url 时,下面的代码可以工作:
' Downloads one fixed brief-profile page and writes each <dt> entry of the
' selected content container, followed by the text of the node right after it,
' to the Immediate window.
Sub GetContents()
    Const PROFILE_URL As String = "https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723"

    Dim request As New MSXML2.XMLHTTP60
    Dim page As New MSHTML.HTMLDocument
    Dim contentPane As MSHTML.IHTMLElement
    Dim terms As MSHTML.IHTMLElementCollection
    Dim term As MSHTML.IHTMLElement
    Dim idx As Long

    With request
        ' Blocking GET; bail out with a message on any non-200 answer.
        .Open "Get", PROFILE_URL, False
        .send
        If .Status <> 200 Then
            MsgBox "Problem" & vbNewLine & .Status & " - " & .statusText
            Exit Sub
        End If
        page.body.innerHTML = .responseText
    End With

    ' Item at index 1 of the elements carrying these class names.
    Set contentPane = page.getElementsByClassName("col-xs-12 col-lg-10 MainContent")(1)
    Set terms = contentPane.getElementsByTagName("dt")

    ' Walk the collection by position instead of enumerating it.
    For idx = 0 To terms.Length - 1
        Set term = terms.Item(idx)
        Debug.Print term.innerText & " : "; term.NextSibling.innerText
    Next idx
End Sub
然而替换
' The hard-coded call from the working listing: Open takes (method, url, async).
xmlReq.Open "Get","https://echa.europa.eu/brief-profile/-/briefprofile/100.028.723",False
xmlReq.send
为
' Take the address from GetUrl() instead of hard-coding it, then issue the
' same synchronous GET.
Url = GetUrl()
xmlReq.Open "Get",url,False
xmlReq.send
其中 GetUrl() 指的是上面那个可以单独正常工作的公共函数,
这样替换之后代码就会中断。调试时,Set SubSects = SubSectList.getElementsByTagName("dt")
是出问题的那一行。
运行答案中提供的代码时更新结果屏幕截图:
解决方法
您提取了错误的 url,并且该 URI 的 html 中没有 dt 元素。更改css选择器并简化如下:
Option Explicit
' Retrieves the page whose address GetUrl returns and prints, for every <dt>
' element inside an .EndpointContent container, the label text and the text of
' the node two siblings further on to the Immediate window.
'
' Shows a message box and stops when the server does not answer with HTTP 200.
' A <dt> without a second following sibling is printed with an empty value
' instead of raising run-time error 91.
Public Sub GetContents()
    Dim XMLReq As New MSXML2.XMLHTTP60
    Dim HTMLDoc As New MSHTML.HTMLDocument
    Dim valueNode As Object
    Dim i As Long

    ' Synchronous GET of the brief-profile page.
    XMLReq.Open "Get", GetUrl, False
    XMLReq.send
    If XMLReq.Status <> 200 Then
        MsgBox "Problem" & vbNewLine & XMLReq.Status & " - " & XMLReq.statusText
        Exit Sub
    End If

    HTMLDoc.body.innerHTML = XMLReq.responseText

    With HTMLDoc.querySelectorAll(".EndpointContent dt")
        For i = 0 To .Length - 1
            ' Step over two siblings one at a time so a missing node is
            ' detected rather than dereferenced.
            Set valueNode = .Item(i).NextSibling
            If Not valueNode Is Nothing Then Set valueNode = valueNode.NextSibling

            If valueNode Is Nothing Then
                Debug.Print .Item(i).innerText & " : "
            Else
                Debug.Print .Item(i).innerText & " : " & valueNode.innerText
            End If
        Next i
    End With
End Sub
' Posts a keyword search to the ECHA "search-for-chemicals" form and returns the
' href of the first element with class "briefProfileLink" in the response.
'
' Returns: the href attribute of the first .briefProfileLink element.
' Raises:  a descriptive run-time error (vbObjectError + 513) when the response
'          contains no such element.
'
' NOTE(review): the p_auth token in the URL and the formDate value are
' hard-coded; they look session/time specific - confirm they stay valid.
Public Function GetUrl() As String
    Const url = "https://echa.europa.eu/search-for-chemicals?p_auth=5ayUnMyz&p_p_id=disssimplesearch_WAR_disssearchportlet&p_p_lifecycle=1&p_p_state=normal&p_p_col_id=_118_INSTANCE_UFgbrDo05Elj__column-1&p_p_col_count=1&_disssimplesearch_WAR_disssearchportlet_javax.portlet.action=doSearchAction&_disssimplesearch_WAR_disssearchportlet_backURL=https%3A%2F%2Fecha.europa.eu%2Finformation-on-chemicals%3Fp_p_id%3Ddisssimplesearchhomepage_WAR_disssearchportlet%26p_p_lifecycle%3D0%26p_p_state%3Dnormal%26p_p_mode%3Dview%26p_p_col_id%3D_118_INSTANCE_UFgbrDo05Elj__column-1%26p_p_col_count%3D1%26_disssimplesearchhomepage_WAR_disssearchportlet_sessionCriteriaId%3D"
    Dim oHttp As Object, oHtml As HTMLDocument, MyDict As Object
    Dim DictKey As Variant, payload$, searchKeyword$
    Dim profileLink As Object
    Dim httpStatus As Long
    Dim href As String

    Set oHtml = New HTMLDocument
    Set oHttp = CreateObject("MSXML2.XMLHTTP")
    Set MyDict = CreateObject("Scripting.Dictionary")

    'Keyword can Be any chemical usually set to a cell value i.e. Range("a1").Value
    searchKeyword = "Acetone"

    ' Form fields sent with the search request.
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_formDate") = "1621017052777" 'timestamp
    MyDict("_disssimplesearch_WAR_disssearchportlet_searchOccurred") = "true"
    MyDict("_disssimplesearch_WAR_disssearchportlet_sskeywordKey") = searchKeyword
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimer") = "true"
    MyDict("_disssimplesearchhomepage_WAR_disssearchportlet_disclaimerCheckbox") = "on"

    ' Build "k1=v1&k2=v2...". The separator depends on whether payload already
    ' has content; testing the key length instead always prefixed a stray "&".
    payload = ""
    For Each DictKey In MyDict
        If Len(payload) > 0 Then payload = payload & "&"
        payload = payload & WorksheetFunction.EncodeURL(DictKey) & "=" & WorksheetFunction.EncodeURL(MyDict(DictKey))
    Next DictKey

    With oHttp
        .Open "POST", url, False
        .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/84.0.4147.135 Safari/537.36"
        .setRequestHeader "Content-type", "application/x-www-form-urlencoded"
        .send (payload)
        httpStatus = .Status
        oHtml.body.innerHTML = .responseText
    End With

    ' Query once and fail with a clear message instead of error 91.
    Set profileLink = oHtml.querySelector(".briefProfileLink")
    If profileLink Is Nothing Then
        Err.Raise vbObjectError + 513, "GetUrl", _
            "No .briefProfileLink element found for """ & searchKeyword & """ (HTTP status " & httpStatus & ")"
    End If

    href = profileLink.getAttribute("href")
    Debug.Print href
    GetUrl = href
End Function
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。