微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

将HTML文档的javascript部分中的字段提取到表中?地理坐标

我有一个HTML文档,其中包含Javascript块中的地理信息.它是此网页的源代码https://energy.ehawaii.gov/epd/public/energy-projects-map.html

这可以被视为地图,也可以视为列表.

我想要实现的是在Excel中使用该列表,但是具有“纬度”字段和“经度”字段. Google地图标记指定Javascript中的LatLng.

我如何使用VB之类的东西来处理HTML文件的源代码,并组织成一个包含以下字段/列的表:

>描述(来自< a ... title =“等等”>)
>技术(来自< p>技术:Solar< / p>例如)
>纬度(来自google.maps.LatLng(纬度,经度);
> Longtitude(来自与纬度相同的代码行,但使用第二个变量)?

所有帮助赞赏!

解决方法

尝试基于XMLHTTP请求的此VBScript解决方案.只需复制下面的代码,粘贴到文本文件,将其保存为.vbs并运行它.脚本尚未优化,所有请求都不是异步,因此我的PC上需要大约40秒来获取所有数据.
Option Explicit
Dim arrCells(),arrList,arrTmp,sRespHeaders,sRespText,arrSetHeaders,i,j,iTotal,oApp,oWB,oWS,oOutput

' Create output window
Output oOutput

' Get cookies
oOutput.write "Get cookies"
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-map.html",Array(),sRespText
ParseResponse "^Set-(Cookie): ([\S]*?=[\S]*?);[\s\S]*?$",arrSetHeaders

' Get project list
oOutput.write "Get project list"
arrList = Array()
XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-projects-list.json?sEcho=2&iColumns=5&sColumns=&idisplayStart=1&idisplayLength=0&mDataProp_0=0&mDataProp_1=1&mDataProp_2=2&mDataProp_3=3&mDataProp_4=4&sSearch=&bRegex=false&sSearch_0=&bRegex_0=false&bSearchable_0=true&sSearch_1=&bRegex_1=false&bSearchable_1=true&sSearch_2=&bRegex_2=false&bSearchable_2=true&sSearch_3=&bRegex_3=false&bSearchable_3=true&sSearch_4=&bRegex_4=false&bSearchable_4=true&iSortCol_0=0&sSortDir_0=asc&iSortingCols=1&bSortable_0=true&bSortable_1=true&bSortable_2=true&bSortable_3=true&bSortable_4=true","",sRespText
ParseProjects sRespText,iTotal
oOutput.write "Get project list: " & (UBound(arrList) + 1) & " of " & iTotal

' Rearrange to 2-dimensional array,get LatLng
ReDim arrCells(UBound(arrList),8) ' Name,Technology,Island,Capacity,Location,RID,Type,Lat,Lng
For i = 0 To UBound(arrList)
    For j = 0 To 6
        arrCells(i,j) = arrList(i)(j)
    Next
    oOutput.write "Get LatLng: " & (i + 1) & " of " & iTotal
    arrTmp = RequestLatLng(arrList(i)(5))
    arrCells(i,7) = arrTmp(0)
    arrCells(i,8) = arrTmp(1)
Next

' Create Excel worksheet,output data
oOutput.write "Export to Excel"
Set oApp = CreateObject("Excel.Application")
oApp.Visible = True
Set oWB = oApp.Workbooks.Add(-4167) ' xlWBATWorksheet
Set oWS = oWB.Worksheets(1)
oWS.Range(oWS.Cells(1,1),oWS.Cells(UBound(arrCells) + 1,9)).Value = arrCells
oWS.Columns.AutoFit
oWB.Saved = True
oOutput.write "Completed"

Sub XmlHttpGet(sQuery,sRespText)
    Dim arrHeader
    With CreateObject("MSXML2.ServerXMLHTTP")
        .Setoption 2,13056 ' SXH_SERVER_CERT_IGnorE_ALL_SERVER_ERRORS
        .Open "GET",sQuery,False
        For Each arrHeader In arrSetHeaders
            .SetRequestHeader arrHeader(0),arrHeader(1)
        Next
        .Send ""
        sRespHeaders = .GetAllResponseHeaders
        sRespText = .ResponseText
    End With
End Sub

Sub ParseResponse(sPattern,sResponse,aData)
    Dim oMatch,aTmp,sSubMatch
    aData = Array()
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .Pattern = sPattern
        For Each oMatch In .Execute(sResponse)
            If oMatch.SubMatches.Count = 1 Then
                PushItem aData,oMatch.SubMatches(0)
            Else
                aTmp = Array()
                For Each sSubMatch In oMatch.SubMatches
                    PushItem aTmp,sSubMatch
                Next
                PushItem aData,aTmp
            End If
        Next
    End With
End Sub

Sub PushItem(aList,vItem)
    ReDim Preserve aList(UBound(aList) + 1)
    aList(UBound(aList)) = vItem
End Sub

Sub ParseProjects(sJson,arrProj,iTotalRecords)
    Dim i,q
    With CreateObject("htmlfile")
        With .parentwindow
            .execscript ";","jscript"
            .eval ("json = " & sJson & ";")
            iTotalRecords = CInt(.json.iTotalRecords)
            do while .json.aaData.Length
                ReDim Preserve arrProj(UBound(arrProj) + 1)
                With .json.aaData.Shift()
                    arrProj(UBound(arrProj)) = Array(.Shift(),.Shift(),.Shift())
                End With
            Loop
        End With
    End With
End Sub

Function RequestLatLng(sRid)
    Dim sRespText,sTmp
    XmlHttpGet "https://energy.ehawaii.gov/epd/public/energy-project-details.html?rid=" & sRid,sRespText
    arrTmp = Split(sRespText,"google.maps.LatLng(")
    If UBound(arrTmp) >= 1 Then
        sTmp = arrTmp(1)
        arrTmp = Split(sTmp,"),")
        If UBound(arrTmp) >= 1 Then
            RequestLatLng = Split(arrTmp(0),",")
            Exit Function
        End If
    End If
    RequestLatLng = Array("#","#")
End Function

Sub Output(oWnd)
    Set oWnd = ShowWindow("energy.ehawaii.gov","data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAWIAAAB2CAYAAADybJlDAAAACXBIWXMAAC4jAAAuIwF4pT92AAAAIGNIUk0AAHolAACAgwAA+f8AAIDpAAB1MAAA6mAAADqYAAAXb5JfxUYAAAUjSURBVHja7N05ztxGEIBRjqHciZ34Bn3/w9QNnPsE49QwtPwc9lLd9V4kAdKwyQE+Fjnb6/1+XwCs85tDACDEAEIMgBADCDEAQgwgxAaimYAQAyDEAEIMgBADCDEAQgwgxACM980hgD28Xq/Td7Hd/PdxzHPri+FBiBPH9hPbBVqIQYh3D+/2YRZiEOIT47tVmIUYhLhCfFMHWYhBiKsFOF2UhRiEuHKAUwRZiEGIBXhxkIUYhFiAFwdZiEGIBXhxjIUYhdhlhCP5OocFWYhBiGeHLaxdiEGI54YsNljj0n0SYhDiEYGLhGtKG2MhBiHOFuCdXiDsemmhhtoh7hW9SLKOLWMsxFA3xD3iVznA3Y6FEEPNED8NoAB3PC5CDPVCvDLCFT4gcvv4CDHUCvGTEFYKcMw8Vn48FOpYEeG28RQ87cRjIoYaE/GnMdxpco7O2336eH9f1/WPEIMQr4jwyo8Zt4HbaqOOoRCDEGePcCzc9pQYCzGcHeJZER45ia6ewmP0MfViHZxrxwjHle9XltvoYyTEIMIZIvwkwDNvvfT6/0IMpInw0wn4SYTj4Xa63T4RYjANr4hwj1sQKybhIc+NEIMIz4xwr3vAvSI8eyo2EQNLAphhGo1Ej92EGESyW0AWhbD3fkaHbT86lkIMNc2KcO+3ox3500lCDPWm4ZkRXjnx31lDTDgeP3wcIYZaEZ712Bk+lBG7bFOIoZbYdBsz3g0SC/a/CTGw6lZAlok/xZqEGGrFsg163FW3AnqvY8lULMQg3pkj3JKsYyghhjrT8KhL66oRbr3WI8Qg5OUm0Gz7JMQgxhmDdcoJ4UvrEmKoF9AfPVbbdN+2n8qFGNg5fkfcGhFiIFP82mb72Ho8lhDD+dOrqdxEDCyc6mLAY54+lQsxkDrspvwBx06IQUDtj4kYGDA5xnf+Hpvv07GEGEzHd4LZO5qVXqALIQZ6xaxtuu602xNiYNV03A46FkIM7BuhpNPwVEIM50VvxddBjvqC+hKEGAQ9++PG6U+IEAOzp+M7wS7xHmghBnaajk3EgEA+nI4rT8NCDNy+hO4VQtPxL3xzCIAvTKWz3zOcYRr+a9ZaTcSA2wTf9/usDQkxcCfGMWk7pQgx1PGHUAoxsNafHUM7ajouGXkhBjKFc8RXbQoxUCLGgizEgCB33+60KwIhBgR5ceB9oAPYNWTHvLBnIoa9Js2el9UnRPmIfRViYNol+CZBnh53tyaAE+O/1ScAhRgQ5cWEGBgZwa9OlW3SeqLj2rsdF/eIQQxXif/9OSYdlx73lLuuVYhhL9kvs3sELiau9b9RXnbyEmIg6wln5kln6U84CTGcK/Ptibjx72ZHefpz4cU6YJfwx6Tt3NnPLicIIYY9L9vb5us/Icg/W8OtfRRiOH9KPfXL1mPiND706kCIgenhKRblX/JiHdS5vM/wpUGzPnq81Qt8Qgym1dNPWOmjLMRQayquHPq0URZiMBVXO3GYiIEtYnjy7Yxs+xZCDdxcOipAuwW7fXB8h0/y3r4GZDthZIrwz9bfeh0XIYYzIvfVKNyJx0kfBmmDjnuXbbk1AZzukwjHhG2FEINL/4xTZpUIm4hBjKfGdtXtjZZ4WyHEwOk+jfCSk4YQg6k402S5Y4Tb0+0IMTAyPpF0XWkiLMRgKj5lKn7yy8yx+pgIMYhxlel8dYRDiEGMT5uK23VAhIUYqDgFp4rwdV3X6/1+e0phA6/Xa7dJ9rT3NQ+7B/0vAAAA//8DAERsQ7O6796eAAAAAElFTkSuQmCC",354,118)
End Sub

Function ShowWindow(sTitle,sBG,iWidth,iHeight)
    Set ShowWindow = CreateWindow()
    With ShowWindow
        With .document
            .title = sTitle
            .getElementsByTagName("head")(0).appendChild .createElement("style")
            .styleSheets(0).csstext = "* {font: 8pt tahoma; margin: 5px;}"
            .body.style.background = "buttonface"
            .body.style.backgroundRepeat = "no-repeat"
            .body.style.backgroundImage = "url(" & sBG & ")"
            .body.innerHTML = ""
        End With
        .resizeto .screen.availWidth,.screen.availHeight
        .resizeto iWidth + .screen.availWidth - .document.body.offsetWidth,iHeight + .screen.availHeight - .document.body.offsetHeight
        .moveto CInt((.screen.availWidth - iWidth) / 2),CInt((.screen.availHeight - iHeight) / 2)
        .execScript "var handlers,thunks = {body_onunload: function() {handlers.WSHQuit()}};"
        Execute "Class clsHandlers: Public Sub WSHQuit(): WScript.Quit: End Sub: End Class"
        Set .handlers = New clsHandlers
        Set .document.body.onunload = .thunks.body_onunload
        .execScript "var write = function(t) {document.body.innerHTML = t};"
    End With
End Function

Function CreateWindow()
    ' source http://forum.script-coding.com/viewtopic.PHP?pid=75356#p75356
    Dim sSignature,oShellWnd,oProc
    On Error Resume Next
    sSignature = Left(CreateObject("Scriptlet.TypeLib").Guid,38)
    Do
        Set oProc = CreateObject("WScript.Shell").Exec("mshta ""about:<head><script>moveto(-32000,-32000);document.title=' '</script><hta:application id=app border=thick minimizebutton=no maximizebutton=no scroll=no showintaskbar=yes contextmenu=no selection=no innerborder=no icon=""""/><object id='shell' classid='clsid:8856F961-340A-11D0-A96B-00C04FD705A2'><param name=Registerasbrowser value=1></object><script>shell.putproperty('" & sSignature & "',document.parentwindow);</script></head>""")
        Do
            If oProc.Status > 0 Then Exit Do
            For Each oShellWnd In CreateObject("Shell.Application").Windows
                Set CreateWindow = oShellWnd.GetProperty(sSignature)
                If Err.Number = 0 Then Exit Function
                Err.Clear
            Next
        Loop
    Loop
End Function

原文地址:https://www.jb51.cc/html/231639.html

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。

相关推荐