shahin
Active Member
Dear all, I have made a parser to scrape yellowpages.com. It is working fine so far, with five columns filled in [1. Name 2. Street 3. Locality 4. Region 5. Postal Code]. However, the problem is that I can't extract the phone number, which should go in the 6th column. That is why I need help to get it done. If anybody out there could give me a little push on this, it would be a great help to me. Thanks in advance.
Code:
Option Explicit
' Scrapes one Yellow Pages search-result page and writes one worksheet row per
' business, starting at row 2:
'   col 1 = name, 2 = street, 3 = locality, 4 = region, 5 = postal code, 6 = phone.
'
' Flow: download the search page, collect every element with class
' "business-name", follow each element's href to the detail page, and cut the
' wanted fields out of the raw detail-page HTML by their itemprop markers.
'
' A field whose marker is absent from a detail page is written as an empty
' string instead of raising "Subscript out of range".
' Requires references to "Microsoft XML, v6.0" and "Microsoft HTML Object Library".
Sub ResTest()
    Const URL As String = "http://www.yellowpages.com/search?search_terms=Coffee%20Shops&geo_location_terms=San%20Francisco%2C%20CA&page=2"
    Const ext As String = "http://www.yellowpages.com"

    Dim http As New MSXML2.XMLHTTP60, html As New HTMLDocument
    Dim topics As Object, topic As Object
    Dim newlink As String
    Dim chunks As Variant       ' detail-page HTML split on the name heading
    Dim P As Long, rowIdx As Long

    rowIdx = 2                  ' row 1 is left free for headers

    http.Open "GET", URL, False
    http.send
    If http.Status <> 200 Then
        Debug.Print "ResTest: search page request failed with HTTP status " & http.Status
        Exit Sub
    End If

    html.body.innerHTML = http.responseText
    Set topics = html.getElementsByClassName("business-name")

    For Each topic In topics
        ' Relative hrefs parsed through HTMLDocument come back prefixed with
        ' "about:"; strip that and prepend the site root.
        newlink = ext & Replace(topic.href, "about:", "")

        With http
            .Open "GET", newlink, False
            .send

            ' Skip detail pages that did not load rather than parsing an error body.
            If .Status = 200 Then
                ' Everything after each <h1 itemprop="name"> belongs to one listing;
                ' element 0 is the page preamble and is ignored.
                chunks = Split(.responseText, "<h1 itemprop=""name"">")

                For P = 1 To UBound(chunks)
                    ' An empty start marker means "from the beginning of the chunk".
                    Cells(rowIdx, 1) = TextBetween(chunks(P), "", "<")
                    Cells(rowIdx, 2) = TextBetween(chunks(P), "itemprop=""streetAddress"">", "<")
                    Cells(rowIdx, 3) = TextBetween(chunks(P), "itemprop=""addressLocality"">", "<")
                    Cells(rowIdx, 4) = TextBetween(chunks(P), "itemprop=""addressRegion"">", "<")
                    Cells(rowIdx, 5) = TextBetween(chunks(P), "itemprop=""postalCode"">", "<")
                    ' NOTE(review): assumes the detail page tags the phone number with
                    ' itemprop="telephone" (the schema.org property name) - confirm
                    ' against the live markup. If the marker is absent the cell is blank.
                    Cells(rowIdx, 6) = TextBetween(chunks(P), "itemprop=""telephone"">", "<")
                    rowIdx = rowIdx + 1
                Next P
            End If
        End With
    Next topic
End Sub

' Returns the text in `source` that follows the first occurrence of `startMarker`
' up to (not including) the next `endMarker`.
'   - startMarker = ""      -> extraction starts at the beginning of `source`.
'   - startMarker not found -> returns "".
'   - endMarker not found   -> returns everything after startMarker.
Private Function TextBetween(ByVal source As String, ByVal startMarker As String, ByVal endMarker As String) As String
    Dim startPos As Long, endPos As Long

    If Len(source) = 0 Then Exit Function

    If Len(startMarker) = 0 Then
        startPos = 1
    Else
        startPos = InStr(1, source, startMarker, vbBinaryCompare)
        If startPos = 0 Then Exit Function
        startPos = startPos + Len(startMarker)
    End If

    endPos = InStr(startPos, source, endMarker, vbBinaryCompare)
    If endPos = 0 Then
        TextBetween = Mid$(source, startPos)
    Else
        TextBetween = Mid$(source, startPos, endPos - startPos)
    End If
End Function