標題:
[發問]
抓取多網頁資料
[打印本頁]
作者:
msmplay
時間:
2020-1-10 10:48
標題:
抓取多網頁資料
需求:我需要3個資料【賣場名稱、賣場連結網址、價格】
使用工具:ie11
使用網址:https://www.findprice.com.tw/
人工匯整檔案:https://drive.google.com/file/d/1I1gRapjwq-y-RrBwS0kpDDDv4RNiUgkZ/view?usp=sharing
問題1:是否有什麼方式可以將網頁搜尋結果的多個網頁,一次抓取或轉存至同一excel檔內
例如:以上網頁搜尋『kd-55x8000g』,會出現10頁結果,目前做法為一頁一頁轉出至excel,再以人工的方式匯整成一個活頁,非常耗時
[attach]31632[/attach][attach]31633[/attach]
[attach]31634[/attach]
問題2:此匯出excel檔方式無法匯出每個賣場連結網址,是否有其方式也可以抓得到或轉出每個賣場連結網址呢?重點在於我需要3個資料【賣場名稱、賣場連結網址、價格】
[attach]31635[/attach]
作者:
GBKEE
時間:
2020-4-14 18:36
回復
1#
msmplay
程式碼複製在同一模組上
Option Explicit
Dim 圖片目錄 As String, 圖片數 As Double, 圖片名稱 As String
Sub Ex_抓取多網頁資料()
Dim I As Integer, ii As Integer, e As Object
Dim Search As String, IMG As Object, Span As Object, A As Object
Search = InputBox("kd-55x8000g", "請輸入要搜尋的字串")
If Search = "" Then MsgBox "沒有請輸入......": End
With CreateObject("InternetExplorer.Application")
' .Visible = True
.Navigate "https://www.findprice.com.tw/"
Do While .Busy Or .readyState <> 4: DoEvents: Loop
.Document.GETELEMENTBYID("search").Value = Search '**輸入要搜尋的字串
Do While .Busy Or .readyState <> 4: DoEvents: Loop
.Document.GETELEMENTBYID("tsbb").Click '**搜尋鍵
Do While .Busy Or .readyState <> 4: DoEvents: Loop
Set A = .Document.form1.ALL.TAGS("tbody")
If InStr(A(1).INNERTEXT, Search) = 0 Then '**沒有搜尋到
MsgBox vbLf & A(1).INNERTEXT
GoTo Xquit
Else
工作表整理
Cells(1) = Trim(Split(A(1).INNERTEXT, ">")(0))
Cells(1, 2) = "價格"
Cells(1, 3) = "說明"
End If
I = 1
NE:
Do While .Busy Or .readyState <> 4: DoEvents: Loop
Set e = .Document.GETELEMENTBYID("tbContent").ALL.TAGS("TR")
For ii = 1 To e.Length - 1
With e(ii).ALL
Set A = .TAGS("A")(0)
If Not A Is Nothing Then
Set IMG = A.ALL.TAGS("IMG")(0)
Set Span = .TAGS("SPAN")
If Not IMG Is Nothing And Span.Length > 0 Then
If Span(0).CLASSNAME = "rec-price-20" Then
I = I + 1
Cells(I, "b") = Trim(Span(0).INNERTEXT)
Cells(I, "C") = Trim(IMG.Title)
網路圖片 IMG.Href
Ex_網頁資料 Cells(I, "a"), A.Href
End If
End If
End If
End With
Next
'**下一頁 '<a id="pg-next" href="/g/kd-55x8000g/?i=3">下一頁 ></a>
For Each e In .Document.ALL.TAGS("A")
If e.ID = "pg-next" Then
e.Click '下一頁 (按鍵)
GoTo NE '循環到下一頁
End If
Next
Xquit:
.Quit '關閉網頁
End With
Range("b:b").Columns.AutoFit
End Sub
Private Sub 工作表整理()
圖片目錄 = ThisWorkbook.Path & "\商品圖片\"
If Dir(圖片目錄, vbDirectory) = "" Then MkDir 圖片目錄 '**沒有圖片目錄則建立之
If Dir(圖片目錄 & "*.*") <> "" Then Kill 圖片目錄 & "\*.*" '**圖片目錄下有檔案則刪除之
圖片數 = 0
Pictures.Delete '**刪除工作表上的圖片
With Cells
.Clear
.Columns.AutoFit
.Rows.AutoFit
.WrapText = False
.HorizontalAlignment = xlCenter
.VerticalAlignment = xlCenter
End With
With Range("c:c")
.ColumnWidth = 40
.WrapText = True
End With
End Sub
Private Sub Ex_網頁資料(Rng As Range, Href As String)
With Rng
.ColumnWidth = 15
.RowHeight = 90
End With
With Pictures.Insert(圖片名稱) '**插入圖片
.ShapeRange.LockAspectRatio = msoFalse '**不鎖定圖片的長寬比例
.top = Rng.top
.left = Rng.left
.Height = Rng.Height
.width = Rng.width
ActiveSheet.Hyperlinks.Add Anchor:=.ShapeRange.Item(1), Address:=Href '***超連結
End With
End Sub
Private Sub 網路圖片(URL As String)
Dim Xml As Object '用來取得網頁資料
Dim Stream As Object 'As ADODB.stream '用來儲存二進位檔案
On Error GoTo Url_err '處理圖片網頁的錯誤
Again:
Set Xml = CreateObject("Microsoft.XMLHTTP")
Set Stream = CreateObject("ADODB.stream")
Xml.Open "GET", URL, 0
Xml.send
With Stream
.Open
.Type = 1
.write Xml.ResponseBody
' If Dir(商品圖片) <> "" Then Kill 商品圖片
圖片名稱 = 圖片目錄 & 圖片數 & ".jpg"
圖片數 = 圖片數 + 1
.SaveToFile (圖片名稱)
.Close
End With
Set Xml = Nothing
Set Stream = Nothing
Exit Sub
Url_err:
URL = "https://th.bing.com/th/id/OIP.lmGYd5XOfu-zuoG1GZW-HAHaE8?w=256&h=171&c=7&o=5&dpr=1.2&pid=1.7"
GoTo Again
End Sub
複製代碼
歡迎光臨 麻辣家族討論版版 (http://forum.twbts.com/)