我在網路爬蟲和 API 方面有一些經驗,但是在這個網站上找不到合適的 API 來完成這項工作:
https://www.giga.com.vc/Bebida (備註:/Bebida 只是一個類似於「/Drinks」的類別)
問題是,我找到了幾個 API,但它們要麼只適用於單一產品,要麼只適用於部分產品;我始終找不到正確的規則來指定類別或頁碼進行分頁,從而遍歷某個類別下的所有產品以獲取價格、EAN 等資訊。
import requests
import pandas as pd
from bs4 import BeautifulSoup
例如,下面這段程式碼可以執行,但回傳內容的格式很糟糕:
# Fetch page 1 of the "padaria" category listing and print the raw,
# undecoded response body exactly as the server returned it.
category_page_url = 'https://www.giga.com.vc/padaria?initialMap=c&initialQuery=padaria&map=category-1&page=1'
category_page = requests.get(category_page_url)
print(category_page.content)
或者:
# Persisted-query GraphQL request for the "Products" operation.  The query
# arguments travel as a base64-encoded JSON document in the "variables" field
# of the `extensions` parameter.
# NOTE(review): the "[email protected]" sender/provider values look like an
# e-mail-obfuscation artefact of the page this URL was copied from -- confirm
# the real values before relying on this request.
urlx = 'https://www.giga.com.vc/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=pt-BR&operationName=Products&variables={}&extensions={"persistedQuery":{"version":1,"sha256Hash":"49a77e3e2082563773aff56ad9c0432d59302e86fd1baaad9ca0f4bca2630d46","sender":"[email protected]","provider":"[email protected]"},"variables":"eyJoaWRlVW5hdmFpbGFibGVJdGVtcyI6ZmFsc2UsInNrdXNGaWx0ZXIiOiJBTExfQVZBSUxBQkxFIiwiaW5zdGFsbG1lbnRDcml0ZXJpYSI6Ik1BWF9XSVRIT1VUX0lOVEVSRVNUIiwiY2F0ZWdvcnkiOiIiLCJjb2xsZWN0aW9uIjoiMTYvIiwic3BlY2lmaWNhdGlvbkZpbHRlcnMiOltdLCJvcmRlckJ5IjoiIiwiZnJvbSI6MCwidG8iOjExfQ=="}'
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(urlx, timeout=30)
# Fail loudly on an HTTP error instead of a confusing JSON/KeyError below.
r.raise_for_status()
for x in r.json()['data']['products']:
    print(x)
這也有效:
# Persisted-query GraphQL request for the "ProductRecommendations" operation.
url2 = 'https://www.giga.com.vc/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=pt-BR&__bindingId=3f6e91e6-44f2-4fb0-a2d9-e238b53082e0&operationName=ProductRecommendations&variables={}&extensions={"persistedQuery":{"version":1,"sha256Hash":"e5782bd9e8bc64d337a7d7f96b9c280b462cdb0754d15b415192dac2755ad280","sender":"[email protected]","provider":"[email protected]"},"variables":"eyJpZGVudGlmaWVyIjp7ImZpZWxkIjoiaWQiLCJ2YWx1ZSI6IjE0NzUyMyJ9LCJ0eXBlIjoidmlldyJ9"}'
recommendations_response = requests.get(url2)
# Bare expression: the value is only echoed in an interactive session
# (REPL / notebook); run as a script it is evaluated and discarded.
recommendations_response.json()['data']['productRecommendations']
預期的輸出是這樣的:
# Fetch the product list and print one flat record per product.
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(urlx, timeout=30)
# Fail loudly on an HTTP error instead of a confusing JSON/KeyError below.
r.raise_for_status()
for product in r.json()['data']['products']:
    skus = product['items']
    spec_groups = product['specificationGroups']
    # Guard the [0] look-ups: a single product with an empty list must not
    # abort the whole run, so missing values are reported as None.
    specs = spec_groups[0]['specifications'] if spec_groups else []
    prd_dict = {
        'product_id': product['productId'],
        'price': product['priceRange']['sellingPrice']['highPrice'],
        'product_name': product['productName'],
        'category_id': product['categoryId'],
        # EAN of the first entry in the product's `items` list.
        'ean': skus[0]['ean'] if skus else None,
        # Values of the first specification in the first specification group.
        # NOTE(review): presumably the box quantity -- confirm against the API.
        'box_qty': specs[0]['values'] if specs else None,
    }
    print(prd_dict)
原始輸出:
{'product_id': '141917', 'price': 20.54, 'product_name': 'Banana Nanica Kg', 'category_id': '433', 'ean': '4511', 'box_qty': ['0']}
{'product_id': '148077', 'price': 1.45, 'product_name': 'água de Coco Tradicional Quadrado 200Ml', 'category_id': '148', 'ean': '0751320333650', 'box_qty': ['27']}
uj5u.com熱心網友回復:
它發送的 variables 是以 base64 編碼的,解碼後的內容如下:
'{"hideUnavailableItems":false,"skusFilter":"ALL","simulationBehavior":"default","installmentCriteria":"MAX_WITHOUT_INTEREST","productOriginVtex":false,"map":"c","query":"bebida","orderBy":"OrderByScoreDESC","from":40,"to":59,"selectedFacets":[{"key":"c","value":"bebida"}],"operator":"and","fuzzy":"0","searchState":null,"facetsBehavior":"Static","categoryTreeBehavior":"default","withFacets":false}'
如果我解析 URL,把所有參數轉換為字典,並將其中的值替換為 'from': 0 和
'to': 99,就可以一次得到 100 個產品。
但如果你使用大於 99 的值,它就無法運作。也許還需要對 URL 做一些其他更改。
import base64
import urllib.parse
import urllib.request
import json
# Two captured "productSearchV3" requests for the "bebida" category.  They
# differ only in the paging window stored in the base64 "variables" payload:
# url1 asks for items 20-39, url2 for items 40-59.
# NOTE(review): the "[email protected]" sender/provider values look like an
# e-mail-obfuscation artefact of the page these URLs were copied from --
# confirm the real values before relying on these requests.
url1 = 'https://www.giga.com.vc/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=pt-BR&__bindingId=3f6e91e6-44f2-4fb0-a2d9-e238b53082e0&operationName=productSearchV3&variables={}&extensions={"persistedQuery":{"version":1,"sha256Hash":"6869499be99f20964918e2fe0d1166fdf6c006b1766085db9e5a6bc7c4b957e5","sender":"[email protected]","provider":"[email protected]"},"variables":"eyJoaWRlVW5hdmFpbGFibGVJdGVtcyI6ZmFsc2UsInNrdXNGaWx0ZXIiOiJBTEwiLCJzaW11bGF0aW9uQmVoYXZpb3IiOiJkZWZhdWx0IiwiaW5zdGFsbG1lbnRDcml0ZXJpYSI6Ik1BWF9XSVRIT1VUX0lOVEVSRVNUIiwicHJvZHVjdE9yaWdpblZ0ZXgiOmZhbHNlLCJtYXAiOiJjIiwicXVlcnkiOiJiZWJpZGEiLCJvcmRlckJ5IjoiT3JkZXJCeVNjb3JlREVTQyIsImZyb20iOjIwLCJ0byI6MzksInNlbGVjdGVkRmFjZXRzIjpbeyJrZXkiOiJjIiwidmFsdWUiOiJiZWJpZGEifV0sIm9wZXJhdG9yIjoiYW5kIiwiZnV6enkiOiIwIiwic2VhcmNoU3RhdGUiOm51bGwsImZhY2V0c0JlaGF2aW9yIjoiU3RhdGljIiwiY2F0ZWdvcnlUcmVlQmVoYXZpb3IiOiJkZWZhdWx0Iiwid2l0aEZhY2V0cyI6ZmFsc2V9"}'
url2 = 'https://www.giga.com.vc/_v/segment/graphql/v1?workspace=master&maxAge=short&appsEtag=remove&domain=store&locale=pt-BR&__bindingId=3f6e91e6-44f2-4fb0-a2d9-e238b53082e0&operationName=productSearchV3&variables={}&extensions={"persistedQuery":{"version":1,"sha256Hash":"6869499be99f20964918e2fe0d1166fdf6c006b1766085db9e5a6bc7c4b957e5","sender":"[email protected]","provider":"[email protected]"},"variables":"eyJoaWRlVW5hdmFpbGFibGVJdGVtcyI6ZmFsc2UsInNrdXNGaWx0ZXIiOiJBTEwiLCJzaW11bGF0aW9uQmVoYXZpb3IiOiJkZWZhdWx0IiwiaW5zdGFsbG1lbnRDcml0ZXJpYSI6Ik1BWF9XSVRIT1VUX0lOVEVSRVNUIiwicHJvZHVjdE9yaWdpblZ0ZXgiOmZhbHNlLCJtYXAiOiJjIiwicXVlcnkiOiJiZWJpZGEiLCJvcmRlckJ5IjoiT3JkZXJCeVNjb3JlREVTQyIsImZyb20iOjQwLCJ0byI6NTksInNlbGVjdGVkRmFjZXRzIjpbeyJrZXkiOiJjIiwidmFsdWUiOiJiZWJpZGEifV0sIm9wZXJhdG9yIjoiYW5kIiwiZnV6enkiOiIwIiwic2VhcmNoU3RhdGUiOm51bGwsImZhY2V0c0JlaGF2aW9yIjoiU3RhdGljIiwiY2F0ZWdvcnlUcmVlQmVoYXZpb3IiOiJkZWZhdWx0Iiwid2l0aEZhY2V0cyI6ZmFsc2V9"}'


def _split_search_url(url):
    """Break a search URL into its parsed pieces.

    Returns a tuple ``(parts, query, extensions, variables)``: the
    ``urlparse`` result, the ``parse_qs`` dict of the query string, the
    decoded ``extensions`` JSON object, and the decoded base64 ``variables``
    JSON object stored inside it.
    """
    parts = urllib.parse.urlparse(url)
    query = urllib.parse.parse_qs(parts.query)
    extensions = json.loads(query['extensions'][0])
    # parse_qs turns a literal '+' into a space; base64 text never contains
    # spaces, so mapping them back restores the original payload.
    encoded = extensions['variables'].replace(' ', '+')
    variables = json.loads(base64.b64decode(encoded.encode()).decode())
    return parts, query, extensions, variables


def read_search_variables(url):
    """Return the decoded GraphQL ``variables`` dict embedded in *url*."""
    return _split_search_url(url)[3]


def set_page_range(url, first, last):
    """Return a copy of *url* whose paging window is ``first``..``last``.

    Only the ``from`` and ``to`` entries of the embedded variables are
    replaced; every other query parameter is carried over.  The result is a
    properly percent-encoded URL.
    """
    parts, query, extensions, variables = _split_search_url(url)
    variables['from'] = first
    variables['to'] = last
    payload = json.dumps(variables)
    extensions['variables'] = base64.b64encode(payload.encode()).decode()
    query['extensions'][0] = json.dumps(extensions)
    parts = parts._replace(query=urllib.parse.urlencode(query, doseq=True))
    return urllib.parse.urlunparse(parts)


def main():
    """Request the first 100 products of the category and list their names."""
    print('--- replace values ---')
    print(read_search_variables(url1))
    # 0..99 is the widest window used here; the accompanying write-up reports
    # that values above 99 did not work.
    url = set_page_range(url1, 0, 99)
    print(read_search_variables(url))
    print('---')
    # The context manager closes the HTTP response even if parsing fails.
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read())
    for number, item in enumerate(data['data']['productSearch']['products'], 1):
        print(number, '|', item['productName'])


if __name__ == '__main__':
    main()
結果:
1 | Água de Coco Tradicional Quadrado 200Ml
2 | Leite Longa Vida Integral com Tampa Italac 1L
3 | Leite Longa Vida Quatá Integral 1L
4 | Água Mineral sem Gás Minalba 1,5L
5 | Água Mineral Sem Gás Minalba 510Ml
6 | Refrigerante Coca-Cola200Ml
7 | Leite Longa Vida Integral Shefa Garrafa 1L
8 | Refrigerante Coca-Cola sem Açúcar 1L
9 | Cerveja Heineken Lata 350Ml
10 | Leite Integral Longa Vida com Tampa Ninho 1L
11 | Leite Longa Vida Semidesnatado Com Tampa Italac 1L
12 | Cerveja Heineken Long Neck 330Ml
13 | Cerveja Amstel Lata 269Ml
14 | Água Mineral Minalba Com Gás 510Ml
15 | Água Mineral Pureza Vital Sem Gás Nestlé Pet 510Ml
16 | Água Mineral Sem Gás Bonafont 500Ml
17 | Refrigerante Coca-Cola sem Açúcar 200ml
18 | Água de Coco Kero Coco 1L
19 | Refrigerante Coca-Cola 2,5L
20 | Leite Longa Vida Desnatado com Tampa Italac 1L
21 | Refrigerante Coca-Cola Lata 350Ml
22 | Leite Longa Vida Integral Tirol 1L
23 | Água Mineral com Gás Pureza Vital 510ML
24 | Suco de Uva Integral Tinto Aurora 1,5L
25 | Achocolatado Toddynho 200Ml
26 | Refrigerante Coca-Cola sem Açúcar Lata 220ml
27 | Energético Red Bull Energy Drink 250Ml
28 | Água Mineral Sem Gás Pureza Vital Nestlé 1,5 L
29 | Suco Natural One Laranja 2L
30 | Refrigerante Coca-Cola 2L
31 | Cerveja Skol Lata 350Ml
32 | Refrigerante Coca-Cola 1L
33 | Suco De Maçã Yakult 200Ml
34 | Cerveja Império Puro Malte Lata 269Ml
35 | Refrigerante Guaraná Antarctica 2L
36 | Refrigerante Coca-Cola Lata 220Ml
37 | Leite Em Pó Integral Italac Sachê 400G
38 | Água Coco Puro Coco 200Ml
39 | Água Mineral Com Gás Crystal Pet 1,5 L
40 | Achocolatado sabor Chocolate Italakinho 200Ml
41 | Cerveja Duplo Malte Brahma Lata 350Ml
42 | Água Mineral Indaiá sem Gás 500Ml
43 | Refresco em Pó Sabor Laranja Tang 25G
44 | Água De Coco Puro Coco 1L
45 | Cerveja Budweiser Lata 269Ml
46 | Cerveja Skol Lata 269Ml
47 | Refresco em Pó Sabor Uva Tang 25G
48 | Refrigerante Guaraná Antarctica Lata 350Ml
49 | Cerveja Eisenbahn Pilsen Lata 350Ml
50 | Cerveja Itaipava Lata 350Ml
51 | Cerveja Itaipava Lata 269Ml
52 | Refrigerante Coca-Cola 600Ml
53 | Refrigerante Guaraná Antarctica 1,5L
54 | Cerveja Stella Artois Lata 269Ml
55 | Whisky Escocês Johnnie Walker Red Label 1L
56 | Refrigerante Coca-Cola sem Açúcar 2L
57 | Água Mineral sem Gás Crystal Pet 1,5 L
58 | Cerveja Amstel Lata 350Ml
59 | Cerveja Corona Extra Long Neck 330Ml
60 | Cerveja Stella Artois Long Neck 330Ml
61 | Água Mineral Sem Gás Bonafont 1,5 L
62 | Cerveja Puro Malte Petra Lata 350Ml
63 | Água de Coco Kero Coco 200Ml
64 | Cerveja Heineken Garrafa 600Ml
65 | Refrigerante de Laranja Sukita 2L
66 | Chopp De Vinho Draft 600Ml
67 | Refrigerante De Limão H2Oh! 500Ml
68 | Suco Natural de Uva e Maca One Ambiente 2L
69 | Refrigerante Dolly Guaraná 2L
70 | Energético Energy Monster Lata 473Ml
71 | Refresco em Pó Sabor Limão Tang 25g
72 | Suco De Laranja Integral Prat's 4 Ls
73 | Energético Red Bull Tropical Energy Drink 250Ml
74 | Refrigerante Limoneto H2Oh! Pet 500Ml
75 | Água Tônica Antarctica Zero 350Ml
76 | Água Mineral Sem Gás Minalba 10 Ls
77 | Vodka Red Smirnoff 998Ml
78 | Suco De Laranja Natural Xandô Garrafa 900Ml
79 | Energético Red Bull Melancia Energy Drink 250Ml
80 | Bebida Láctea de Proteína Zero Lactose sabor Chocolate YoPro 15G
81 | Água Verão Sense Lindoya 510ml
82 | Vodka Nacional Smirnoff Ice Red 269ml
83 | Whisky Escocês White Horse 8 Anos 1L
84 | Refrigerante Coca-Cola sem Açúcar 2,5L
85 | Refresco em Pó Sabor Maracujá Tang 25g
86 | Cerveja Império Puro Malte Lata 350Ml
87 | Vodka Ice Smirnoff 275Ml
88 | Cerveja Eisenbahn Pilsen Long Neck 355Ml
89 | Guaraná Com Açaí Natural Guaraviton Pet 500Ml
90 | Cerveja Budweiser Long Neck 330Ml
91 | Água Mineral Com Gás Pet Crystal 500Ml
92 | Água Tônica Antarctica Lata 350Ml
93 | Refrigerante Sabor Guaraná Mini Dolly Pet 350Ml
94 | Água Tônica Schweppes lata 350ml
95 | Cachaça 51 965Ml
96 | Cerveja Skol Lata 550Ml
97 | Refresco em Pó Sabor Abacaxi Tang 25g
98 | Cerveja Puro Malte Petra Lata 269Ml
99 | Cachaça Velho Barreiro 910Ml
100 | Refrigerante Fanta Laranja 2L
轉載請註明出處,本文鏈接:https://www.uj5u.com/net/455147.html
