import requests
from bs4 import BeautifulSoup
import json
# Field names for each text fragment of an event row, addressed as
# data[column_index][string_index]: the outer key is the position of the
# "col-md-4" column inside a row, the inner key is the position of the text
# string inside that column.
data = {
    0: {
        0: "title",
        1: "dates",
        2: "city/state",
        3: "country",
    },
    1: {
        0: "event",
        1: "reps",
        2: "prize",
    },
    2: {
        0: "results",
    },
}

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"

# Download the search page and parse the HTML exactly as the server sent it.
response = requests.get(url).text
soup = BeautifulSoup(response, features='lxml')

# Build one dict per <div class="row">, labelling every string found in its
# "col-md-4" columns via the `data` mapping above.
all_data = []
for element in soup.find_all('div', class_="row"):
    event = {}
    for i, col in enumerate(element.find_all('div', class_='col-md-4')):
        for j, item in enumerate(col.strings):
            # A column/string position missing from `data` raises KeyError.
            event[data[i][j]] = item
    all_data.append(event)

print(json.dumps(all_data, indent=4))
這是網站的鏈接:https://mms.kcbs.us/members/evr_search.php?org_id=KCBA
我不確定為什麼沒有任何東西被添加到串列和字典中。
uj5u.com熱心網友回復:
您看到的資料是通過 JavaScript 從外部 URL 加載的。要模擬這個 Ajax 請求,您可以使用以下示例:
import json
import requests
from bs4 import BeautifulSoup
# Endpoint queried with the search parameters below; its response is parsed
# as HTML containing the event rows.
api_url = "https://mms.kcbs.us/members/evr_search_ol_json.php"

# Query-string parameters sent with the request. The date window is
# hard-coded; adjust "evr_begin" / "evr_end" as needed.
params = {
    "otype": "TEXT",
    "evr_map_type": "2",
    "org_id": "KCBA",
    "evr_begin": "6/16/2022",
    "evr_end": "7/16/2022",
    "evr_address": "",
    "evr_radius": "50",
    "evr_type": "269",
    "evr_openings": "0",
    "evr_region": "",
    "evr_region_type": "1",
    "evr_judge": "0",
    "evr_keyword": "",
    "evr_rep_name": "",
}

# Fail loudly on a hung connection or an HTTP error instead of silently
# parsing an error page into an empty result.
response = requests.get(api_url, params=params, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

# Field names addressed as data[column_index][string_index]: the outer key is
# the position of the "col-md-4" column inside a row, the inner key is the
# position of the text string inside that column.
data = {
    0: {0: "title", 1: "dates", 2: "city/state", 3: "country"},
    1: {0: "event", 1: "reps", 2: "prize"},
    2: {0: "results"},
}

# Build one dict per <div class="row"> from the strings of its columns.
all_data = []
for element in soup.find_all("div", class_="row"):
    event = {}
    for i, col in enumerate(element.find_all("div", class_="col-md-4")):
        for j, item in enumerate(col.strings):
            # A column/string position missing from `data` raises KeyError.
            event[data[i][j]] = item
    all_data.append(event)

print(json.dumps(all_data, indent=4))
輸出:
[
{
"title": "Frisco BBQ Challenge",
"dates": "6/16/2022 - 6/18/2022",
"city/state": "Frisco, CO 80443",
"country": "UNITED STATES",
"event": "STATE CHAMPIONSHIP",
"reps": "Reps: BUNNY TUTTLE, RICH TUTTLE, MICHAEL WINTER",
"prize": "Prize Money: $13,050.00",
"results": "Results Not In"
},
{
"title": "York County BBQ Festival",
"dates": "6/17/2022 - 6/18/2022",
"city/state": "Delta, PA 17314",
"country": "UNITED STATES",
"event": "STATE CHAMPIONSHIP",
"reps": "Reps: ANGELA MCKEE, ROBERT MCKEE, LOUISE WEIDNER",
"prize": "Prize Money: $5,500.00",
"results": "Results Not In"
},
...and so on.
uj5u.com熱心網友回復:
該網站需要在瀏覽器中運行 JavaScript 才能載入資料,而 requests 不會執行 JavaScript。使用 Selenium 要容易得多。
您需要檢查您的 Chrome 瀏覽器版本,並安裝對應的 Chrome 驅動程式([此處](https://chromedriver.chromium.org/downloads))。
from selenium import webdriver
from selenium.webdriver.common.by import By

url = "https://mms.kcbs.us/members/evr_search.php?org_id=KCBA"
chrome_driver_path = r""  # your chrome driver path here

# NOTE(review): newer Selenium releases configure the driver path through a
# Service object instead of `executable_path` -- confirm against the
# installed Selenium version.
driver = webdriver.Chrome(executable_path=chrome_driver_path)
try:
    driver.get(url)
    # Walk every element with class "row" and, inside it, every "col-md-4"
    # column. The generic find_elements(By.CLASS_NAME, ...) form replaces the
    # find_elements_by_class_name helpers.
    for elements in driver.find_elements(By.CLASS_NAME, 'row'):
        event = {}
        # If a "stale element reference" error occurs, a short pause here
        # (e.g. time.sleep(0.5)) may help.
        for i, col in enumerate(elements.find_elements(By.CLASS_NAME, 'col-md-4')):
            # `col.text` is a single string; split it into lines so that
            # `item` is one text line rather than one character.
            for j, item in enumerate(col.text.splitlines()):
                pass  # your code here
finally:
    # Always close the browser, even if scraping fails part-way.
    driver.quit()
轉載請註明出處,本文鏈接:https://www.uj5u.com/qukuanlian/492297.html
