生成的輸出:1 用于獲取模型名稱的代碼:2
enter code here
# Fetch one AMD product page and parse it.
# `requests`, `BeautifulSoup` and `headers` must already be defined by the
# surrounding script; they are not set up in this snippet.
test_link = 'https://www.amd.com/en/products/cpu/amd-ryzen-9-3900xt'
r = requests.get(test_link, headers=headers)
soup = BeautifulSoup(r.content, 'lxml')

# First <div class="fieldset-wrapper"> on the page; every label and value
# below is searched for inside this container only.
whole_data = soup.find('div', class_='fieldset-wrapper')

# Accumulate one entry per node. Rebinding the list inside the loop would
# keep only the text of the last node that was visited.
specifications = [
    label.text.strip()
    for label in whole_data.find_all('div', class_='field__label')
]
specifications_value = [
    item.text.strip()
    for item in whole_data.find_all('div', class_='field__item')
]
# NOTE(review): a single label may own several `field__item` nodes, so the
# two lists are not guaranteed to line up index-for-index -- verify against
# the page markup before pairing them with zip().
問題:我已經能取得資料,但標籤與值分別存放在各自的變數和 for 迴圈中。如何用鍵值對把這兩個變數對應起來?這樣我就可以檢查條件,例如:如果鍵是「Platform」(平臺),則只取出它的值(Boxed Processor,盒裝處理器)。
我想以這樣的方式擷取資料:如果「鍵」是 Platform(平臺),則只擷取它對應的值(盒裝處理器)。其餘 14 個標籤也以同樣的方式處理。
uj5u.com熱心網友回復:
您可以遍歷預期鍵的串列,並使用 :-soup-contains 來定位描述(標籤)節點。如果結果不是 None,就選取其子值;否則回傳 ''。
import requests
from bs4 import BeautifulSoup as bs
# Product pages to scrape; one specification dict is printed per page.
links = ['https://www.amd.com/en/products/cpu/amd-ryzen-7-3800xt',
         'https://www.amd.com/en/products/cpu/amd-ryzen-9-3900xt']

# Label texts to look up on every page. Each becomes a key of the result
# dict, so every page produces the same set of keys.
all_keys = ['Platform', 'Product Family', 'Product Line', '# of CPU Cores',
            '# of Threads', 'Max. Boost Clock', 'Base Clock', 'Total L2 Cache', 'Total L3 Cache',
            'Default TDP', 'Processor Technology for CPU Cores', 'Unlocked for Overclocking', 'CPU Socket',
            'Thermal Solution (PIB)', 'Max. Operating Temperature (Tjmax)', 'Launch Date', '*OS Support']

# One session is reused for all requests.
with requests.Session() as s:
    # Extend the session's default headers rather than replacing the whole
    # mapping, so the library defaults are kept alongside the User-Agent.
    s.headers.update({'User-Agent': 'Mozilla/5.0'})

    for link in links:
        r = s.get(link)
        soup = bs(r.content, 'lxml')
        specification = {}

        for key in all_keys:
            # Label node whose text contains `key`.
            label = f'.field__label:-soup-contains("{key}")'
            # First alternative: a single value; second: a value inside a
            # multi-value `.field__items` wrapper.
            # NOTE(review): the space is a descendant combinator, so this
            # only matches a `.field__item` nested inside the label node.
            # If the page renders label and value as siblings, the
            # adjacent-sibling form (`+`) is required -- confirm against
            # the live markup.
            spec = soup.select_one(
                f'{label} .field__item, {label} .field__items .field__item')

            if spec is None:
                # Label not present on this page: keep the key, empty value.
                specification[key] = ''
            elif key == '*OS Support':
                # This key is stored as a list: the text of every
                # `.field__item` under the matched node's parent.
                specification[key] = [
                    i.text for i in spec.parent.select('.field__item')]
            else:
                specification[key] = spec.text

        print(specification)
        print()
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/392959.html
