此代碼抓取 OddsPortal 網站:
import pandas as pd
from bs4 import BeautifulSoup as bs
from selenium import webdriver
import threading
from multiprocessing.pool import ThreadPool
import os
import re
from math import nan
class Driver:
    """Own a headless Chrome WebDriver and quit it when this object is destroyed."""

    def __init__(self):
        """Start a headless Chrome session and store it on ``self.driver``."""
        options = webdriver.ChromeOptions()
        options.add_argument("--headless")
        # Exclude the 'enable-logging' switch to suppress Chrome's console logging.
        options.add_experimental_option('excludeSwitches', ['enable-logging'])
        self.driver = webdriver.Chrome(options=options)

    def __del__(self):
        """Quit the browser session, if one was ever started."""
        # __del__ also runs when __init__ raised before assigning self.driver
        # (e.g. Chrome failed to launch), so the attribute may be missing.
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()  # clean up driver when we are cleaned up
threadLocal = threading.local()


def create_driver():
    """Return the calling thread's WebDriver, building its ``Driver`` on first use.

    The ``Driver`` wrapper is cached on ``threadLocal`` under ``the_driver``,
    so each thread creates at most one and reuses it on later calls.
    """
    try:
        holder = threadLocal.the_driver
    except AttributeError:
        # Nothing cached yet for this thread.
        holder = None
    if holder is None:
        holder = Driver()
        threadLocal.the_driver = holder
    return holder.driver
class GameData:
    """Bundle of nine independent, initially empty lists, one per named field."""

    def __init__(self):
        """Create every field as its own empty list."""
        columns = (
            'date', 'time', 'game', 'score',
            'home_odds', 'draw_odds', 'away_odds',
            'country', 'league',
        )
        for column in columns:
            # A fresh list per attribute, so no two fields share storage.
            setattr(self, column, [])
def generate_matches(table):
    """Yield one record per match row found in ``table``.

    Rows whose ``class`` contains ``'dark'`` are treated as section headers:
    the text of the first two links inside their ``th`` with class
    ``'first2 tl'`` becomes the current country and league. Every other row
    that has at least six ``td`` cells yields a list of the first six cell
    texts followed by the current country and league.

    Rows with fewer than six ``td`` cells are skipped instead of raising
    ``IndexError``.

    Args:
        table: A parsed element exposing ``findAll('tr')`` (a BeautifulSoup
            tag in this file).

    Yields:
        list: ``[cell0, cell1, cell2, cell3, cell4, cell5, country, league]``.
        ``country`` and ``league`` are ``None`` for rows that appear before
        the first header row.
    """
    required_cells = 6
    # Bound up front so a match row preceding any header row cannot hit an
    # UnboundLocalError.
    country = league = None
    for tr_tag in table.findAll('tr'):
        if 'dark' in tr_tag.attrs.get('class', ()):
            th_tag = tr_tag.find('th', {'class': 'first2 tl'})
            a_tags = th_tag.findAll('a')
            country = a_tags[0].text
            league = a_tags[1].text
            continue
        td_tags = tr_tag.findAll('td')
        if len(td_tags) < required_cells:
            # Not a full match row (no cells, or too few): nothing to yield.
            continue
        yield [
            *(td_tag.text for td_tag in td_tags[:required_cells]),
            country,
            league,
        ]
我收到一個串列錯誤:
yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
IndexError: list index out of range
我該如何解決?
這是我目前的完整代碼
當我檢查時,串列索引為 1,而我正確獲取國家和聯賽值。如果串列中沒有值,我該如何修改
yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
td_tags[4].text, td_tags[5].text, country, league]
才能取得正確的值?
uj5u.com熱心網友回復:
您可以檢查 td_tags 是否不為空:
td_tags = tr_tag.findAll('td')
if len(td_tags) > 0: # or just if td_tags
yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
td_tags[4].text, td_tags[5].text, country, league]
uj5u.com熱心網友回復:
您可以將 td_tags 解包(unpack),這樣如果它是空的,結果就只會有 country 和 league:
yield [*[td_tag.text for td_tag in td_tags], country, league]
或者您可以用簡單的 if 檢查它是否不為空:
if td_tags:
yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
td_tags[4].text, td_tags[5].text, country, league]
如果您想直接抑制 IndexError,另一種選擇是使用 contextlib.suppress(需先 from contextlib import suppress):
with suppress(IndexError):
yield [td_tags[0].text, td_tags[1].text, td_tags[2].text, td_tags[3].text,
td_tags[4].text, td_tags[5].text, country, league]
轉載請註明出處,本文鏈接:https://www.uj5u.com/ruanti/341174.html
