python通過鏈接快速回圈-有解無憂

import requests
import json
from tqdm import tqdm

要回圈的鏈接串列

# Sample URLs to fetch; the same address is listed three times.
links = [
    'https://www.google.com/',
    'https://www.google.com/',
    'https://www.google.com/',
]

使用 requests 逐一請求各個鏈接的 for 回圈

# Fetch every link one after another and collect the decoded JSON bodies.
# Iterating over `links` directly (instead of over indices) is simpler and
# still lets tqdm size its progress bar from len(links).
data = []
for link in tqdm(links):
    # A timeout keeps one unresponsive server from stalling the whole run.
    response = requests.get(link, timeout=10)
    # .json() raises if the body is not valid JSON (e.g. an HTML page).
    data.append(response.json())

上面的 for 回圈用於遍歷串列中的所有鏈接，但是當我對大約一千個鏈接執行回圈時，它非常耗時。有什麼辦法可以加快速度嗎？

uj5u.com熱心網友回復：

最簡單的方法是把它變成多執行緒的。最好的方法可能是異步的。

多執行緒解決方案：

import requests
from tqdm.contrib.concurrent import thread_map

# Sample URLs to fetch; the same address is listed three times.
links = [
    'https://www.google.com/',
    'https://www.google.com/',
    'https://www.google.com/',
]

def get_data(url, timeout=10):
    """Fetch *url* with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the calling worker thread
            indefinitely.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: When the response body is not valid JSON (for example an
            HTML page); requests reports this as a JSON decode error.
    """
    response = requests.get(url, timeout=timeout)
    # Decoding can fail for non-JSON responses; the error is left to the
    # caller, exactly as before.
    return response.json()

# Apply get_data to every entry of `links` through tqdm's thread_map helper
# and keep what it returns in `data`.
data = thread_map(get_data, links)

或不使用tqdm.contrib.concurrent.thread_map：

import requests
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

# Sample URLs to fetch; the same address is listed three times.
links = [
    'https://www.google.com/',
    'https://www.google.com/',
    'https://www.google.com/',
]

def get_data(url, timeout=10):
    """Fetch *url* with an HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the calling worker thread
            indefinitely.

    Returns:
        The parsed JSON payload of the response.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
        ValueError: When the response body is not valid JSON (for example an
            HTML page); requests reports this as a JSON decode error.
    """
    response = requests.get(url, timeout=timeout)
    # Decoding can fail for non-JSON responses; the error is left to the
    # caller, exactly as before.
    return response.json()

# The context manager shuts the pool down (joining its worker threads) once
# every result has been collected, even if one of the requests raises.
with ThreadPoolExecutor() as executor:
    # executor.map yields results in the same order as `links`. Its iterator
    # has no length, so the total is passed to tqdm explicitly.
    data = list(tqdm(executor.map(get_data, links), total=len(links)))

uj5u.com熱心網友回復：

正如評論中所建議的，您可以使用asyncio和aiohttp。

import asyncio
import aiohttp

# Placeholder entries - replace them with the URLs to request.
links = ["your", "links", "here"]

# set number of parallel requests - if you are requesting different domains you are likely to be able to set this higher, otherwise you may be rate limited
PARALLEL_REQUESTS = 10

# Response bodies (raw bytes) are collected here by gather_with_concurrency.
results = []


async def gather_with_concurrency(n):
    """Download every URL in ``links``, running at most *n* requests at once.

    Each response body is read as bytes and appended to the module-level
    ``results`` list. Bodies are appended as the requests finish, so their
    order is not guaranteed to match the order of ``links``.

    Args:
        n: Maximum number of requests allowed in flight at the same time.

    Raises:
        aiohttp.ClientError: If a request fails; the first failure is
            propagated by ``asyncio.gather``.
    """
    # Semaphore caps how many requests are in flight simultaneously.
    semaphore = asyncio.Semaphore(n)

    # The connector is created inside the coroutine so that it is bound to
    # the event loop that is actually running the requests.
    conn = aiohttp.TCPConnector(limit_per_host=100, limit=0, ttl_dns_cache=300)

    # `async with` closes the session - and the connector it owns - even when
    # one of the requests raises.
    async with aiohttp.ClientSession(connector=conn) as session:

        async def get(url):
            async with semaphore:
                # NOTE(review): ssl=False turns off TLS certificate
                # verification; kept from the original - remove it unless
                # it is really required.
                async with session.get(url, ssl=False) as response:
                    # once the body is acquired we append it to the list
                    results.append(await response.read())

        # wait for all requests to finish before leaving the session
        await asyncio.gather(*(get(url) for url in links))


# asyncio.run creates the event loop, runs the coroutine to completion and
# closes the loop afterwards.
asyncio.run(gather_with_concurrency(PARALLEL_REQUESTS))

# Process each collected response body in turn. `do_something` is not defined
# in this snippet - supply your own handler for the downloaded data.
for res in results:
    do_something(res)

I have tried to comment on the code as well as possible.

I have used BS4 to parse requests in this manner (in the do_something logic), but it will really depend on your use case.

轉載請註明出處，本文鏈接：https://www.uj5u.com/houduan/477841.html

標籤：Python 回圈 python-requests

上一篇：如何根據比較其他列中的值生成資料框列？

下一篇：Python-將CSV匯入為DataFrame，使用groupby過濾并將結果匯出為格式化文本