我正在嘗試從這些頁面抓取資料,以下是我的程式碼:
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Spider that follows the <h2> links on a listing page and scrapes each target page's `adr` div."""

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an <h2> heading.

        Yields one Request per extracted href; each response is handled
        by parse_book.
        """
        books = response.xpath("//h2/a/@href").extract()
        for book in books:
            # Resolve the href against the current page URL before requesting it.
            url = response.urljoin(book)
            yield Request(url, callback=self.parse_book)

    def parse_book(self, response):
        """Yield an item holding the text of the page's <div class="adr"> elements.

        NOTE: the '/text()' step selects only text nodes that are direct
        children of the div; text wrapped in nested elements is not
        returned, and the strings are not whitespace-stripped.
        """
        coordinate = response.xpath("//div[@class='adr']/text()").getall()
        yield {
            'coordoness': coordinate
        }
uj5u.com熱心網友回復:
請閱讀程式碼中的註解。
from scrapy import Spider
from scrapy.http import Request
class AuthorSpider(Spider):
    """Spider that follows the <h2> links on a listing page and scrapes each target page's `adr` div."""

    name = 'pushpa'
    start_urls = ['http://www.cuma.fr/annuaires?page=1e']

    def parse(self, response):
        """Schedule a request for every link found under an <h2> heading.

        Yields one Request per extracted href; each response is handled
        by parse_book.
        """
        for href in response.xpath("//h2/a/@href").getall():
            # Resolve the href against the current page URL before requesting it.
            yield Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Yield an item with the cleaned text of the page's <div class="adr"> elements.

        The item has a single key, 'coordoness', whose value is a list of
        non-empty, whitespace-stripped strings in document order.
        """
        # Use '//text()' rather than '/text()': the single slash selects only
        # the div's direct text children, while the double slash also collects
        # text nested inside child elements.
        raw_texts = response.xpath("//div[@class='adr']//text()").getall()

        # Strip surrounding whitespace and drop fragments that end up empty.
        stripped = (text.strip() for text in raw_texts)
        coordinate = [text for text in stripped if text]

        yield {
            'coordoness': coordinate
        }
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/426941.html
