這是大神的代碼,運行的時候報錯
# noinspection PyRedeclaration
def parse_tweet_item(self, items):
for it in items:
try:
tweet = Tweet()
tweet['usernameTweet'] = it.xpath('.//span[@class="username u-dir u-textTruncate"]/b/text()')
ID = it.xpath('.//@data-tweet-id').extract()
if not ID:
continue
tweet['ID'] = ID[0]
# get text content
tweet['text'] = ' '.join(
it.xpath('.//div[@class="js-tweet-text-container"]/p//text()').extract()).replace(' # ',
'#').replace(
' @ ', '@')
# clear data[20200416]
# tweet['text'] = re.sub(r"[\s+\.\!\/_,$%^*(+\"\')]+|[+——?【】?~@#¥%……&*]+|\\n+|\\r+|(\\xa0)+|(\\u3000)+|\\t", "", tweet['text']);
# 過濾掉表情符號【20200417】
tweet['text'] = filter_emoji(tweet['text'], '')
if tweet['text'] == '':
# If there is not text, we ignore the tweet
continue
# get meta data
tweet['url'] = it.xpath('.//@data-permalink-path').extract()[0]
nbr_retweet = it.css('span.ProfileTweet-action--retweet > span.ProfileTweet-actionCount').xpath(
'@data-tweet-stat-count').extract()
if nbr_retweet:
tweet['nbr_retweet'] = int(nbr_retweet[0])
else:
tweet['nbr_retweet'] = 0
nbr_favorite = it.css('span.ProfileTweet-action--favorite > span.ProfileTweet-actionCount').xpath(
'@data-tweet-stat-count').extract()
if nbr_favorite:
tweet['nbr_favorite'] = int(nbr_favorite[0])
else:
tweet['nbr_favorite'] = 0
nbr_reply = it.css('span.ProfileTweet-action--reply > span.ProfileTweet-actionCount').xpath(
'@data-tweet-stat-count').extract()
if nbr_reply:
tweet['nbr_reply'] = int(nbr_reply[0])
else:
tweet['nbr_reply'] = 0
tweet['datetime'] = datetime.fromtimestamp(int(
it.xpath('.//div[@class="stream-item-header"]/small[@class="time"]/a/span/@data-time').extract()[
0])).strftime('%Y-%m-%d %H:%M:%S')
# get photo
has_cards = it.xpath('.//@data-card-type').extract()
if has_cards and has_cards[0] == 'photo':
tweet['has_image'] = True
tweet['images'] = it.xpath('.//*/div/@data-image-url').extract()
elif has_cards:
logger.debug('Not handle "data-card-type":\n%s' % it.xpath('.').extract()[0])
# get animated_gif
has_cards = it.xpath('.//@data-card2-type').extract()
if has_cards:
if has_cards[0] == 'animated_gif':
tweet['has_video'] = True
tweet['videos'] = it.xpath('.//*/source/@video-src').extract()
elif has_cards[0] == 'player':
tweet['has_media'] = True
tweet['medias'] = it.xpath('.//*/div/@data-card-url').extract()
elif has_cards[0] == 'summary_large_image':
tweet['has_media'] = True
tweet['medias'] = it.xpath('.//*/div/@data-card-url').extract()
elif has_cards[0] == 'amplify':
tweet['has_media'] = True
tweet['medias'] = it.xpath('.//*/div/@data-card-url').extract()
elif has_cards[0] == 'summary':
tweet['has_media'] = True
tweet['medias'] = it.xpath('.//*/div/@data-card-url').extract()
elif has_cards[0] == '__entity_video':
pass # TODO
# tweet['has_media'] = True
# tweet['medias'] = item.xpath('.//*/div/@data-src').extract()
else: # there are many other types of card2 !!!!
logger.debug('Not handle "data-card2-type":\n%s' % it.xpath('.').extract()[0])
is_reply = it.xpath('.//div[@class="ReplyingToContextBelowAuthor"]').extract()
tweet['is_reply'] = is_reply != []
is_retweet = it.xpath('.//span[@class="js-retweet-text"]').extract()
tweet['is_retweet'] = is_retweet != []
tweet['user_id'] = it.xpath('.//@data-user-id').extract()[0]
yield tweet
if self.crawl_user:
# get user info
user = User()
user['ID'] = tweet['user_id']
user['name'] = it.xpath('.//@data-name').extract()[0]
user['screen_name'] = it.xpath('.//@data-screen-name').extract()[0]
user['avatar'] = \
it.xpath('.//div[@class="content"]/div[@class="stream-item-header"]/a/img/@src').extract()[0]
yield user
except:
logger.error("Error tweet:\n%s" % it.xpath('.').extract()[0])
# raise


在網上搜了很多解決辦法都沒有用
真的是個傻瓜小白,拜托各位了
uj5u.com熱心網友回復:
看報錯是說it是一個str,按道理給parse_tweet_item(self, items)函式傳的items應該是一個元素的集合,但是這個集合里現在有str的項轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/246296.html
上一篇:關于pyqt5方面的一個問題
