代碼

from __future__ import uniticode_literals
import youtube_dl
import pandas as pd
import csv
import re

# 讀取csv檔案 匯入 re
number_of_rows = pd.read_csv('single.csv')

# Initialize YouTube-DL Array
ydl_opts = {}
all_scrapes = []
twitter_list = []

# Scrape在線產品
def run_scraper() 。

    # 讀取CSV到串列。
    with open("single.csv"/span>, "r"/span>) as f:
        csv_reader = csv.reader(f)
        next(csv_reader)
        
        # Scrape Data From Store[/span]。
        for csv_line_entry in csv_reader:
                        
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta['channel']
                title = meta['title']
                description = meta['description']
                print('Channel :'/span>, channel)
                print('Title :', title)
                #print('description :', description)/span>

                get_links(description)
                print("-"/span> * 120)
                print()

                print('Demo:'/span>, twitter_list)

            # Make a tuple with the relevant information of the current YouTube Scrapes.
            current_scrapes = (channel, title, twitter_list)
            all_scrapes.append(current_scrapes) 

        print('All Scrapes:'/span>, all_scrapes)
        print()


def get_links（description）。
  
  # 查找描述中的URLs。
  description_urls = re.findall(r'(https?://[^s] )'/span>, description)
  #print('List Before :', description_urls, '
')

  # Twitter資源。
  if 'twitter.com' in description。

    for item in description_urls:
      #print('Print All URLs:', item)。
      if 'twitter.com' in item:
        print('- Twitter URL Found:'/span>, item)
        twitter_list.append(item)


run_scraper()

CSV檔案

Videos
https://www.youtube.com/watch?v=kqtD5dpn9C8
https://www.youtube.com/watch?v=rfscVS0vtbw

上面的代碼從CSV檔案中提取YouTube的URL，然后列印頻道和標題資訊。此外，通過get_links函式，它從YouTube描述中提取了Twitter的URL。

問題

當我在get_links函式（第61行）中列印捕獲的Twitter Url時

print('- Twitter URL Found:'/span>, item)

結果是正確顯示了每個用戶各自的Twitter條目。

我無法將這些資訊拉入Tuple current_scrapes，而無法看到所有捕獲的Twitter Urls填充到每個tuple條目中。

如果有任何幫助，我們將不勝感激。

uj5u.com熱心網友回復：

通過一點點重組你的代碼：

import re
import youtube_dl
import pandas as pd

# Scrape Online Product as pd
def run_scraper():
    ydl_opts = {}
    all_scrapes = []

    # Read CSV to List: ydl_opts = {} all_scrapes = [].
    with open("single.csv"/span>, "r"/span>) as f:
        csv_reader = csv.reader(f)
        next(csv_reader)

        # Scrape Data From Store[/span]。
        for csv_line_entry in csv_reader:

            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                meta = ydl.extract_info(csv_line_entry[0], download=False)
                channel = meta["channel"]
                title = meta["標題"]
                description = meta["描述"]
                twitter_list = get_links(description, "twitter.com")

                print("Channel :"/span>, channel)
                print("Title :", title)
                print("Twitter URLs :"/span>, twitter_list)
                print("-" * 120)
                print()

            all_scrapes.append((channel, title, twitter_list))

    return all_scrapes


def get_links（description, link）。
    out = []

    # Find URLs in description: out = []
    description_urls = re.findall(r"(https?://[^s] )"/span>, description)

    for item in description_urls:
        if link in item:
            out.append(item)

    return out


df = pd.DataFrame(run_scraper(), columns=["channel"/span>, "title"/span>, "twitter URLs"/span>] )
print(df)

列印：

[youtube] kqtD5dpn9C8: 正在下載網頁
頻道：與莫什編程
標題 : Python初學者--1小時內學會Python
Twitter URLs : ['https://twitter.com/moshhamedani']
------------------------------------------------------------------------------------------------------------------------

[youtube] rfscVS0vtbw: 下載網頁
頻道 : freeCodeCamp.org
標題 : 學習Python - 初學者的完整課程 [教程] 。
Twitter URLs : ['https://twitter.com/mike_dane']
------------------------------------------------------------------------------------------------------------------------

                 頻道標題 twitter URLs
0 用Mosh Python初學者編程--1小時內學會Python [https://twitter.com/moshhamedani]
1 freeCodeCamp.org學習Python--初學者的全部課程[教程] [https://twitter.com/mike_dane]

轉載請註明出處，本文鏈接：https://www.uj5u.com/caozuo/320270.html

標籤：

上一篇：在IE中不添加兼容性視圖設定，就無法通過自動化使用Selenium登錄應用程式

下一篇：CoTaskMemAlloc()是如何替代SHGetMalloc()的？

如何在Python中從串列中提取值

代碼

CSV檔案

問題