已經玩了 14 個小時(我是初學者)
從一個資料庫表中提取資料以在雅虎上搜索該股票代碼上的所有資料,然后“打算”上傳它。
我最初將它作為熊貓 df 但得到了“模棱兩可的錯誤”,所以我現在又把它作為 [] 。新錯誤。我絞盡腦汁:(但是,如果我將其留空,它確實有效。
from __future__ import print_function
import yfinance as yf
import pandas as pd
import datetime
import warnings
import MySQLdb as mdb
import requests
import numpy as np
import MySQLdb as mdb
import requests
# Obtain a database connection to the MySQL instance
con = mdb.connect("localhost","sec_user","","securities_master")
def obtain_list_of_db_tickers():
"""
Obtains a list of the ticker symbols in the database.
"""
with con:
cur = con.cursor()
cur.execute("SELECT id, ticker FROM symbol")
data = cur.fetchall()
print(data)
return [(d[0], d[1]) for d in data]
def get_daily_historic_data_yahoo(ticker):
blow = yf.download(ticker)
data = []
data.append(yf.download(ticker).reset_index())
return data
def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
'''
Takes a list of tuples of daily data and adds it to the MySQL database.
Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data (with adj_close and volume)
'''
# Create the time now
now = datetime.datetime.utcnow()
df = pd.DataFrame(data=daily_data[0])
df.insert(0, 'data_vendor_id', data_vendor_id)
df.insert(1, 'symbol_id', symbol_id)
df.insert(3, 'created_date', now)
df.insert(4, 'last_updated_date', now)
daily_data = []
daily_data.append(df)
#df = daily_data
# Amend the data to include the vendor ID and symbol ID
# Connect to the MySQL instance
db_host = 'localhost'
db_user = ''
db_pass = ''
db_name = 'securities_master'
con = mdb.connect("localhost", "sec_user", "", "securities_master"
# host=db_host, user=db_user, passwd=db_pass, db=db_name
)
try:
mdb.connect
# If connection is not successful
except:
print("Can't connect to database")
return 0
# If Connection Is Successful
print("Connected")
final_str = """INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
con.commit()
if __name__ == "__main__":
# This ignores the warnings regarding Data Truncation
# from the Yahoo precision to Decimal(19,4) datatypes
warnings.filterwarnings('ignore')
# Loop over the tickers and insert the daily historical
# data into the database
tickers = obtain_list_of_db_tickers()
lentickers = len(tickers)
for i, t in enumerate(tickers):
print(
"Adding data for %s: %s out of %s" %
(t[1], i 1, lentickers)
)
yf_data = get_daily_historic_data_yahoo(t[1])
insert_daily_data_into_db('1', t[0], yf_data)
print("Successfully added Yahoo Finance pricing data to DB.")
錯誤
Traceback (most recent call last):
File "/home/quant/price_retrieval.py", line 106, in <module>
insert_daily_data_into_db('1', t[0], yf_data)
File "/home/quant/price_retrieval.py", line 88, in insert_daily_data_into_db
cur.executemany(final_str, daily_data)
File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 230, in executemany
return self._do_execute_many(
File "/home/quant/.local/lib/python3.8/site-packages/MySQLdb/cursors.py", line 255, in _do_execute_many
v = values % escape(next(args), conn)
TypeError: not enough arguments for format string
uj5u.com熱心網友回復:
我不是資料科學家,所以可能有一種更優雅的方法可以直接用 Pandas 修復它。但是我通常使用 MySQL(以及任何 SQL 驅動程式)的方式是為其提供 Python 元組串列。
如果您決議 Pandas 資料框的每一行for row in df.itertuples():并仔細制作每個元組 - 確保型別與 SQL 表匹配,一切都應該有效;)
例子:
def insert_daily_data_into_db(data_vendor_id, symbol_id, daily_data):
'''
Takes a list of tuples of daily data and adds it to the MySQL database.
Appends the vendor ID and symbol ID to the data.
daily_data: List of tuples of the OHLC data (with adj_close and volume)
'''
# Create the time now
now = datetime.datetime.utcnow()
df = pd.DataFrame(data=daily_data[0])
daily_data = []
created_date = now
last_updated_date = now
for row in df.itertuples():
_index = row[0] # discard
date = row[1]
open = row[2]
high = row[3]
low = row[4]
close = row[5]
adj_close_price = row[6]
volume = row[7]
daily_data.append((int(data_vendor_id), symbol_id, date, created_date, last_updated_date, open, high, low, close, volume, adj_close_price))
# Connect to the MySQL instance
con = mdb.connect(host="localhost", user="user", password="yourpassword",
db="yourdbname", port=3306)
final_str = """
INSERT INTO daily_price (data_vendor_id, symbol_id, price_date, created_date,
last_updated_date, open_price, high_price, low_price, close_price, volume, adj_close_price)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
"""
with con:
cur = con.cursor()
cur.executemany(final_str, daily_data)
con.commit()
我盡量不要過多地篡改您現有的代碼。足以讓它發揮作用。
我認為那里發生的事情是,您在技術上向它傳遞了一個 Pandas 資料幀串列,串列中只有一個 Pandas 資料幀。相反,您想要的是一個元組串列,每個元組有 11 個要解包的欄位。
也許您的意思是直接傳遞資料幀,即不包含在串列中,但我仍然認為這不正確,因為 1) 資料幀中有一個“索引”列會給出錯誤的結果 2) 您需要在資料幀上呼叫一些方法以僅檢索值(而不是列的標題)并將其轉換為正確的元組串列。這可能是非常可行的,但我會把它留給你去發現。
我還假設您的表架構是這樣的:
CREATE TABLE IF NOT EXISTS daily_price (
data_vendor_id INT,
symbol_id INT,
price_date DATETIME,
created_date DATETIME,
last_updated_date TIMESTAMP,
open_price VARCHAR(256),
high_price VARCHAR(256),
low_price VARCHAR(256),
close_price VARCHAR(256),
volume INT,
adj_close_price VARCHAR(256)
);
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/384394.html
上一篇:突出顯示列熊貓中的最大值和最小值
