由于能夠使用熊貓的限制(不允許),我正在嘗試在兩個 csv 檔案之間進行左連接操作。我正在掙扎。下面是一個例子:
import csv
def read_csv(path):
file = open(path, "r")
content_list = []
for line in file.readlines():
record = line.split(",")
for item in range(len(record)):
record[item] = record[item].replace("\n","")
content_list.append(tuple(record))
return content_list
lookup_list = read_csv("lookup.csv")
data_list = read_csv("data.csv")
print("list with id and name:")
print(lookup_list)
print("list with id, age, weight:")
print(data_list)
result =list()
data_dict = {x[0]: x for x in data_list[1:]}
for left in lookup_list[1:]:
if left[0] in data_dict:
result.append(data_dict.get(left[0]) left[1:])
print("Result of merge:")
print(result)
帶有 id 和 name 的串列:
[('id', 'name'), ('123', 'Robin'), ('221', 'Larry'), ('331', 'Wilson'), ('412', 'Jack') ]
包含 id、年齡、體重的串列:
[('id', 'age', 'weight'), ('123', '47', '320'), ('221', '47', '190'), ('331', '25' ', '225'), ('412', '21', '180'), ('110', '14', '150')]
合并結果:
[('123', '47', '320', '羅賓'), ('221', '47', '190', '拉里'), ('331', '25', '225', '威爾遜'), ('412', '21', '180', '杰克')]
由于 lookup_list 沒有 id 110 的條目,因此它不包含在結果中。我需要將它包含在結果中,“名稱”值為空。這是我掙扎的地方。
使用 pandas 容易得多,但我們的自動化工程師限制我們只能使用標準 python 發行版中包含的庫/模塊。
在此先感謝您的幫助。
uj5u.com熱心網友回復:
該解決方案按照我的描述執行,并將串列讀入字典。然后,您可以使用合并的結果撰寫一個新的 CSV 檔案。
import csv
from pprint import pprint
def read_csv(path):
file = open(path, "r")
contents = {}
header = []
for line in file.readlines():
record = line.strip().split(",")
if not header:
header = record
else:
contents[record[0]] = {a:b for a,b in zip(header,record)}
return contents
lookup_list = read_csv("xxx.csv")
data_list = read_csv("yyy.csv")
print("list with id and name:")
pprint(lookup_list)
print("list with id, age, weight:")
pprint(data_list)
for k,v in data_list.items():
if k not in lookup_list:
lookup_list[k] = {'name':''}
lookup_list[k].update(v)
print("Result of merge:")
pprint(lookup_list)
輸出:
list with id and name:
{'123': {'id': '123', 'name': 'Robin'},
'221': {'id': '221', 'name': 'Larry'},
'331': {'id': '331', 'name': 'Wilson'},
'412': {'id': '412', 'name': 'Jack'}}
list with id, age, weight:
{'110': {'age': '14', 'id': '110', 'weight': '150'},
'123': {'age': '47', 'id': '123', 'weight': '320'},
'221': {'age': '47', 'id': '221', 'weight': '190'},
'331': {'age': '25', 'id': '331', 'weight': '255'},
'412': {'age': '21', 'id': '412', 'weight': '180'}}
Result of merge:
{'110': {'age': '14', 'id': '110', 'name': '', 'weight': '150'},
'123': {'age': '47', 'id': '123', 'name': 'Robin', 'weight': '320'},
'221': {'age': '47', 'id': '221', 'name': 'Larry', 'weight': '190'},
'331': {'age': '25', 'id': '331', 'name': 'Wilson', 'weight': '255'},
'412': {'age': '21', 'id': '412', 'name': 'Jack', 'weight': '180'}}
跟進
為了進一步討論,這里是如何在 sqlite 中完成的。我想每個人都需要評估這是否更好。
import csv
from pprint import pprint
import sqlite3
db = sqlite3.connect(":memory:")
db.execute( 'CREATE TABLE lookup (id int, name text);' )
db.execute( 'CREATE TABLE data (id int, age int, weight int);' )
def read_csv(db, table, path):
cur = db.cursor()
header = []
for line in open(path).readlines():
if not header:
header = line.rstrip()
continue
record = line.strip().split(",")
sql = f"INSERT INTO {table} ({header}) VALUES ("
sql = ','.join(['?']*len(record)) ");"
cur.execute(sql, record)
lookup_list = read_csv(db, "lookup", "xxx.csv")
data_list = read_csv(db, "data", "yyy.csv")
cur = db.cursor()
for row in cur.execute(
"SELECT data.id,lookup.name,data.age,data.weight FROM data LEFT JOIN lookup ON lookup.id = data.id;"):
print(row)
輸出:
(123, 'Robin', 47, 320)
(221, 'Larry', 47, 190)
(331, 'Wilson', 25, 255)
(412, 'Jack', 21, 180)
(110, None, 14, 150)
uj5u.com熱心網友回復:
sqlite3 包含在標準 Python 發行版中。
您可以創建一個記憶體資料庫,將 csv 內容放入表中,然后進行實際的左連接。
請參閱此答案以從 csv 創建 sqlite 資料庫 使用 Python 將 CSV 檔案匯入 sqlite3 資料庫表
使用該答案中顯示的方法創建表格。假設您呼叫了表t_lookup和t_data,并且呼叫了資料庫連接conn1。
cursor = conn1.cursor()
cursor.execute('''
SELECT t1.*, t2.name
FROM
t_data t1
LEFT JOIN
t_lookup t2
ON t1.id = t2.id;''')
left_result = cursor.fetchall()
for row in left_result:
print(row)
conn1.close()
轉載請註明出處,本文鏈接:https://www.uj5u.com/houduan/400253.html
