import os
import sys
valid_lines = []
corrupt_lines = []
'''
The validate_data function will check the students.csv line by line for corrupt data.
- Valid lines should be added to the valid_lines list.
- Invalid lines should be added to the corrupt_lines list.
Example input: 0896801,Kari,Wilmore,1970-06-18,INF
This data is valid and the line should be added to the valid_lines list unchanged.
Example input: 0773226,Junette,Gur_ry,1995-12-05,
This data is invalid and the line should be added to the corrupt_lines list in the following format:
0773226,Junette,Gur_ry,1995-12-05, => INVALID DATA: ['0773226', 'Gur_ry', '']
In the above example the studentnumber does not start with '08' or '09',
the last name contains a special character and the student program is empty.
Don't forget to put the students.csv file in the same location as this file!
'''
def validate_data(line):
# TYPE YOUR SOLUTION CODE HERE
#CSV STUDENT NUMBER
student_number_valid = True
index = 0
studentnumber, firstname, lastname, birthdate, studyprogram = line.split(",")
for element in line.split(","):
if index == 0:
student_number = element
if len(studentnumber) > 0:
csv_s_n = studentnumber[0]
csv_s_n2 = studentnumber[1]
if csv_s_n == '0' and csv_s_n2 == '8' or '9':
valid_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
else:
corrupt_lines.append(studentnumber)
#CSV NAME
if len(firstname) > 0:
if firstname.isalpha() == True:
valid_lines.append(firstname)
else:
corrupt_lines.append(firstname)
else:
corrupt_lines.append(firstname)
if len(lastname) > 0:
if lastname.isalpha() == True:
valid_lines.append(lastname)
else:
corrupt_lines.append(lastname)
else:
corrupt_lines.append(lastname)
#CSV BIRTHDAY
if len(birthdate.split()) == 3:
year1, month1, day1 = birthdate.split("-")
year1=int(year1)
valid_months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
valid_days = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10","11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
thirty_day_months = ["04", "06", "09", "11"]
if not year1 in range(1960, 2004 1):
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if not month1 in valid_months:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
if month1 == "02":
valid_days.remove("31")
valid_days.remove("30")
valid_days.remove("29")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for february
if month1 in thirty_day_months:
valid_days.remove("31")
if not day1 in valid_days:
birthdate = (f"{year1}-{month1}-{day1}")
corrupt_lines.append(birthdate)
# validate day for 30-day months
else:
birthdate = (f"{year1}-{month1}-{day1}")
valid_lines.append(birthdate)
else:
corrupt_lines.append(birthdate)
#CSV STUDYPROGRAM
if len(studyprogram) > 1:
if studyprogram == "INF" or "TINF" or "CMD" or "AI":
valid_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
else:
corrupt_lines.append(studyprogram)
def main(csv_file):
with open(os.path.join(sys.path[0], csv_file), newline='') as csv_file:
# skip header line
next(csv_file)
for line in csv_file:
validate_data(line.strip())
print('### VALID LINES ###')
print("\n".join(valid_lines))
print('### CORRUPT LINES ###')
print("\n".join(corrupt_lines))
if __name__ == "__main__":
main('students.csv')
如您所見,函式validate_data應該檢查匯入的檔案中是否存在損壞和有效的行,然后將它們附加到正確的串列中,并列印它們。它可以作業,但正如您可能看到的那樣,這些行不會列印在一行中。
我確定我必須制作另外兩個串列才能將正確的資料附加到一行中,對損壞的資料執行相同的操作,但是當我嘗試時它失敗了。
uj5u.com熱心網友回復:
您根據每個驗證將該行添加到每個串列中。因此,您將多次將其添加到每個串列中,具體取決于哪些驗證成功和失敗。
您應該只將它添加到一個串列或另一個串列 - 如果它未能通過任何驗證,請將其添加到corrupt_list,并且僅在valid_list所有驗證成功時才添加到。
執行此操作的簡單方法是corrupt_list在每次驗證失敗時添加,然后從函式回傳。如果您完成所有驗證,請添加到valid_list.
您不應該將單個欄位附加到valid_linesand corrupt_lines,它們應該包含整行。
csv_s_n2 == '8' or '9'如果變數與任一值相等,則測驗的內容不是正確的。請參閱為什么“a == x or y or z”總是評估為 True?我如何將“a”與所有這些進行比較?
def validate_data(line):
# TYPE YOUR SOLUTION CODE HERE
#CSV STUDENT NUMBER
student_number_valid = True
index = 0
studentnumber, firstname, lastname, birthdate, studyprogram = line.split(",")
if len(studentnumber) > 0:
csv_s = studentnumber[0:2]
if csv_s not in ('08', '09'))
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV NAME
if len(firstname) > 0:
if not firstname.isalpha():
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
if len(lastname) > 0:
if not lastname.isalpha():
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV BIRTHDAY
if len(birthdate.split('-')) == 3:
year1, month1, day1 = birthdate.split("-")
year1=int(year1)
valid_months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
valid_days = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10","11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
thirty_day_months = ["04", "06", "09", "11"]
if not year1 in range(1960, 2004 1):
corrupt_lines.append(line)
return
if not month1 in valid_months:
corrupt_lines.append(line)
return
if month1 == "02":
# validate day for february
valid_days.remove("31")
valid_days.remove("30")
valid_days.remove("29")
elif month1 in thirty_day_months:
# validate day for 30-day months
valid_days.remove("31")
if not day1 in valid_days:
corrupt_lines.append(line)
return
else:
corrupt_lines.append(line)
return
#CSV STUDYPROGRAM
if studyprogram not in ("INF", "TINF", "CMD", "AI"):
corrupt_lines.append(line)
return
valid_lines.append(line)
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/521509.html
上一篇:選擇元組給定區間的兩個元素
下一篇:在Python中查找和替換字符
