注意:我無論如何都不是程式員,但我一直在提問和做教程,試圖了解如何在治療之外為我的問題找到解決方案。
無論如何,我有一個腳本可以下載 .csv 檔案,然後將它們匯入 Postgres 資料庫。我注意到 .csv 中有很多「浮點」型別的資料,而我需要它們是整數,所以我找到了以下程式碼片段並使用它。我正在學習如何使用 pandas,但腳本在下面這一行出錯:
df.iloc[index, c] = int(x)
它顯示「ValueError: cannot convert float NaN to integer」(無法將浮點 NaN 轉換為整數)。
我最終註釋掉了該片段中的幾行,腳本可以運行了,但它把這些列轉換為浮點數並在數值後面加上 .0。
在我的腦海中,我想我沒有確定正確的列,但我不知道如何判斷。
如果我註釋掉「去除重複列」那一行下方、return df 那一行上方的所有內容,它運行沒有錯誤,但資料不準確:它在應該只是整數的欄位中顯示浮點資料。
我在想我需要識別需要被視為浮點數的每一列并將其設為整數,但不確定如何在腳本中執行此操作,以便查看下載的任何內容并修復該問題。因為我讓腳本登錄到一個站點并下載 .csv 檔案,然后列印它們并將它們放入 postgres。
print("Printing csv present at " file_path)
df = pd.read_csv(file_path, index_col=None, header=0)
df = df.loc[:,~df.columns.duplicated()]
for index, row in df.iterrows():
for c, x in enumerate(row):
if isinstance(x, float):
df.iloc[index, c] = int(x)
return df
我是一邊做一邊學,所以如果程式碼看起來不夠「教科書」,請見諒;不過我練習得越多,效果就越好。
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from subprocess import CREATE_NO_WINDOW
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from datetime import timedelta, datetime
import time
import json
import sys
import csv
import random
import requests
from bs4 import BeautifulSoup
import logging, traceback
from os.path import expanduser
import os,glob
import pandas as pd
# All credentials are read from the user's environment (Windows user variables).
# os.environ.get returns None when a variable is not set.
cf_user = os.environ.get('CF_USER')
cf_password = os.environ.get('CF_PASS')
home = expanduser("~")
cf_url = 'website.url.com'
# Directory where the browser saves the downloaded export files.
directory = home + "/Downloads/"
def getDriver():
    """Start a maximized Chrome browser and return its WebDriver.

    The chromedriver binary is loaded from a fixed location on the C: drive.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--start-maximized")
    # Selenium 4 deprecated the ``executable_path`` keyword of
    # webdriver.Chrome (removed in 4.10); the chromedriver location is
    # passed through a Service object instead.
    service = Service(executable_path=r'C:\chromedriver_win\chromedriver.exe')
    return webdriver.Chrome(service=service, options=chrome_options)
# Browser session created when the script starts; waitAndGetElement and
# bulkDownload read this module-level name.
driver = getDriver()
def waitAndGetElement(delay, condition):
    """Wait on the module-level ``driver`` until ``condition`` is met.

    ``delay`` is handed to WebDriverWait as its timeout and ``condition``
    to ``until``; whatever ``until`` yields is returned to the caller.
    """
    return WebDriverWait(driver, delay).until(condition)
def floats_to_nullable_ints(df):
    """Convert whole-number float columns of ``df`` to nullable ``Int64``.

    ``read_csv`` loads an integer column that contains blank cells as
    ``float64`` (the blanks become NaN), so the values are later written
    back as ``1.0`` instead of ``1``.  Calling ``int()`` on such a NaN
    raises "ValueError: cannot convert float NaN to integer", and storing
    ints cell by cell into a float column leaves its dtype unchanged.

    A float column is converted only when every non-missing value is a
    whole number.  Columns holding real fractions, and columns that are
    entirely empty, are left untouched.  Missing values stay missing and
    are written as empty fields by ``to_csv``.

    The frame is modified in place and also returned.
    """
    for column in df.columns:
        values = df[column]
        if not pd.api.types.is_float_dtype(values):
            continue
        present = values.dropna()
        if present.empty:
            continue
        # inf % 1 is NaN, so infinite values also fail this test.
        if (present % 1 == 0).all():
            df[column] = values.astype("Int64")
    return df


def bulkDownload(url, file_format):
    """Run the export at ``url`` and return the downloaded CSV as a DataFrame.

    Opens ``url`` with the module-level ``driver``, clicks the
    " Download Now" link and waits for the exported file to appear on
    disk.  ``file_format`` is a path template with one ``{}`` placeholder
    that is filled with the last path segment of the page URL after the
    click.

    Duplicated column names are dropped (first occurrence kept) and float
    columns that only hold whole numbers are converted to nullable
    integers so they are not written out with a trailing ``.0``.
    """
    driver.get(url)
    downloadBtn = waitAndGetElement(600000, EC.presence_of_element_located((By.XPATH, '//a[text()=" Download Now"]')))
    time.sleep(5)
    downloadBtn.click()
    export_num = driver.current_url.split("/")[-1]
    file_path = file_format.format(export_num)
    # Poll until the downloaded file exists at the expected path.
    while not os.path.exists(file_path):
        time.sleep(2)
    print("Printing csv present at " + file_path)
    df = pd.read_csv(file_path, index_col=None, header=0)
    # .copy() gives an independent frame so the column assignments in the
    # helper do not act on a slice of the frame returned by read_csv.
    df = df.loc[:, ~df.columns.duplicated()].copy()
    return floats_to_nullable_ints(df)
# Path templates of the files the site exports into the Downloads folder;
# "{}" is filled with the export number by bulkDownload.
ADDRESS_FILE_NAME_FORMAT = home + "/Downloads/address_export_{}.csv"
ORDER_FILE_NAME_FORMAT = home + "/Downloads/customer_export_{}.csv"
SIGNATURE_FILE_NAME_FORMAT = home + "/Downloads/signature_export_{}.csv"
PRODUCT_FILE_NAME_FORMAT = home + "/Downloads/product_export_{}.csv"

# try/finally so the browser is closed even when the login or one of the
# exports fails.
try:
    # Sign in with the credentials taken from the environment.
    driver.get("https://" + cf_url + "/users/sign_in")
    email_input = waitAndGetElement(300, EC.presence_of_element_located((By.ID, 'user_email')))
    email_input.send_keys(cf_user)
    pwd_input = waitAndGetElement(300, EC.presence_of_element_located((By.ID, 'user_password')))
    pwd_input.send_keys(cf_password)
    submit_btn = waitAndGetElement(300, EC.presence_of_element_located((By.XPATH, '//input[@name="commit"]')))
    submit_btn.click()

    # to_csv does not create missing directories.
    os.makedirs('csvs', exist_ok=True)

    # Download each export and save the cleaned copy under csvs/.
    df = bulkDownload("https://" + cf_url + "/path_to_sign/url/site_admin/products/export?model=product", PRODUCT_FILE_NAME_FORMAT)
    df.to_csv('csvs/products.csv', index=False)
    df = bulkDownload("https://" + cf_url + "/path_to_sign/url/site_admin/signatures/export?filterrific[sorted_by]=signed_at_desc&model=signature", SIGNATURE_FILE_NAME_FORMAT)
    df.to_csv('csvs/signatures.csv', index=False)
    df = bulkDownload("https://" + cf_url + "/path_to_sign/url/site_admin/orders/export?filterrific[sorted_by]=created_at_desc&model=customer", ORDER_FILE_NAME_FORMAT)
    df.to_csv('csvs/customers.csv', index=False)
    df = bulkDownload("https://" + cf_url + "/path_to_sign/url/site_admin/addresses/export?model=address", ADDRESS_FILE_NAME_FORMAT)
    df.to_csv('csvs/address.csv', index=False)
finally:
    driver.quit()
uj5u.com熱心網友回復:
您需要確保浮點數確實具有值,否則無法將其轉換為整數。使用 if math.isnan(x): 進行額外檢查,並在腳本頂部添加 import math 陳述式。
for index, row in df.iterrows():
for c, x in enumerate(row):
if isinstance(x, float):
if math.isnan(x):
df.iloc[index, c] = 0 # zero or whatever value when NaN is identified
else:
df.iloc[index, c] = int(x)
return df
轉載請註明出處,本文鏈接:https://www.uj5u.com/shujuku/346996.html
