這裡要讀一個xml檔案,並把資料寫入csv。
python代碼
python 3.8
def read_xml3():
    """Convert the BBC news XML file into a two-column CSV file.

    Parses ``D:/tmp/bbc_text/bbc-text.xml`` and, for every child of the root
    element, writes one ``category,text`` row to
    ``D:/tmp/bbc_text/bbc_text_python.csv`` (UTF-8, with a header row).
    """
    doc = etree.parse("D:/tmp/bbc_text/bbc-text.xml")
    root = doc.getroot()
    # The ``with`` block closes (and thereby flushes) the file on exit, so no
    # explicit flush()/close() calls are needed.
    with open("D:/tmp/bbc_text/bbc_text_python.csv", "w", encoding="utf-8") as f:
        f.write("category,text\n")
        # Iterate the element directly; getchildren() is deprecated.
        for news_item_xml in root:
            # An element without text content has ``text`` set to None.
            text = news_item_xml.text or ""
            f.write(news_item_xml.attrib['category'])
            f.write(",")
            # Doubling embedded quotes is only valid CSV when the whole field
            # is itself wrapped in quotes; quoting also protects any commas
            # and line breaks inside the text.
            f.write('"' + text.replace('"', '""') + '"')
            f.write("\n")
C#代碼
/// <summary>
/// A single news item: its category label and its body text.
/// </summary>
public class BbcNewsItem
{
    /// <summary>Category label of the news item.</summary>
    public string Category { get; set; }

    /// <summary>Body text of the news item.</summary>
    public string Text { get; set; }
}
/// <summary>
/// Converts the BBC news XML file into a CSV file using CsvHelper.
/// </summary>
public static class XmlReader
{
    /// <summary>
    /// Loads <c>D:/tmp/bbc_text/bbc-text.xml</c>, builds one
    /// <see cref="BbcNewsItem"/> per child element of the document element
    /// (category attribute + inner text), and writes them, with a header
    /// record, to <c>D:/tmp/bbc_text/bbc_text_cshape.csv</c>.
    /// </summary>
    public static void Read()
    {
        XmlDocument xmlDoc = new XmlDocument();
        xmlDoc.Load("D:/tmp/bbc_text/bbc-text.xml");

        List<BbcNewsItem> newsItems = new List<BbcNewsItem>();
        foreach (XmlNode node in xmlDoc.DocumentElement.ChildNodes)
        {
            // ChildNodes may contain non-element nodes (e.g. comments); a hard
            // cast to XmlElement would throw InvalidCastException on those,
            // so they are skipped instead.
            XmlElement e = node as XmlElement;
            if (e == null)
            {
                continue;
            }

            BbcNewsItem newsItem = new BbcNewsItem();
            newsItem.Category = e.GetAttribute("category");
            newsItem.Text = e.InnerText;
            newsItems.Add(newsItem);
        }

        // Disposal runs in reverse order (csvWriter, then writer) and takes
        // care of flushing and closing, so the stream is not closed by hand
        // while csvWriter is still alive.
        using (var writer = new StreamWriter("D:/tmp/bbc_text/bbc_text_cshape.csv"))
        using (var csvWriter = new CsvWriter(writer, CultureInfo.InvariantCulture))
        {
            csvWriter.Configuration.HasHeaderRecord = true;
            csvWriter.Configuration.AutoMap<BbcNewsItem>();
            csvWriter.WriteRecords(newsItems);
        }
    }
}
測試後所用時間為
235.8 ms ± 19.8
不過,我後來發現,我用的是.net core 3.1。因為.net core是跨平臺的,所以編譯出來的並不是windows的機器碼,而應該是MSIL,
於是改成了.net framework 4.5,
57.6 ms ± 10.8
C++代碼
C++ 14
#include <iostream>
#include <fstream>
#include <chrono>
#include "pugixml.hpp"
using namespace std;
/// Replaces every occurrence of `toSearch` in `data` with `replaceStr`, in place.
///
/// Scanning resumes right after each inserted replacement, so text introduced
/// by `replaceStr` is never matched again (replacing `"` with `""` terminates).
/// An empty `toSearch` leaves `data` unchanged.
///
/// @param data        String that is modified in place.
/// @param toSearch    Substring to look for.
/// @param replaceStr  Text substituted for each occurrence.
void findAndReplaceAll(std::string& data, std::string toSearch, std::string replaceStr)
{
    // The pattern and replacement are taken by value so that a caller may
    // safely pass `data` itself for either of them.

    // An empty pattern matches at every position, so the loop below would
    // never reach npos; treat it as "nothing to replace".
    if (toSearch.empty())
        return;

    std::string::size_type pos = data.find(toSearch);
    while (pos != std::string::npos)
    {
        data.replace(pos, toSearch.size(), replaceStr);
        // Continue after the replacement that was just written.
        pos = data.find(toSearch, pos + replaceStr.size());
    }
}
/// Converts D:/tmp/bbc_text/bbc-text.xml into D:/tmp/bbc_text/bbc_text_cpp.csv
/// and prints the elapsed time in milliseconds.
///
/// Every child of the `bbc` element becomes one `category,"text"` row, with
/// quotes inside the text doubled.
///
/// @return -1 if the XML cannot be loaded or the CSV cannot be opened or
///         written; 0 otherwise.
int main()
{
    // steady_clock is monotonic, so the measured interval cannot be skewed by
    // wall-clock adjustments the way system_clock can.
    const auto start = std::chrono::steady_clock::now();
    std::cout << "Hello World!\n";

    pugi::xml_document doc;
    pugi::xml_parse_result result = doc.load_file("D:/tmp/bbc_text/bbc-text.xml");
    if (!result)
        return -1;

    std::ofstream csv_file("D:/tmp/bbc_text/bbc_text_cpp.csv");
    if (!csv_file)
        return -1;

    // '\n' instead of std::endl: endl forces a flush after every row.
    csv_file << "category,text" << '\n';
    auto children = doc.child("bbc").children();
    for (const auto& child : children) {
        std::string text = child.text().as_string();
        findAndReplaceAll(text, "\"", "\"\"");
        csv_file << child.attribute("category").value() << ",\"" << text << "\"\n";
    }

    // close() flushes the remaining buffered output; it stays inside the
    // timed region so the measurement covers the complete write.
    csv_file.close();
    if (csv_file.fail())
        return -1;

    const auto end = std::chrono::steady_clock::now();
    const auto millis = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
    std::cout << millis << "\n";

    // Block until a token is read from stdin before exiting (presumably to
    // keep the console window open so the timing can be read).
    std::string c;
    std::cin >> c;
}
C++的運行時間是34.8 ms ± 1.6
還有golang和java的代碼,我放在github了:
https://github.com/EricWebsmith/lang_compare_xml
結論
| 語言 | 運行時間 |
|---|---|
| c++ | 34.8 ms ± 1.6 |
| python | 38.5 ms ± 4.88 |
| .net framework | 57.6 ms ± 10.8 |
| golang | 171.5 ms ± 12.6 |
| .net core | 235.8ms ± 19.8 |
| java | 258.6 ms ± 15.0 |
.Net Core運行時間約是python的6倍,
.Net Framework運行時間約是python的1.5倍,
.Net Framework運行時間約是.Net Core的1/4,
python確實比我想象的快了很多,我以為python應該墊底的,
不過,我還是不看好用python開發應用程式:型別不安全,而且和c#、c++相比,編程時的自動提示很差,其實編程效率並不高。這種編程效率低、運行速度也慢的語言,不適合用來做應用程式,
當然,誰都沒有c++快,
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/241481.html
標籤:其他
