我正在嘗試從 CSV 檔案中獲取隨機產品描述的情感分數,但遇到了一個我認為與 API 回應時間有關的問題。我不確定是不是自己遍歷 CSV 並呼叫 API 的方式有誤或效率太低,總之要取得 CSV 中全部 300 多個條目的結果需要很長時間;每當我想把新的更改推送到代碼庫時,都得等 API 把這些條目重新評估一遍。以下是我用來加載 CSV 檔案並獲取情感分數的代碼:
<?php
set_time_limit(500); // extended timeout due to slow / overwhelmed API response
/**
 * Read a comma-separated file into a list of rows.
 *
 * Every returned element is a real row (a list of string fields): blank lines
 * are skipped and no trailing `false` sentinel is appended at end of file, so
 * callers can iterate over the whole result.
 *
 * @param string $csv Path of the CSV file to read.
 * @return array List of rows in file order; the header row (if any) is element 0.
 * @throws RuntimeException If the file cannot be opened.
 */
function extract_file($csv) { // CSV to array function
    $file = fopen($csv, 'r');
    if ($file === false) {
        throw new RuntimeException('Unable to open CSV file: ' . $csv);
    }

    $lines = [];
    // Length 0 means "no line-length limit": a fixed 1000-byte limit would
    // split long product descriptions into several bogus rows.
    // Testing fgetcsv()'s own return value (instead of feof()) avoids pushing
    // the `false` that the final read returns.
    while (($row = fgetcsv($file, 0, ',')) !== false) {
        if ($row === [null]) {
            continue; // fgetcsv() reports a blank line as a single null field
        }
        $lines[] = $row;
    }
    fclose($file);

    return $lines;
}
// Score every product description in dataset.csv with the sentiment API, then
// print all scores plus the rows with the highest negative and positive counts.
$the_file = 'dataset.csv';
$csv_data = extract_file($the_file);

// One entry per successfully scored row:
// [0 => index of the row in $csv_data, "+" => positive count, "-" => negative count]
$response_array = [];

$row_count = count($csv_data);
for ($x = 1; $x < $row_count; $x++) { // index 0 is the header row
    // Skip anything that is not a usable data row (e.g. an end-of-file marker
    // or a line without a description column).
    if (!is_array($csv_data[$x]) || !isset($csv_data[$x][1])) {
        continue;
    }
    echo $x; // show iteration

    // '&' is spelled out as a word, then the whole field is form-encoded so
    // that spaces, '+', '=', '%' and non-ASCII characters reach the API intact.
    $api_text = str_replace('&', ' and ', $csv_data[$x][1]);
    $text = http_build_query(['text' => $api_text]);

    $curl = curl_init(); // API request init
    curl_setopt_array($curl, [
        CURLOPT_URL => "https://text-sentiment.p.rapidapi.com/analyze",
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => $text,
        CURLOPT_HTTPHEADER => [
            "X-RapidAPI-Host: text-sentiment.p.rapidapi.com",
            "X-RapidAPI-Key: <snip>",
            "content-type: application/x-www-form-urlencoded"
        ],
    ]);
    $response = curl_exec($curl);
    $err = curl_error($curl);
    curl_close($curl);

    if ($response === false || $err) {
        // Transport failure: there is no body to decode, move on to the next row.
        echo "cURL Error #:" . $err;
        continue;
    }
    echo $response;

    $json = json_decode($response, true);
    if (!isset($json["pos"], $json["neg"])) {
        continue; // API error payload (or invalid JSON): no scores to record
    }
    $response_array[] = [$x, "+" => $json["pos"], "-" => $json["neg"]];
}

echo "<br>";
echo "<br> results: ";
echo "<p>";
foreach ($response_array as $entry) { // prints out all the sentiment values
    echo "<br>";
    print_r($entry); // print_r() already outputs; echoing its return value adds a stray "1"
    echo "<br>";
}
echo "</p>";

// Find the entries with the highest counts. Each entry carries its own CSV row
// index, so rows that were skipped above cannot shift the lookup.
$most_negative = null;
$most_positive = null;
foreach ($response_array as $entry) {
    if ($most_negative === null || $entry["-"] > $most_negative["-"]) {
        $most_negative = $entry;
    }
    if ($most_positive === null || $entry["+"] > $most_positive["+"]) {
        $most_positive = $entry;
    }
}

echo "<br>the most negative description: ";
if ($most_negative !== null) {
    print_r($csv_data[$most_negative[0]]);
}
echo "<br>the most positive description: ";
if ($most_positive !== null) {
    print_r($csv_data[$most_positive[0]]);
}
?>
此代碼片段的目的是在 CSV 的描述列中找出最消極和最積極的情緒,並根據它們的索引將其列印出來。我只對找出積極和消極情緒詞數量最多的描述感興趣,而不是整體情緒的百分比。
該檔案可以在這個git repo中找到
感謝您的任何建議
uj5u.com熱心網友回復:
這可以通過創建快取檔案來實作。
此解決方案會創建一個 cache.json 檔案來保存 API 結果,並以產品名稱作為每個條目的鍵。
在后續呼叫中,它將使用快取值(如果存在)。
// Raise PHP's script execution time limit to 500 seconds for this run.
set_time_limit(500);
/**
 * Encode $data as pretty-printed JSON and write it to $file.
 *
 * The file is only touched once encoding has succeeded, so a value that cannot
 * be encoded never replaces an existing file with an empty one.
 *
 * @param string $file Destination path.
 * @param mixed  $data Value to encode.
 * @return void
 * @throws RuntimeException If the data cannot be encoded or the file cannot be written.
 */
function file_put_json($file, $data)
{
    $json = json_encode($data, JSON_PRETTY_PRINT);
    if ($json === false) {
        throw new RuntimeException('Unable to encode data as JSON: ' . json_last_error_msg());
    }
    if (file_put_contents($file, $json) === false) {
        throw new RuntimeException('Unable to write JSON file: ' . $file);
    }
}
/**
 * Read a file and decode its contents as JSON.
 *
 * @param string $file     Path of the JSON file.
 * @param bool   $as_array Forwarded to json_decode(): true yields associative
 *                         arrays, false yields stdClass objects.
 * @return mixed Whatever json_decode() produces for the file contents.
 */
function file_get_json($file, $as_array=false)
{
    $contents = file_get_contents($file);
    $decoded = json_decode($contents, $as_array);

    return $decoded;
}
/**
 * Load a CSV file into an array of rows with every field trimmed.
 *
 * With $header_row === true the first line supplies the keys and each later
 * row becomes an associative array; otherwise rows are plain lists. Blank
 * lines are skipped.
 *
 * @param string $file       Path of the CSV file.
 * @param bool   $header_row Whether the first line holds the column names.
 * @return array List of rows; empty when the file has no data rows.
 * @throws RuntimeException If the file cannot be opened.
 */
function file_get_csv($file, $header_row=true)
{
    $handle = fopen($file, 'r');
    if ($handle === false) {
        throw new RuntimeException('Unable to open CSV file: ' . $file);
    }

    $array = [];
    try {
        $header = null;
        if ($header_row === true) {
            $header = fgetcsv($handle);
            if ($header === false) {
                return $array; // empty file: there is no header and no data
            }
        }

        while (($row = fgetcsv($handle)) !== false) {
            if ($row === [null]) {
                continue; // fgetcsv() reports a blank line as a single null field
            }
            $row = array_map('trim', $row);
            $array[] = ($header_row === true) ? array_combine($header, $row) : $row;
        }
    } finally {
        // Release the handle even when array_combine() rejects a malformed row.
        fclose($handle);
    }

    return $array;
}
/**
 * POST a piece of text to the text-sentiment API and return the raw response.
 *
 * $input is appended verbatim after "text=" in a form-urlencoded request body,
 * so it has to be URL-encoded by the caller.
 *
 * @param string $input Already URL-encoded text to analyse.
 * @return string|bool The result of curl_exec() for the request.
 * @throws Exception When cURL reports an error message for the transfer.
 */
function call_sentiment_api($input)
{
    $options = [
        CURLOPT_URL => "https://text-sentiment.p.rapidapi.com/analyze",
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => 30,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => 'text=' . $input,
        CURLOPT_HTTPHEADER => [
            "X-RapidAPI-Host: text-sentiment.p.rapidapi.com",
            "X-RapidAPI-Key: <snip>",
            "content-type: application/x-www-form-urlencoded"
        ],
    ];

    $handle = curl_init();
    curl_setopt_array($handle, $options);

    // Read the error text before the handle is closed.
    $body = curl_exec($handle);
    $failure = curl_error($handle);
    curl_close($handle);

    if (!$failure) {
        return $body;
    }

    throw new Exception("cURL Error #:" . $failure);
}
// Score each product in dataset.csv, reusing results stored in cache.json from
// earlier runs, then list the products with the highest positive and negative
// counts.
$csv_data = file_get_csv('dataset.csv');

// A missing or undecodable cache simply means "start from scratch".
$cache_data = [];
if (file_exists('cache.json')) {
    $loaded = file_get_json('cache.json', true);
    if (is_array($loaded)) {
        $cache_data = $loaded;
    }
}

try {
    foreach ($csv_data as $csv) {
        $product_name = $csv['name'];
        echo $product_name . '...';

        // Only a cached entry that actually holds scores counts as a hit, so a
        // product whose earlier request failed is retried on this run.
        if (isset($cache_data[$product_name]['pos'], $cache_data[$product_name]['neg'])) {
            echo 'CACHED...' . PHP_EOL;
            continue;
        }

        $description = urlencode(str_replace('&', ' and ', $csv['description']));
        $response = call_sentiment_api($description);
        echo 'API...' . PHP_EOL;

        $json = json_decode($response, true);
        if (!isset($json['pos'], $json['neg'])) {
            // Error payload or invalid JSON: do not cache it, otherwise the
            // product would be stuck without a score forever.
            echo "\tno score returned, will retry on the next run" . PHP_EOL;
            continue;
        }
        $cache_data[$product_name] = $json;
    }
} finally {
    // Persist whatever was collected even if a request threw part-way through,
    // so the next run does not have to pay for those API calls again.
    file_put_json('cache.json', $cache_data);
    echo 'SAVE CACHE!' . PHP_EOL . PHP_EOL;
}

$highest_pos = 0;
$highest_neg = 0;
$pos = [];
$neg = [];
foreach ($cache_data as $name => $cache) {
    if (!isset($cache['pos'], $cache['neg'])) {
        continue;
    }
    // A new maximum discards the previous leaders; ties are collected.
    // The tie check is deliberately a loose numeric comparison so that an
    // integer score and an equal float score (4 and 4.0) count as the same value.
    if ($cache['pos'] > $highest_pos) {
        $pos = [];
        $highest_pos = $cache['pos'];
    }
    if ($cache['pos'] == $highest_pos) {
        $pos[$name] = $cache;
    }
    if ($cache['neg'] > $highest_neg) {
        $neg = [];
        $highest_neg = $cache['neg'];
    }
    if ($cache['neg'] == $highest_neg) {
        $neg[$name] = $cache;
    }
}

echo "Most Positive Sentiment: " . $highest_pos . PHP_EOL;
foreach ($pos as $name => $pos_) {
    echo "\t" . $name . PHP_EOL;
}
echo PHP_EOL;
echo "Most Negative Sentiment: " . $highest_neg . PHP_EOL;
foreach ($neg as $name => $neg_) {
    echo "\t" . $name . PHP_EOL;
}
結果是:
Most Positive Sentiment: 4
X-Grip Lifting Straps - GymBeam
Beta Carotene - GymBeam
Chelated Magnesium - GymBeam
Creatine Crea7in - GymBeam
L-carnitine 1000 mg - GymBeam - 20 tabs
Resistance Band Set - GymBeam
Most Negative Sentiment: 2
Calorie free Ketchup sauce 320 ml - GymBeam
ReHydrate Hypotonic Drink 1000 ml - GymBeam
Vitamin E 60 caps - GymBeam
Vitamin B-Complex 120 tab - GymBeam
Zero Syrup Hazelnut Choco 350 ml - GymBeam
Bio Psyllium - GymBeam
Zero calorie Vanilla Syrup - GymBeam
uj5u.com熱心網友回復:
你需要知道時間去哪兒了。
首先確定 curl 請求中時間的去向。
我的猜測是 API 回應時間。
如果是這種情況,我有一個解決方案。同時,我會把用於並行執行 curl 請求的「多任務」代碼找出來。
curl 本身就提供了你需要的計時資訊。它看起來像這樣:
'total_time' => 0.029867,
'namelookup_time' => 0.000864,
'connect_time' => 0.001659,
'pretransfer_time' => 0.00988,
'size_upload' => 0.0,
'size_download' => 8300.0,
'speed_download' => 277898.0,
'speed_upload' => 0.0,
只需添加幾行代碼
$response = curl_exec($curl);
// Export the transfer statistics that curl_getinfo() reports for this handle
// and append them to timing.txt, one dump per request.
$info = var_export(curl_getinfo($curl),true);
file_put_contents('timing.txt',$info,FILE_APPEND);
並行運行多個 curl 套接字。
把你的 curl 代碼放在 curl.php 中:
// curl.php: forwards the "text" query parameter to the sentiment API and
// prints the raw API response. (Goes after the opening PHP tag of curl.php.)
$input = (isset($_GET['text']) && is_string($_GET['text'])) ? $_GET['text'] : '';
if ($input === '') {
    http_response_code(400);
    exit('Missing "text" parameter');
}

// PHP has already URL-decoded $_GET, so the value is re-encoded here as the
// form field "text" that the API expects in the POST body.
$text = http_build_query(['text' => $input]);

$curl = curl_init();
curl_setopt_array($curl, [
    CURLOPT_URL => "https://text-sentiment.p.rapidapi.com/analyze",
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_ENCODING => "",
    CURLOPT_MAXREDIRS => 10,
    CURLOPT_TIMEOUT => 30,
    CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
    CURLOPT_CUSTOMREQUEST => "POST",
    CURLOPT_POSTFIELDS => $text,
    CURLOPT_HTTPHEADER => [
        "X-RapidAPI-Host: text-sentiment.p.rapidapi.com",
        "X-RapidAPI-Key: <snip>",
        "content-type: application/x-www-form-urlencoded"
    ],
]);
$response = curl_exec($curl);
$err = curl_error($curl);
curl_close($curl);

if ($response === false) {
    http_response_code(502);
    exit("cURL Error #:" . $err);
}
echo $response;
把下面的代碼放進遍歷 CSV 的迴圈中,為每一列建立要傳給 curl.php 的 URL 查詢字串(例如 http://127.0.0.1/curl.php?text=$text):
// Queue one local curl.php request per description.
// NOTE(review): $text is assumed to hold the raw description text here, not an
// already prefixed "text=..." string; confirm against the surrounding loop.
$query = urlencode($text);
$urls[] = array('host' => "127.0.0.1",'path' => "/curl.php?text=$query");
然后處理所有的 URL。
// Open one socket per queued URL and send its HTTP request straight away.
// Sockets are keyed by the URL's index in $urls, so a failed connection does
// not shift the ids of the sockets that follow it.
$sockets = [];
if (!isset($err)) {
    $err = '';
}
foreach ($urls as $id => $url) {
    $host = $url['host'];
    $path = $url['path'];
    $http = "GET $path HTTP/1.0\r\nHost: $host\r\n\r\n";
    $stream = stream_socket_client("$host:80", $errno, $errstr, 120, STREAM_CLIENT_ASYNC_CONNECT|STREAM_CLIENT_CONNECT);
    if ($stream) {
        $sockets[$id] = $stream; // supports multiple sockets
        fwrite($stream, $http);
    }
    else {
        $err .= "$id Failed<br>\n";
    }
}
然後監視這些套接字,並從每個套接字讀取回應。
每個套接字讀取完畢後就將其關閉,直到取得全部回應為止。
// Collect the responses from all open sockets until each one has been closed
// by the remote end, or until stream_select() times out or fails.
// Fallbacks are applied only when the surrounding code has not set these.
if (!isset($timeout)) {
    $timeout = 30; // seconds to wait for any socket to become readable
}
if (!isset($buffer_size)) {
    $buffer_size = 8192; // bytes per fread() call
}
if (!isset($results)) {
    $results = [];
}
if (!isset($closed)) {
    $closed = [];
}

// stream_select() takes its arrays by reference, so they must be real
// variables rather than inline assignment expressions.
$write = null;
$except = null;

while (count($sockets)) {
    $read = $sockets;
    $ready = stream_select($read, $write, $except, $timeout);
    if ($ready === false || $ready === 0) {
        // echo 'Timeout: ' . date('h:i:s') . "\n\n\n";
        break; // error or timeout: stop waiting on the remaining sockets
    }
    foreach ($read as $r) {
        $id = array_search($r, $sockets, true);
        $data = fread($r, $buffer_size);
        if ($data === false || strlen($data) == 0) {
            // Nothing left to read: the response for this socket is complete.
            // echo "$id Closed: " . date('h:i:s') . "\n\n\n";
            $closed[$id] = microtime(true);
            fclose($r);
            unset($sockets[$id]);
        }
        else {
            // Start from an empty string on the first chunk for this id.
            $results[$id] = (isset($results[$id]) ? $results[$id] : '') . $data;
        }
    }
}
然后你所有的結果都在$results[].
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/512383.html
標籤:php、api、curl、情緒分析
