多標簽分類形狀問題-有解無憂

資料集：

def set_labels(data):
    """Split one record into its text feature and a list of label values.

    Args:
        data: Mapping from column name to value; must contain a 'text' key.

    Returns:
        A ``(text, labels)`` tuple where ``labels`` holds the values of every
        column other than 'text', in the mapping's iteration order.
    """
    labels = [value for column, value in data.items() if column != 'text']
    return data['text'], labels

# Build the training dataset from the CSV file at `self.path + '1'`
# (the `+` had been lost from the snippet, leaving a syntax error).
# batch_size=1 / num_epochs=1 / shuffle=False: one record per element, a
# single pass, no shuffling. Each element is then split into (text, labels)
# by set_labels.
train_dataset.data = tf.data.experimental.make_csv_dataset(
    self.path + '1',
    batch_size=1,
    num_epochs=1,
    shuffle=False,
).map(self.set_labels)

CSV（4條記錄只是為了重現）：

0,1,10,11,12,13,14,15,16,17,18,19,2,20,21,22,23,24,25,26,27,28,29,3,30,31,32,33,34,35,36,37,38,39,4,40,41,42,43,44,45,46,47,48,49,5,50,51,52,53,54,55,56,57,58,59,6,60,61,62,63,64,65,66,67,68,69,7,70,71,72,73,74,75,76,77,78,79,8,80,81,82,83,84,85,86,87,88,89,9,90,91,92,93,94,95,96,text
0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,c   c using gcc version 4.5.2 64bit gcc -o3 4.5.0 linux x86_64 read cout << hello world printf hello world mean sequence depends entirely specifi compiler implementation version probably compiler option edit state use gcc 4.5.2 fairly new seems like 4.5.2 introduces additional 64bit register fiddling sequence whatever reason save 64bit rax rd zeroing - make absolutely sense least much interesting 3 argument call sequence g   -o1 -s source.cpp lead c_proc .lcx string stack pointer involved cpp_proc see regard rbo
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,writing simple program matlab wondering best way ensure value user inputting proper integer currently using however really know must better way work time would also like add error checking ala try catch block brand new matlab input would great edit2 seems working anything noticeably wrong isinteger defined accepted answer following used directly code check non-integer input including empty infinite imaginary value work correctly scalar input test whether multi-dimensional array contains integer use edit test integer value restrict valid value positive integer add num_dice > 0 @majorapus answer use force user input integer looping succumb demand
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,need send asynchronous email async action understand following error happening use class project use form without error everything quiet error asynchronous action method esqueciminhasenhaasync return task whic cannot executed synchronously action email service stack trace [invalidoperationexception asynchronous action metho esqueciminhasenha return task cannot execute synchronously. system.web.mvc.async.taskasyncactiondescriptor.execute controllercontex controllercontext idictionary2 parameter  11 system.web.mvc.controlleractioninvoker.invokeactionmethod controllercontex controllercontext actiondescriptor actiondescriptor idictionary parameter  2 system.web.mvc.<>c__displayclass15.b__12  55 system.web.mvc.controlleractioninvoker.invokeactionmethodfilter iactionfilte filter actionexecutingcontext precontext func1 continuation  25 system.web.mvc.<>c__displayclass17.<invokeactionmethodwithfilters>b__14  22 system.web.mvc.controlleractioninvoker.invokeactionmethodwithfilters controllercontex controllercontext ilist1 filter actiondescriptor actiondescriptor idictionary2 parameter  18 system.web.mvc.controlleractioninvoker.invokeaction controllercontex controllercontext string actionname  52 system.web.mvc.controller.executecore  9 musiartist.mvc.areas.admin.controllers.basecontroller.executecore fbackupbitbucketnegrini.musiartistsrcmusiartist.mvcareasadmincontrollersbasecontroller.cs2 system.web.mvc.controllerbase.execute requestcontext requestcontext  180 system.web.mvc.<>c__displayclass19.<beginexecute>b__13  18 system.web.mvc.async.asyncresultwrapper.<.cctor>b__0 iasyncresul asyncresult action action  1 system.web.mvc.async.wrappedasyncresult2.callenddelegate iasyncresul asyncresult  1 system.web.mvc.async.wrappedasyncresultbase1.end  4 system.web.mvc.controller.endexecute 
iasyncresult asyncresult  2 system.web.mvc.controller.system.web.mvc.async.iasynccontroller.endexecute iasyncresul asyncresult  1 system.web.mvc.mvchandler.<beginprocessrequest>b__5 iasyncresul asyncresult processrequeststate innerstate  2 system.web.mvc.async.wrappedasyncvoid1.callenddelegate iasyncresul asyncresult  2 system.web.mvc.async.wrappedasyncresultbase`1.end  4 system.web.mvc.mvchandler.endprocessrequest iasyncresult asyncresult  28 system.web.mvc.mvchandler.system.web.ihttpasynchandler.endprocessrequest iasyncresul result   system.web.callhandlerexecutionstep.system.web.httpapplication.iexecutionstep.execute  9765121 system.web.httpapplication.executestep iexecutionstep step boolean& completedsynchronously  155 made override disableasyncsupport method controller set getter false everything worked perfectly believe inheriting controller controller controllerbase method must behave differently
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,got generic list look like picinfo class look like trying find example return anonymous type need generic list someone help would appreciate also please explain code learning process thanks advance [edit] generic list contains list object object picture every picture file name hash value data irrelevant point picture name duplicate file name want get list duplicate file name generic list pi picture also hash value file name identical want another list identical file name also identical hash value [ edit] something like work whether best method sure efficient element iterating list get count hope code complicated need explaining always think best work anyway anythign confusing ask explain want second filter filtering filename hash duplicate need extend lambda count check hash obviously want filename end easy enough select get enumerable list filename possibly distinct want appear nb code written hand forgive typo may compile first time etc ;- edit explain code - spoiler ;- english want following item list want select one item list filename breaking iterate list select thing based criterion use method condition method one item list filename clearly need count list use pi.count however condition counting filename match pas expression tell count thing expression work item list return true want count false want filename interested x item filtering want count many item filename x.filename thus expression z=>z.filename==x.filename z variable expression x.filename context unchanging iterate z course put criterion >1 get boolean value want wanted duplicate considering filename hashvalue would expand part count z=>z.filename==x.filename && z.hashvalue==x.hashvalue final code get distinct value would list pi = new list list filt = pi.where x=>pi.count z=>z.filename==x.filename && 
z.hashvalue==x.hashvalue >1 .tolist ; wanted duplicate considering filename hashvalue would expand part count compare hashvalue well since array want use sequenceequal method compare value value final code get distinct value would note create intermediary list went straight original list could go intermediate list code would much going original filtered list

模型：

# Text vectorization layer with the vocabulary capped at 4000 tokens.
# NOTE(review): output_mode='count' presumably emits per-token counts rather
# than token ids, while the Embedding layer that consumes this output expects
# integer ids -- confirm this combination is intended.
vectorizer = tf.keras.layers.experimental.preprocessing.TextVectorization(
    output_mode='count',
    max_tokens=4000,
    pad_to_max_tokens=True
)
# Fit the vocabulary on the text component only; the labels are dropped.
vectorizer.adapt(train_dataset.data.map(lambda x, y: x))

# Sequential classifier: vectorizer -> embedding -> global max pooling ->
# 97 sigmoid units (the CSV header lists 97 label columns, 0..96).
model = models.Sequential(
    [
        vectorizer,
        layers.Embedding(4000, 64),
        layers.GlobalMaxPool1D(),
        layers.Dense(97, activation='sigmoid'),
    ]
)

# Compile with Adam and binary cross-entropy on the sigmoid outputs.
# f1_metric is defined elsewhere (not shown in this snippet).
model.compile(
    optimizer=optimizers.Adam(),
    loss=losses.BinaryCrossentropy(),
    metrics=[f1_metric],
    run_eagerly=True  # execute eagerly rather than as a compiled graph
)

當我呼叫 history = model.fit(train_dataset.data, epochs=5, batch_size=1) 時出錯：

Traceback (most recent call last):
  File "/Users/patrykbart/dev/vertex/tagify_vertex/src/test.py", line 68, in <module>
    history = model.fit(
  File "/Users/patrykbart/miniforge3/envs/tagify/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "/Users/patrykbart/miniforge3/envs/tagify/lib/python3.9/site-packages/keras/backend.py", line 5158, in binary_crossentropy
    return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
ValueError: `logits` and `labels` must have the same shape, received ((1, 97) vs (97, 1)).

但是當我改用 history = model.fit(train_dataset.data.batch(2), epochs=5, batch_size=1) 時，一切正常，訓練也給出了有效的輸出。那麼該如何將完整資料集傳遞給 model.fit()？問題出在哪裡？

uj5u.com熱心網友回復：

也許可以嘗試使用這裡的實作。正如作者提到的，您可以在 micro、macro 和 weighted 三種 F1 分數之間進行選擇：

def tf_f1_score(y_true, y_pred):
    """Compute micro, macro and weighted F1 scores; return the macro score.

    micro: F1 score across the classes, pooled as one
    macro: mean of F1 scores per class
    weighted: weighted average of F1 scores per class,
            weighted by the support of each class

    All three scores are computed so the return statement can be switched
    to whichever one is needed; only ``macro`` is returned, which lets the
    function be passed directly in ``metrics=[...]``.

    NOTE(review): true/false positives are counted with ``count_nonzero``
    on the raw products, so ``y_pred`` is presumably expected to contain
    hard 0/1 predictions -- confirm whether probabilities should be
    thresholded before calling this.

    Args:
        y_true (Tensor): labels, with shape (batch, num_classes)
        y_pred (Tensor): model's predictions, same shape as y_true

    Returns:
        Tensor: scalar macro F1 score. Classes whose precision, recall or
        F1 has a zero denominator contribute 0 instead of NaN.
    """

    f1s = [0, 0, 0]

    y_true = tf.cast(y_true, tf.float64)
    y_pred = tf.cast(y_pred, tf.float64)

    # axis=None pools every entry (micro); axis=0 reduces over the batch and
    # keeps one value per class (macro / weighted).
    for i, axis in enumerate([None, 0]):
        # count_nonzero returns integers; cast so the safe division below
        # operates on floats.
        TP = tf.cast(tf.math.count_nonzero(y_pred * y_true, axis=axis), tf.float64)
        FP = tf.cast(tf.math.count_nonzero(y_pred * (y_true - 1), axis=axis), tf.float64)
        FN = tf.cast(tf.math.count_nonzero((y_pred - 1) * y_true, axis=axis), tf.float64)

        # divide_no_nan yields 0 where the denominator is 0, so a class with
        # no positives does not turn the whole metric into NaN.
        precision = tf.math.divide_no_nan(TP, TP + FP)
        recall = tf.math.divide_no_nan(TP, TP + FN)
        f1 = tf.math.divide_no_nan(2 * precision * recall, precision + recall)

        f1s[i] = tf.reduce_mean(f1)

    # After the loop ``f1`` holds the per-class scores from the axis=0 pass.
    weights = tf.reduce_sum(y_true, axis=0)
    weights = tf.math.divide_no_nan(weights, tf.reduce_sum(weights))

    f1s[2] = tf.reduce_sum(f1 * weights)

    micro, macro, weighted = f1s
    return macro

這是一個可運行的示例：

import tensorflow as tf

def set_labels(data):
    """Separate a record's text from its label columns.

    Args:
        data: Mapping from column name to value; must contain a 'text' key.

    Returns:
        A ``(text, labels)`` tuple; ``labels`` is the list of values of all
        non-'text' columns, in the mapping's iteration order.
    """
    labels = [value for name, value in data.items() if name != 'text']
    return data['text'], labels

# Read the CSV with batch_size=1, a single pass and no shuffling, then split
# each element into (text, labels) with set_labels.
# NOTE(review): with batch_size=1 the label list presumably becomes a
# (97, 1) tensor while the model outputs (1, 97) (cf. the shapes in the
# ValueError quoted in the question) -- confirm the loss and metric are not
# silently broadcasting these against each other.
train_dataset = tf.data.experimental.make_csv_dataset(
            '/content/data.csv',
            batch_size=1,
            num_epochs=1,
            shuffle=False
        ).map(set_labels)

# Text vectorization layer with the vocabulary capped at 4000 tokens.
# NOTE(review): output_mode='count' presumably emits per-token counts rather
# than token ids, while the Embedding layer that consumes this output expects
# integer ids -- confirm this combination is intended.
vectorizer = tf.keras.layers.TextVectorization(
    output_mode='count',
    max_tokens=4000,
    pad_to_max_tokens=True
)
# Fit the vocabulary on the text component only; the labels are dropped.
vectorizer.adapt(train_dataset.map(lambda x, y: x))

# Sequential classifier: vectorizer -> embedding -> global max pooling ->
# 97 sigmoid units, one per label column.
model = tf.keras.Sequential(
    [
        vectorizer,
        tf.keras.layers.Embedding(4000, 64),
        tf.keras.layers.GlobalMaxPool1D(),
        tf.keras.layers.Dense(97, activation='sigmoid'),
    ]
)

# Compile with Adam and binary cross-entropy; tf_f1_score is passed as a
# plain function taking (y_true, y_pred).
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf_f1_score]
)
# Train for two passes over the dataset.
model.fit(train_dataset, epochs=2)

Epoch 1/2
4/4 [==============================] - 2s 39ms/step - loss: 0.6864 - tf_f1_score: 0.0348
Epoch 2/2
4/4 [==============================] - 0s 17ms/step - loss: 0.6625 - tf_f1_score: 0.0348
<keras.callbacks.History at 0x7f3d78890ad0>

更新 1：搭配 make_csv_dataset 使用 tfa.metrics.F1Score：

import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_addons as tfa

# Synthetic dataset: 5 rows of 97 random 0/1 labels plus a constant text
# column, written to data.csv without the index column.
df = (
    pd.DataFrame(data=np.random.randint(2, size=(5, 97)))
    .astype(int)
    .assign(text=['test hello world test hello world test hello world test hello'] * 5)
)
df.to_csv('data.csv', index=False)

def set_labels(data):
    """Return the record's text together with all of its label values.

    Args:
        data: Mapping from column name to value; must contain a 'text' key.

    Returns:
        A ``(text, labels)`` tuple; ``labels`` lists the values of every
        column except 'text', in the mapping's iteration order.
    """
    labels = [value for key, value in data.items() if key != 'text']
    return data['text'], labels

# Read the CSV from the same relative path that df.to_csv wrote to
# ('data.csv'); the previous absolute '/content/data.csv' only matched when
# the working directory happened to be /content.
train_dataset = tf.data.experimental.make_csv_dataset(
    'data.csv',
    batch_size=1,
    num_epochs=1,
    shuffle=False,
).map(set_labels)

# Remove the trailing size-1 axis from both the text and the labels, then
# regroup the per-record elements into batches of 2.
train_dataset = train_dataset.map(
    lambda x, y: (tf.squeeze(x, axis=-1), tf.squeeze(y, axis=-1))
).batch(2)

# Tokenizer emitting integer token ids, vocabulary capped at 4000 tokens.
# NOTE(review): output_sequence_length=4000 presumably pads/truncates every
# text to 4000 positions -- confirm this length is intended and not just a
# copy of max_tokens.
tokenizer = tf.keras.layers.TextVectorization(
    output_mode='int',
    max_tokens=4000,
    output_sequence_length=4000
)

# Fit the vocabulary on the text component only; the labels are dropped.
tokenizer.adapt(train_dataset.map(lambda x, y: x))

# Sequential classifier: tokenizer -> embedding -> global max pooling ->
# 97 sigmoid units, one per label column.
model = tf.keras.Sequential(
    [
        tokenizer,
        tf.keras.layers.Embedding(4000, 64),
        tf.keras.layers.GlobalMaxPool1D(),
        tf.keras.layers.Dense(97, activation='sigmoid')
    ]
)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    # threshold=0.5 scores every label independently, as multi-label output
    # requires; with the default threshold=None only the arg-max class of
    # each sample is treated as predicted.
    # average='macro' reports one mean-over-classes value instead of 97
    # per-class scores.
    metrics=[
        tfa.metrics.F1Score(num_classes=97, average='macro', threshold=0.5)
    ],
)
# Train for ten passes over the batched dataset.
model.fit(
    train_dataset,
    epochs=10,
)

轉載請註明出處，本文鏈接：https://www.uj5u.com/qukuanlian/426837.html

標籤：Python TensorFlow 機器學習 Keras

上一篇：張量維度必須相等

下一篇：ValueError：無法擠壓dim[1]，預期維度為1，'{{nodeSqueeze}}=Squeeze[T=DT_FLOAT，squeeze_dims=[-1]]得到60