_THROW(out_of_range, "invalid vector<T> subscript"); 的問題-有解無憂

// RF_C2.cpp : 定義控制臺應用程式的入口點。
//

#include "stdafx.h"
#include <iostream>
#include <fstream>
#include <sstream>
#include "random_forest.h"

using namespace std;

vector<decision_tree*>  alltrees;               // 森林（決策樹集合）
vector<TupleData>       trainAll,train,test; // 樣本集
vector<int>             attributes;         // 屬性集（元素為屬性序號）

int                     trainAllNum = 0;
int                     testAllNum  = 0;
int                     MaxAttr;         // 屬性總數
int                     *ArrtNum={0};               // 屬性個數集（元素為屬性最大值）
unsigned int            F;
int                     tree_num    = 10;      // 決策樹個數
const int               leafattrnum = -1;       // 葉子節點的屬性序號

// 讀入資料
void init(char * trainname, char * testname)
{
    trainAllNum     = readData(trainAll, trainname);
    testAllNum      = readData(test, testname);
    calculate_attributes();
    double temp     = (double)trainAllNum;
    temp            = log(temp)/log(2.0);
    F               = (unsigned int)floor(temp+0.5)+1;
    if(F>MaxAttr) F = MaxAttr;
}

// 初始化訓練樣本子集
void sub_init()
{
    // 選取決策樹的訓練樣本集合
    RandomSelectData(trainAll, train);

    // 計算樣本屬性個數
    calculate_ArrtNum();
}

// 讀資料
int readData(vector<TupleData> &data, const char* fileName)
{
    ifstream fin;
    fin.open(fileName);
    string line;

    int datanum=0;

    // 每行資料作為一個樣本
    while(getline(fin,line))
    {
        TupleData d;
        istringstream stream(line);
        string str;

        // 設定每個樣本的標簽和內容
        while(stream>>str)
        {
            if(str.find('a')==0)
            {
                d.label='a';
            }
else if(str.find('b')==0)
            {
                d.label='b';
            }
else if(str.find('c')==0)
            {
                d.label='c';
            }
else if(str.find('d')==0)
            {
                d.label='d';
            }
else if(str.find('e')==0)
            {
                d.label='e';
            }
else if(str.find('f')==0)
            {
                d.label='f';
            }
else if(str.find('g')==0)
            {
                d.label='g';
            }
else if(str.find('h')==0)
            {
                d.label='h';
            }
else if(str.find('i')==0)
            {
                d.label='i';
            }
             else if(str.find('j')==0)
            {
                d.label='j';
            }
            else
            {
                int j=stringtoint(str);
                d.A.push_back(j);
            }
        }

        data.push_back(d);
        datanum++;
    }

    fin.close();
    return datanum;
}

// 生成根節點的訓練樣本子集
void RandomSelectData(vector<TupleData> &data, vector<TupleData> &subdata)
{
    int index;
    subdata.clear();
    int d = 0;
    while (d < trainAllNum)
    {
        index = rand() % trainAllNum;
        subdata.push_back(data.at(index));
        d++;
    }
}

// 計算屬性序列
void calculate_attributes()
{
    // 每個樣本必須具有相同的屬性個數
    TupleData d = trainAll.at(0);
    MaxAttr = d.A.size();
    attributes.clear();

    // 建立屬性集合attributes,元素為屬性序號
    for (int i = 0; i < MaxAttr; i++)
    {
        attributes.push_back(i);
    }

    // 初始化屬性最大值序列，元素為屬性最大值
    ArrtNum = new int[MaxAttr];
}

// 字串轉化為int
int stringtoint(string s)
{
    int sum=0;
    for(int i=0; s[i]!='\0';i++)
    {
        int j=int(s[i])-48;
        sum=sum*10+j;
    }
    return sum;
}

// 計算ArrtNum元素值
void calculate_ArrtNum()
{
    for(int i = 0; i < MaxAttr; i++) ArrtNum[i] = 0;

    // ArrtNum元素值為屬性最大值
    for (vector<TupleData>::const_iterator it = train.begin(); it != train.end(); it++)
    {
        int i = 0;

        for (vector<int>::const_iterator intt=(*it).A.begin(); intt!=(*it).A.end();intt++)
        {
            int valuemax=(*intt)+1;
            if(valuemax>ArrtNum[i]) ArrtNum[i]=valuemax;
            i++;
        }
    }
}

// 計算熵
double Entropy(double p, double s)
{
    double n = s - p;
    double result = 0;
    if (n != 0)
        result += - double(n) / s * log(double(n) / s) / log(2.0);
    if (p != 0)
        result += double(-p) / s * log(double(p) / s) / log(2.0);
    return result;
}

// 訓練一棵決策樹
int creat_classifier(decision_tree *&p, const vector<TupleData> &samples, vector<int> &attributes)
{
    if (p == NULL)
        p = new decision_tree();

    // 根據樣本真實類別，輸出葉子節點類別
    if (Allthesame(samples, 'a'))
    {
        p->node.label = 'a';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'b'))
    {
        p->node.label = 'b';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'c'))
    {
        p->node.label = 'c';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'd'))
    {
        p->node.label = 'd';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'e'))
    {
        p->node.label = 'e';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'f'))
    {
        p->node.label = 'f';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'g'))
    {
        p->node.label = 'g';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'h'))
    {
        p->node.label = 'h';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
if (Allthesame(samples, 'i'))
    {
        p->node.label = 'i';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
    if (Allthesame(samples, 'j'))
    {
        p->node.label = 'j';
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }
    // 如果屬性序列為空，當前節點就為葉子節點
    if (attributes.size() == 0)
    {
        p->node.label = Majorityclass(samples);
        p->node.attrNum = leafattrnum;
        p->childs.clear();
        return 1;
    }

    // 計算當前節點的最優屬性
    p->node.attrNum = BestGainArrt(samples, attributes);

    // 中間節點無標簽
    p->node.label = ' ';

    // 計算子節點候選屬性集合，候選集合元素越來越少
    vector<int> newAttributes;
    for (vector<int>::iterator it = attributes.begin(); it != attributes.end(); it++)
        if ((*it) != p->node.attrNum)
            newAttributes.push_back((*it));

    // 初始化樣本子集，建立maxvalue個樣本子集，也就說明該節點有maxvalue個子節點
    // 為什么不建立一個閾值，進行二分類？
    int maxvalue = ArrtNum[p->node.attrNum];
    vector<TupleData>* subSamples = new vector<TupleData>[maxvalue];
    for (int i = 0; i < maxvalue; i++)
        subSamples[i].clear();

    // 將樣本集合分為樣本子集
    for (vector<TupleData>::const_iterator it = samples.begin(); it != samples.end(); it++)
    {
        // 對樣本進行分類，分別分到maxvalue個子節點中
        // p->node.attrNum是當前節點的最優屬性序號
        // (*it).A.at(p->node.attrNum)正是子節點的序號
        // 基于當前節點最優屬性，計算當前樣本的歸類
        subSamples[(*it).A.at(p->node.attrNum)].push_back((*it));
    }

    decision_tree *child;
    for (int i = 0; i < maxvalue; i++)
    {
        child = new decision_tree;

        if (subSamples[i].size() == 0)
            child->node.label = Majorityclass(samples);
        else
            creat_classifier(child, subSamples[i], newAttributes);

        p->childs.push_back(child);
    }
    delete[] subSamples;
    return 0;
}

uj5u.com熱心網友回復：

// 計算節點處的資訊增益
int BestGainArrt(const vector<TupleData> &samples, vector<int> &attributes)
{
    int attr,
        bestAttr = 0,
        s = (int)samples.size();
int z[10]={0};

    // 計算正樣本個數
    for (vector<TupleData>::const_iterator it = samples.begin(); it != samples.end(); it++)
    {
        if ((*it).label == 'a')
            z[0]++;
if ((*it).label == 'b')
            z[1]++;
if ((*it).label == 'c')
            z[2]++;
if ((*it).label == 'd')
            z[3]++;
if ((*it).label == 'e')
            z[4]++;
if ((*it).label == 'f')
            z[5]++;
if ((*it).label == 'g')
            z[6]++;
if ((*it).label == 'h')
            z[7]++;
if ((*it).label == 'i')
            z[8]++;
if ((*it).label == 'j')
            z[9]++;
    }

    double infoD = 0;
    double bestResult = 0;

    // 計算初始熵
    for(int i=0;i<10;i++)
{
infoD += Entropy(z[i], s);
}

    vector<int> m_attributes;

    // 隨機確定候選屬性集
    RandomSelectAttr(attributes, m_attributes);

    // 遍歷屬性（即主題），通過資訊增益篩選最優屬性
    for (vector<int>::iterator it = m_attributes.begin(); it != m_attributes.end(); it++)
    {
        attr            = (*it);
        double result   = infoD;

        // 第attr個屬性的最大屬性值
        int maxvalue    = ArrtNum[attr];

        // 正負樣本集
        int* subP1       = new int[maxvalue];
int* subP2       = new int[maxvalue];
int* subP3       = new int[maxvalue];
int* subP4       = new int[maxvalue];
int* subP5       = new int[maxvalue];
int* subP6       = new int[maxvalue];
int* subP7       = new int[maxvalue];
int* subP8       = new int[maxvalue];
int* subP9       = new int[maxvalue];
int* subP0       = new int[maxvalue];
        int* sub        = new int[maxvalue];

        for (int i = 0; i < maxvalue; i++)
        {
            subP1[i] = 0;
subP2[i] = 0;
subP3[i] = 0;
subP4[i] = 0;
subP5[i] = 0;
subP6[i] = 0;
subP7[i] = 0;
subP8[i] = 0;
subP9[i] = 0;
subP0[i] = 0;
            sub[i]  = 0;
        }

        // 基于特定屬性，對當前訓練樣本進行分類
        // 屬性計算這一步的確沒有，屬性值直接存盤在樣本中
        for (vector<TupleData>::const_iterator jt = samples.begin(); jt != samples.end(); jt++)
        {

switch ((*jt).label)
{
case 'a': subP1[(*jt).A.at(attr)]++;break;
case 'b': subP2[(*jt).A.at(attr)]++;break;
case 'c': subP3[(*jt).A.at(attr)]++;break;
case 'd': subP4[(*jt).A.at(attr)]++;break;
case 'e': subP5[(*jt).A.at(attr)]++;break;
case 'f': subP6[(*jt).A.at(attr)]++;break;
case 'g': subP7[(*jt).A.at(attr)]++;break;
case 'h': subP8[(*jt).A.at(attr)]++;break;
case 'i': subP9[(*jt).A.at(attr)]++;break;
case 'j': subP0[(*jt).A.at(attr)]++;break;
default:  break;

}
            sub[(*jt).A.at(attr)]++;
        }

        // 計算特定屬性下資訊增益（相對熵）
        double SplitInfo = 0;
        for(int i = 0; i < maxvalue; i++)
        {
            double partsplitinfo;
            partsplitinfo   = -double(sub[i])/s*log(double(sub[i])/s)/log(2.0);
            SplitInfo       = SplitInfo+partsplitinfo;
        }

        double infoattr = 0;
        for (int i = 0; i < maxvalue; i++)
        {
int temp=subP1[i]+subP2[i]+subP3[i]+subP4[i]+subP5[i]+subP6[i]+subP7[i]+subP8[i]+subP9[i]+subP0[i];
            double partentropy=0;
            partentropy    += Entropy(subP1[i],temp );
partentropy    += Entropy(subP2[i],temp);
partentropy    += Entropy(subP3[i],temp);
partentropy    += Entropy(subP4[i],temp);
partentropy    += Entropy(subP5[i],temp);
partentropy    += Entropy(subP6[i],temp);
partentropy    += Entropy(subP7[i],temp);
partentropy    += Entropy(subP8[i],temp);
partentropy    += Entropy(subP9[i],temp);
partentropy    += Entropy(subP0[i],temp);
            infoattr        = infoattr+((double)(temp)/(double)(s))*partentropy;
        }
        result = result - infoattr;
        result = result / SplitInfo;

        // 尋找最優屬性
        if (result > bestResult)
        {
            bestResult      = result;
            bestAttr        = attr;
        }
        delete[] subP1;
delete[] subP2;
delete[] subP3;
delete[] subP4;
delete[] subP5;
delete[] subP6;
delete[] subP7;
delete[] subP8;
delete[] subP9;
delete[] subP0;
        delete[] sub;
    }

    if (bestResult == 0)
    {
        bestAttr=attributes.at(0);
    }
    return bestAttr;
}

void RandomSelectAttr(vector<int> &data, vector<int> &subdata)
{
    int index;
    unsigned int dataNum=data.size();
    subdata.clear();
    if(dataNum<=F)
    {
        for (vector<int>::iterator it = data.begin(); it != data.end(); it++)
        {
            int attr = (*it);
            subdata.push_back(attr);
        }
    }
    else
    {
        set<int> AttrSet;
        AttrSet.clear();
        while (AttrSet.size() < F)
        {
            index = rand() % dataNum;
            if (AttrSet.count(index) == 0)
            {
                AttrSet.insert(index);
                subdata.push_back(data.at(index));
            }
        }
    }
}

bool Allthesame(const vector<TupleData> &samples, char ch)
{
    for (vector<TupleData>::const_iterator it = samples.begin(); it != samples.end(); it++)
        if ((*it).label != ch)
            return false;
    return true;
}

// 確定節點中哪個類別樣本個數最多
char Majorityclass(const vector<TupleData> &samples)
{
    int  n = 0,temp;
int pp[10]={0};
    for (vector<TupleData>::const_iterator it = samples.begin(); it != samples.end(); it++)
{
        if ((*it).label == 'a')
            pp[0]++;
        else if ((*it).label == 'b')
            pp[1]++;
else if ((*it).label == 'c')
            pp[2]++;
else if ((*it).label == 'd')
            pp[3]++;
else if ((*it).label == 'e')
            pp[4]++;
else if ((*it).label == 'f')
            pp[5]++;
else if ((*it).label == 'g')
            pp[6]++;
else if ((*it).label == 'h')
            pp[7]++;
else if ((*it).label == 'i')
            pp[8]++;
else if ((*it).label == 'j')
            pp[9]++;
}

    for(int i=0;i<10;i++)
{
if(n<pp[i])
{
n=pp[i];
temp=i;
}
}

if (temp==0)
        return 'a';
    else if(temp==1)
        return 'b';
else if(temp==2)
        return 'c';
else if(temp==3)
        return 'd';
else if(temp==4)
        return 'e';
else if(temp==5)
        return 'f';
else if(temp==6)
        return 'g';
else if(temp==7)
        return 'h';
else if(temp==8)
        return 'i';
else if(temp==9)
        return 'j';

}

// 測驗階段
char testClassifier(decision_tree *p, TupleData d)
{
    // 抵達葉子節點
    if (p->node.label != ' ')
        return p->node.label;

    // 節點處最優屬性
    int attrNum = p->node.attrNum;

    // 錯誤樣本
    if (d.A.at(attrNum) < 0)
        return ' ';

    // 確定分支
    return testClassifier(p->childs.at(d.A.at(attrNum)), d);
}

void testData()
{
    for (vector<TupleData>::iterator it = test.begin(); it != test.end(); it++)
    {
        printf("新樣本\n");

int s[10]={0,0,0,0,0,0,0,0,0,0};

        for(int i = 0; i < tree_num; i++)
        {
printf("第%d棵樹\n",i);
            char m;
m=testClassifier(alltrees.at(i), (*it));
printf("over");
switch (m)
{
case 'a': s[0]++;break;
case 'b': s[1]++;break;
case 'c': s[2]++;break;
case 'd': s[3]++;break;
case 'e': s[4]++;break;
case 'f': s[5]++;break;
case 'g': s[6]++;break;
case 'h': s[7]++;break;
case 'i': s[8]++;break;
case 'j': s[9]++;break;
default:    break;
}

for(int j=0;j<10;j++)
{
printf("p的第%d個為：%d\n",j,s[j]);
}
        }

        int tt=0,oo=0;
for(int j=0;j<10;j++)
{
if(oo<s[j])
{
oo=s[j];
tt=j;
}
}
if (tt==0)
printf("預測為a");
else if(tt==1)
printf("預測為b");
else if(tt==2)
printf("預測為c");
else if(tt==3)
printf("預測為d");
else if(tt==4)
printf("預測為e");
else if(tt==5)
printf("預測為f");
else if(tt==6)
printf("預測為g");
else if(tt==7)
printf("預測為h");
else if(tt==8)
printf("預測為i");
else if(tt==9)
printf("預測為j");
printf("\n");
printf("實際為%c\n",(*it).label);
    }
}

void freeClassifier(decision_tree *p)
{
    if (p == NULL)
        return;
    for (vector<decision_tree*>::iterator it = p->childs.begin(); it != p->childs.end(); it++)
    {
        freeClassifier(*it);
    }
    delete p;
}

void freeArrtNum()
{
    delete[] ArrtNum;
}

//void showResult()
//{
//    cout << "Train size: "<< trainAllNum<<endl;
//    cout << "Test size: "<<testAllNum<<endl;
//    cout << "True positive: " << TP << endl;
//    cout << "False negative: "<< FN<<endl;
//    cout << "False positive: "<<FP<<endl;
//    cout << "True negative: "<<TN<<endl;
//}

int main( )
{
   srand((unsigned)time(NULL));

    // 初始化樣本
init("F:/c.txt", "F:/d.txt");

    // 訓練階段
    for(int i = 0; i < tree_num; i++)
    {
        printf("第 %d 棵決策樹訓練開始\n", i);

        // 每棵樹的訓練樣本子集
        sub_init();

        // 訓練每棵決策樹
        decision_tree * root=NULL;
        creat_classifier(root, train, attributes);

        // 建立森林
        alltrees.push_back(root);

        printf("第 %d 棵決策樹訓練完畢\n", i);
    }

    // 測驗階段
    testData();

    for (vector<decision_tree *>::const_iterator it = alltrees.begin(); it != alltrees.end(); it++)
    {
        freeClassifier((*it));
    }

    freeArrtNum();

    //showResult();

    system("pause");
    return 0;
}

uj5u.com熱心網友回復：

這個程式是在http://www.it165.net/pro/html/201503/37220.html這基礎上改的，想變成一個多分類，一直出現
_THROW(invalid_argument, "invalid vector<T> argument");的錯誤，不一定在第幾棵樹就卡死了，幫忙看看呀，謝謝啦

轉載請註明出處，本文鏈接：https://www.uj5u.com/houduan/94388.html

標籤：基礎類

上一篇：元旦散分

下一篇：久違了！