幾天前,我在社區問了這個問題,很快就得到了我們的同胞的答復。該解決方案在 Valgrind 上也很棒。
根據他/她的建議,我撰寫了以下函式split_dataset作為決策樹演算法的一部分。下面是我寫的代碼,
#include <stdio.h>
#include <stdlib.h>
void printArray(double array[], unsigned int size)
{
for (unsigned int i = 0; i < size; i) {
printf("%.3f ", array[i]);
}
printf("\n");
}
typedef struct
{
size_t length;
double **designMatrix_Y;
} DecisionTreeData;
DecisionTreeData *split_dataset(int index, //var index
double value, //best cutoff
int row, //nrows of design matrix, number of variables
double **designMatrix_Y) //design matrix (X) and response Y
{
// Buffers to hold rows of data as we are distributing rows based on the split.
double **leftDesignMatrix_Y = calloc(row, sizeof *leftDesignMatrix_Y);
double **rightDesignMatrix_Y = calloc(row, sizeof *rightDesignMatrix_Y);
size_t left_count = 0;
size_t right_count = 0;
for (size_t i = 0; i < row; i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count ;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count ;
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
DecisionTreeData *data_split = malloc(sizeof(DecisionTreeData) * 2);
data_split[0] = (DecisionTreeData){left_count, leftDesignMatrix_Y};
data_split[1] = (DecisionTreeData){right_count, rightDesignMatrix_Y};
return data_split;
}
double *alloc_dvector(unsigned int length)
{
return malloc((length * (sizeof(double))));
}
void dealloc_dvector(double *array)
{
free((char *) array);
}
double **alloc_2dArray(unsigned int row, unsigned int col)
{
double **Array = (double **) malloc((size_t) ((row) * (sizeof(double *))));
for (unsigned int i = 0; i < row; i ) {
Array[i] = alloc_dvector(col);
}
return Array;
}
void dealloc_2dArray(double **Array, unsigned int row, unsigned int col)
{
for (unsigned int i = 0; i < row; i )
{
dealloc_dvector(Array[i]);
}
free((char *) Array);
}
int main()
{
double **designMatrix_Y = alloc_2dArray(4,4);
for(int i = 0; i<4; i )
{
for(int j= 0; j<4; j )
{
designMatrix_Y[i][j] = i*j;
}
}
DecisionTreeData *dataSplits = split_dataset(2,
2,
4,
designMatrix_Y);
//rename for convenience
size_t leftSize = (dataSplits[0]).length;
double **designMatrix_Y_L = (dataSplits[0]).designMatrix_Y;
double **designMatrix_Y_R = (dataSplits[1]).designMatrix_Y;
for(int i = 0; i<leftSize; i)
printArray(designMatrix_Y_L[i],4);
free(designMatrix_Y_L);
free(designMatrix_Y_R);
free(dataSplits);
dealloc_2dArray(designMatrix_Y,4,4);
return 0;
}
它在我的 mac mini M1 中運行良好(不支持 Valgrind,看不到日志)但在 Linux 上崩潰(分段錯誤)。此外,我還附上了來自 Valgrind 的錯誤訊息以及上面的代碼。
==4777== LEAK SUMMARY:
==4777== definitely lost: 0 bytes in 0 blocks
==4777== indirectly lost: 0 bytes in 0 blocks
==4777== possibly lost: 0 bytes in 0 blocks
==4777== still reachable: 224 bytes in 8 blocks
==4777== suppressed: 0 bytes in 0 blocks
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
==4777==
==4777== 1 errors in context 1 of 4:
==4777== Invalid read of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777== Address 0x0 is not stack'd, malloc'd or (recently) free'd
==4777==
==4777==
==4777== 1 errors in context 2 of 4:
==4777== Use of uninitialised value of size 8
==4777== at 0x4006C7: printArray (test.c:7)
==4777== by 0x400A5E: main (test.c:117)
==4777==
==4777==
==4777== 1 errors in context 3 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007B4: split_dataset (test.c:36)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4777== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4777== by 0x400809: split_dataset (test.c:49)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777==
==4777== 2 errors in context 4 of 4:
==4777== Invalid write of size 8
==4777== at 0x4007E7: split_dataset (test.c:43)
==4777== by 0x400A06: main (test.c:106)
==4777== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4777== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4777== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4777== by 0x400828: split_dataset (test.c:50)
==4777== by 0x400A06: main (test.c:106)
==4777==
==4777== ERROR SUMMARY: 5 errors from 4 contexts (suppressed: 0 from 0)
(END)
通過錯誤代碼,我意識到錯誤可能來自printArray(). 所以,我在沒有 for 回圈的情況下又運行了一次printArray()。Valgrind 的日志更改如下。
==4021== HEAP SUMMARY:
==4021== in use at exit: 0 bytes in 0 blocks
==4021== total heap usage: 15 allocs, 15 frees, 336 bytes allocated
==4021==
==4021== All heap blocks were freed -- no leaks are possible
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
==4021==
==4021== 1 errors in context 1 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007B4: split_dataset (test.c:36)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x52052e8 is 0 bytes after a block of size 8 alloc'd
==4021== at 0x4C2C291: realloc (vg_replace_malloc.c:834)
==4021== by 0x400809: split_dataset (test.c:49)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021==
==4021== 2 errors in context 2 of 2:
==4021== Invalid write of size 8
==4021== at 0x4007E7: split_dataset (test.c:43)
==4021== by 0x400A06: main (test.c:106)
==4021== Address 0x5205380 is 0 bytes after a block of size 0 alloc'd
==4021== at 0x4C29EBD: malloc (vg_replace_malloc.c:306)
==4021== by 0x4C2C210: realloc (vg_replace_malloc.c:834)
==4021== by 0x400828: split_dataset (test.c:50)
==4021== by 0x400A06: main (test.c:106)
==4021==
==4021== ERROR SUMMARY: 3 errors from 2 contexts (suppressed: 0 from 0)
(END)
那么,任何人都可以幫助解決這個問題嗎?有什么建議嗎?提前感謝您的時間為我繁瑣的描述。讓我知道是否有任何不清楚的地方。
gcc 版本 9.3.0
uj5u.com熱心網友回復:
這兩個重新分配不屬于它們在該 for 回圈內的位置。當產生的左/乘分割的最終大小確定時,應該在for 回圈之后。通過在每次迭代中調整它們的大小,您可以將它們修復為當前大小,這意味著下一次迭代將保證超出一個插槽(無論哪一側是目標)。地址消毒劑報告將證實這一點。
雙方都故意過度分配以保存row潛在的指標。那些只需要在拆分迭代完成后塵埃落定后進行調整。
這(在此處標記)如下:
for (size_t i = 0; i < row; i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count ;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count ;
}
// HERE HERE HERE HERE HERE
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
}
應該是這樣的:
for (size_t i = 0; i < row; i)
{
if (designMatrix_Y[index][i] <= value)
{
// Copy the row into the left half
leftDesignMatrix_Y[left_count] = designMatrix_Y[i];
left_count ;
}
else
{
// Copy the row into the right half
rightDesignMatrix_Y[right_count] = designMatrix_Y[i];
right_count ;
}
}
//realloc the memory for exact space
leftDesignMatrix_Y = realloc(leftDesignMatrix_Y, left_count * sizeof(double *));
rightDesignMatrix_Y = realloc(rightDesignMatrix_Y, right_count * sizeof(double *));
轉載請註明出處,本文鏈接:https://www.uj5u.com/yidong/467412.html
