當我下載了相同的檔案時，如何用 C 刪除重複的檔案-有解無憂

當我們在 Internet 上下載一些相同的檔案時，檔案名變成 (2), (3)...

例子

我想用 C 把這些重複的檔案刪除。首先我要找出這些檔案並列印出來。我寫了一些代碼，但它不起作用。

// main -- the question's original attempt.
// Scans $HOME/Downloads and, for every entry whose name does not start with
// '.', prints each differently-named entry whose name contains it as a
// substring (strstr).
int main(){

        const char *path;
        DIR *dir;
        struct dirent* entry;
        if((path=getenv("HOME"))==NULL){//get HOME path
                // HOME is unset: fall back to the password database entry
                path = getpwuid(getuid())->pw_dir;
        }
        const char *downloads = "/Downloads";
        // NOTE(review): this appends in place to the string returned by
        // getenv()/getpwuid(); no separate buffer is allocated for the result
        strcat(path,downloads); //make ~/Downloads
        if(chdir(path)!=0){
                perror("chdir()");
                return -1;
        }
        if((dir=opendir(path))==NULL){ //open directory
                perror("open");
                return 1;
        }
        // outer pass: candidate "base" names
        while((entry=readdir(dir))!=NULL){
                struct dirent *cmpentry;
                DIR *cmpdir;
                // a second stream over the same directory is opened for every
                // outer entry; it is never closed in this function
                if((cmpdir=opendir(path))==NULL){
                        perror("opendir");
                        return -1;
                }


                // inner pass: compare every entry against the current one
                while((cmpentry=readdir(cmpdir))!=NULL){
                        // skip names starting with '.' and identical names
                        if((entry->d_name[0]!='.')&&strcmp(entry->d_name,cmpentry->d_name)!=0){
                                // report names that contain the outer name anywhere
                                char *ptr=strstr(cmpentry->d_name,entry->d_name);
                                if(ptr!=NULL)
                                        printf("%s\n",cmpentry->d_name);
                          
                                }
                        }
                }
        }

我該如何解決？

uj5u.com熱心網友回復：

一系列問題...

path 沒有足夠的空間供 strcat 使用，因此會產生 UB（未定義的行為）
無需使用chdir
沒有closedir呼叫，所以對于一個大目錄，你會用完檔案描述符。
沒有跳過.和..條目
只使用 strcmp 和 strstr 是不夠的，會產生重複和/或遺漏的結果。
重復打開同一個目錄很慢/很浪費。最好讀取一次目錄并將條目保存在陣列中。

一些修復：

捕獲陣列中的資料
使用輔助結構（例如下面的 struct root）將檔案名拆分為組件部分（例如 foo(1).pdf --> foo、(1) 和 .pdf）
添加了長度和檔案內容的比較

這是原始代碼，帶有錯誤注釋：

// main -- the question's code, reformatted and annotated with NOTE/BUG
// comments marking each problem; the code itself is left unfixed.
// It scans $HOME/Downloads and prints every entry whose name contains the
// name of another entry that does not start with '.'.
int
main()
{

    const char *path;
    DIR *dir;
    struct dirent *entry;

    // get HOME path
    if ((path = getenv("HOME")) == NULL) {
        // HOME is unset: fall back to the password database entry
        path = getpwuid(getuid())->pw_dir;
    }
    const char *downloads = "/Downloads";

    // make ~/Downloads
// NOTE/BUG: not enough space in path
// NOTE/BUG: path is a const
    strcat(path, downloads);
// NOTE/BUG: no need to chdir as opendir is enough
    if (chdir(path) != 0) {
        perror("chdir()");
        return -1;
    }

    // open directory
// NOTE/BUG: no closedir for this
    if ((dir = opendir(path)) == NULL) {
        perror("open");
        return 1;
    }

    // outer pass: candidate "base" names
    while ((entry = readdir(dir)) != NULL) {
// NOTE/BUG: no check for "." or ".."
        struct dirent *cmpentry;
        DIR *cmpdir;

        // the directory is reopened once per outer entry
// NOTE/BUG: no closedir for this
        if ((cmpdir = opendir(path)) == NULL) {
            perror("opendir");
            return -1;
        }

        // inner pass: compare every entry against the current outer entry
        while ((cmpentry = readdir(cmpdir)) != NULL) {
// NOTE/BUG: strcmp sense is inverted
// NOTE/BUG: strcmp wrong
            if ((entry->d_name[0] != '.') &&
                strcmp(entry->d_name, cmpentry->d_name) != 0) {
                // substring test: prints any name containing the outer name
                char *ptr = strstr(cmpentry->d_name, entry->d_name);

                if (ptr != NULL)
                    printf("%s\n", cmpentry->d_name);
            }
        }
    }
}

在上面的代碼中，我使用cpp條件來表示舊代碼與新代碼：

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

注意：可以通過對檔案運行 unifdef -k 來清理這些條件編譯指令

這是重構的代碼。是這樣注釋的：

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <pwd.h>
#include <string.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/stat.h>

// dbgprt -- printf-style debug output to stderr
// Expands to a no-op statement unless DEBUG is defined.  Uses the standard
// C99 variadic form (__VA_ARGS__) rather than the GNU-only "_fmt..." form.
#ifdef DEBUG
#define dbgprt(...) \
    fprintf(stderr,__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

// filename parsing control
struct root {
    struct dirent root_ent;             // raw directory entry (copy)
    off_t root_size;                    // file size
    int root_paren;                     // 1=has "(1)"
    int root_dup;                       // 1=is a duplicate
    char *root_suf;                     // suffix/extension (e.g. ".pdf"),
                                        // malloc'ed by rootof; NULL if none
    char root_core[256];                // root/core/base name
};

// rootshow -- show root struct contents
// root: entry to dump
// who: tag naming the caller, appended to the debug line
// Prints only when DEBUG is defined (see dbgprt).
void
rootshow(const struct root *root,const char *who)
{

    // passing NULL for %s is undefined, so substitute a printable marker
    dbgprt("rootshow: d_name='%s' root_dup=%d root_paren=%d root_core='%s' root_suf='%s' (from %s)\n",
        root->root_ent.d_name,
        root->root_dup,root->root_paren,
        root->root_core,
        (root->root_suf != NULL) ? root->root_suf : "(null)",
        who);
}

// rootstrip -- strip a trailing "(digits)" counter from name, in place
// RETURNS: 1=counter found and removed, 0=name left untouched
// "foo(12)" --> "foo"; "foo()", "foo(x)", "12)" and "" are left alone.
static int
rootstrip(char *name)
{
    size_t len = strlen(name);

    // rightmost char must be ")" (an empty name has no rightmost char)
    if ((len == 0) || (name[len - 1] != ')'))
        return 0;

    // walk left over the digits; index arithmetic never leaves the array
    size_t first = len - 1;
    while ((first > 0) && isdigit((unsigned char) name[first - 1]))
        --first;

    // we got something like "()" or "X)" -- no digits at all
    if (first == (len - 1))
        return 0;

    // the digits must be preceded by "("
    if ((first == 0) || (name[first - 1] != '('))
        return 0;

    // strip "(1)"
    name[first - 1] = 0;

    return 1;
}

// rootof -- split up filenames into components
// root: result; fully overwritten (any previous root_suf is not freed)
// ent: directory entry as returned by readdir
// size: file size to record in root_size
//
// e.g. "foo(1).pdf" --> root_core="foo", root_paren=1, root_suf=".pdf"
//
// root_suf is a private heap copy: readdir may reuse the buffer behind ent
// on its next call, and the caller may realloc the array holding root, so
// the suffix must not point into either of them.  Exits on allocation
// failure.
void
rootof(struct root *root,struct dirent *ent,off_t size)
{
    char tail[sizeof(root->root_core)];

    memset(root,0,sizeof(*root));

    // get directory entry
    root->root_ent = *ent;

    // remember the file size
    root->root_size = size;

    // get the filename (bounded to the size of root_core)
    snprintf(tail,sizeof(tail),"%s",ent->d_name);

    // remember and strip the extension
    char *dot = strrchr(tail,'.');
    if (dot != NULL) {
        size_t suflen = strlen(dot) + 1;

        root->root_suf = malloc(suflen);
        if (root->root_suf == NULL) {
            perror("malloc");
            exit(1);
        }
        memcpy(root->root_suf,dot,suflen);

        *dot = 0;
    }

    // strip a trailing "(1)" if present, then what is left is the
    // root/base name (e.g. "foo(1).pdf" --> "foo")
    root->root_paren = rootstrip(tail);
    strcpy(root->root_core,tail);

#ifdef DEBUG
    rootshow(root,"rootof");
#endif
}

// fullpath -- build "dir/tail" in path
// path: output buffer; the caller must supply at least
//       strlen(dir) + strlen(tail) + 2 bytes (no bound is checked here)
// dir: directory part, copied verbatim
// tail: final component, appended after a single "/"
void
fullpath(char *path,const char *dir,const char *tail)
{
    size_t dirlen = strlen(dir);
    size_t taillen = strlen(tail);
    char *out = path;

    // directory part
    memcpy(out,dir,dirlen);
    out += dirlen;

    // separator
    *out++ = '/';

    // final component, including its terminating NUL
    memcpy(out,tail,taillen + 1);
}

// dirload -- load up directory into list
struct root *
dirload(const char *path,int *countp)
{
    char file[1024];
    struct root *list = NULL;
    int count = 0;
    int cap = 0;

    // open directory
    DIR *dirp = opendir(path);
    if (dirp == NULL) {
        perror("open");
        exit(1);
    }

    while (1) {
        struct dirent *ent = readdir(dirp);
        if (ent == NULL)
            break;

        // skip over "." and ".."
        const char *tail = ent->d_name;
        if (tail[0] == '.') {
            if (tail[1] == 0)
                continue;
            if ((tail[1] == '.') && (tail[2] == 0))
                continue;
        }

        // optional -- only ordinary files
#if 1
        if (ent->d_type != DT_REG)
            continue;
#endif

        // enlarge array
        if (count >= cap) {
            cap  = 10;
            list = realloc(list,sizeof(*list) * cap);
            if (list == NULL) {
                perror("realloc");
                exit(1);
            }
        }

        // get file size
        struct stat st;
        fullpath(file,path,ent->d_name);
        if (stat(file,&st) < 0) {
            perror(file);
            exit(1);
        }

        // parse the filename
        rootof(&list[count],ent,st.st_size);
          count;
    }

    closedir(dirp);

    // return count to caller
    *countp = count;

    return list;
}

// fileread -- read up to len bytes, retrying after short reads
// RETURNS: number of bytes read (less than len only at end of file),
// or -1 on a read error (errno is set by read)
static ssize_t
fileread(int fd,char *buf,size_t len)
{
    size_t got = 0;

    while (got < len) {
        ssize_t xlen = read(fd,buf + got,len - got);
        if (xlen < 0)
            return -1;

        // end of file
        if (xlen == 0)
            break;

        got += (size_t) xlen;
    }

    return (ssize_t) got;
}

// filematch -- compare the file contents
// dir: directory that holds both files
// lhs, rhs: parsed entries to compare (root_size and root_ent.d_name are used)
// RETURNS: 1=match, 0=mismatch
//
// Files of different recorded size are a mismatch without being opened.
// A file that turns out shorter than its recorded size counts as a
// mismatch.  Exits with a message if a file cannot be opened or read.
int
filematch(const char *dir,const struct root *lhs,const struct root *rhs)
{
    char lhsfile[1024];
    char lhsbuf[4096];

    char rhsfile[1024];
    char rhsbuf[4096];

    // file sizes must match
    if (lhs->root_size != rhs->root_size)
        return 0;

    // open the LHS file
    fullpath(lhsfile,dir,lhs->root_ent.d_name);
    int fdlhs = open(lhsfile,O_RDONLY);
    if (fdlhs < 0) {
        perror(lhsfile);
        exit(1);
    }

    // open the RHS file
    fullpath(rhsfile,dir,rhs->root_ent.d_name);
    int fdrhs = open(rhsfile,O_RDONLY);
    if (fdrhs < 0) {
        perror(rhsfile);
        exit(1);
    }

    int match = 1;

    off_t resid = lhs->root_size;
    while (resid > 0) {
        // size of this chunk
        size_t want = sizeof(lhsbuf);
        if (resid < (off_t) want)
            want = (size_t) resid;

        // get LHS chunk
        ssize_t llen = fileread(fdlhs,lhsbuf,want);
        if (llen < 0) {
            perror(lhsfile);
            exit(1);
        }

        // get RHS chunk
        ssize_t rlen = fileread(fdrhs,rhsbuf,want);
        if (rlen < 0) {
            perror(rhsfile);
            exit(1);
        }

        // a file that shrank since its size was recorded cannot match
        if (((size_t) llen != want) || ((size_t) rlen != want)) {
            match = 0;
            break;
        }

        // they must match
        if (memcmp(lhsbuf,rhsbuf,want) != 0) {
            match = 0;
            break;
        }

        resid -= (off_t) want;
    }

    close(fdlhs);
    close(fdrhs);

    return match;
}

// main -- report "name(N).ext" files that duplicate "name.ext"
// argv[1] (optional): directory to scan; defaults to $HOME/Downloads, with
// the password database consulted when HOME is unset
//
// For every file without a "(N)" counter, each file with a counter that has
// the same base name, the same suffix, the same size and identical contents
// is printed as "X is dup of Y" and flagged via root_dup.  Nothing is
// deleted.
// RETURNS: 0 on success, 1 if the directory name cannot be determined or
// does not fit the path buffer
int
main(int argc,char **argv)
{
    char path[1024];

    // skip over program name
    --argc;
    ++argv;

    // find the directory
    if (argc > 0) {
        if (strlen(*argv) >= sizeof(path)) {
            fprintf(stderr,"main: directory name too long\n");
            return 1;
        }
        strcpy(path,*argv);
    }
    else {
        // get HOME path
        const char *home = getenv("HOME");
        if (home == NULL) {
            struct passwd *pw = getpwuid(getuid());
            if (pw == NULL) {
                fprintf(stderr,"main: unable to find home directory\n");
                return 1;
            }
            home = pw->pw_dir;
        }

        // fullpath() does not check bounds, so do it here
        // (sizeof of the literal accounts for the "/" and the NUL)
        if ((strlen(home) + sizeof("/Downloads")) > sizeof(path)) {
            fprintf(stderr,"main: home directory name too long\n");
            return 1;
        }

        // make ~/Downloads
        fullpath(path,home,"Downloads");
    }

#ifdef DEBUG
    // line buffering keeps stdout and stderr interleaved in order
    setvbuf(stdout,NULL,_IOLBF,0);
    setvbuf(stderr,NULL,_IOLBF,0);
#endif

    int count = 0;
    struct root *list = dirload(path,&count);

    for (int lhsidx = 0;  lhsidx < count;  ++lhsidx) {
        struct root *lhs = &list[lhsidx];

        // must _not_ have "(1)"
        if (lhs->root_paren)
            continue;

        rootshow(lhs,"LHS");

        for (int rhsidx = 0;  rhsidx < count;  ++rhsidx) {
            // skip over the same entry
            if (rhsidx == lhsidx)
                continue;

            struct root *rhs = &list[rhsidx];

            rootshow(rhs,"RHS");

            // file types must match
            if (rhs->root_ent.d_type != lhs->root_ent.d_type)
                continue;

            // must have "(1)"
            if (! rhs->root_paren)
                continue;

            // suffix must match
            // both entries must have [or _not_ have] a suffix
            if (lhs->root_suf != NULL) {
                if (rhs->root_suf == NULL)
                    continue;
                if (strcmp(lhs->root_suf,rhs->root_suf) != 0)
                    continue;
            }
            else {
                if (rhs->root_suf != NULL)
                    continue;
            }

            // core must match
            if (strcmp(lhs->root_core,rhs->root_core) != 0)
                continue;

            // contents must match
            if (! filematch(path,lhs,rhs))
                continue;

            printf("%s is dup of %s\n",
                rhs->root_ent.d_name,lhs->root_ent.d_name);

            // mark it as a removable duplicate
            rhs->root_dup = 1;
        }
    }

    // release the array returned by dirload
    free(list);

    return 0;
}

這是一個測驗perl腳本：

#!/usr/bin/perl
# dotest -- test program
# The optional first command-line argument is handed to master(), which
# uses it as the name of the program to run.

# run all test steps, then report success to the shell
master(@ARGV);
exit(0);

# master -- master control
# Arguments: optional name of the program under test (default "duptest"),
# taken relative to $ENV{PWD}.
# Sets the globals $xfile (program path), $pwd and $tstdir (scratch
# directory), then calls dotest() once per (filename, contents) step, in
# order.
sub master
{
    my(@args) = @_;

    # program under test
    $xfile = shift(@args);
    $xfile = "duptest"
        unless (defined($xfile));
    $pwd = $ENV{PWD};
    $xfile = "$pwd/$xfile";

    # scratch directory
    $tstdir = "/tmp/testdir";

    # each step adds one more file: [name, contents]
    my(@steps) = (
        ["abc","xyz"],

        ["abc.pdf","jkl"],
        ["abc(1).pdf","jkl"],

        ["abc(2)","xyz"],
        ["abc(3)","xx"],
        ["abc(3)","xzy"],

        ["def","blah"],
        ["def(3)","blah"],
        ["def.pdf","blah"],
    );

    foreach my $step (@steps) {
        dotest($step->[0],$step->[1]);
    }
}

# dotest -- add one file to the test set and run the program on it
# Arguments: $file (name of the new file), $body (its contents)
# Recreates $tstdir from scratch, writes every file accumulated so far in
# @allfiles (a later file with the same name overwrites an earlier one),
# runs "$xfile $tstdir" and prints each line of its output.
# Dies if the directory cannot be prepared, a file cannot be written, or
# the program fails in any way.
sub dotest
{
    my($file,$body) = @_;

    printf("\n");
    printf("%s\n","-" x 80);

    # list form of system() runs the commands without going through a shell
    system("rm","-fr",$tstdir) == 0 or
        die("dotest: unable to remove '$tstdir'\n");
    system("mkdir","-p",$tstdir) == 0 or
        die("dotest: unable to create '$tstdir'\n");

    push(@allfiles,[$file,$body]);

    ###@rfiles = shuffle(@allfiles);
    @rfiles = @allfiles;

    foreach $pair (@rfiles) {
        ($tail,$body) = @$pair;
        printf("dotest: FILE %s '%s'\n",$tail,$body);

        $file = sprintf("%s/%s",$tstdir,$tail);

        open($xfdst,">",$file) or
            die("dotest: unable to open '$file' -- $!\n");
        print($xfdst $body);
        close($xfdst) or
            die("dotest: unable to write '$file' -- $!\n");
    }

    @xfiles = (`$xfile $tstdir`);

    # $? holds the full wait status: the exit code is in the high byte and
    # the terminating signal in the low byte, so test the whole value --
    # a program killed by a signal has an exit code of zero
    my $status = $?;
    $code = $status >> 8;
    die("dotest: program aborted (status $status)\n")
        if ($status != 0);

    foreach $tail (@xfiles) {
        chomp($tail);
        printf("dotest: XDUP %s\n",$tail);
    }
}

這是測驗程式的輸出：


--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: XDUP abc(1).pdf is dup of abc.pdf

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: FILE def.pdf 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

uj5u.com熱心網友回復：

readdir() 不像 ls 那樣依排序讀取檔案，而是按檔案在目錄中的存放順序讀取。下面是您的程式的一個可以運行的版本，但它的行為還不完全正確，也不是您想要的結果。請自行修正。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>
#include <pwd.h>

// main -- list entries of $HOME/Downloads that look like copies of another
// For every entry whose name does not start with '.', prints the name of
// each differently-named entry that contains it as a substring
// (e.g. "a.txt" reports "a.txt(2)").
// Errors are reported on stdout as "<errno> <message>".
// RETURNS: 0 on success, -1 if the directory cannot be determined or
// entered, 1 if it cannot be opened
int main(){
    char path[1024];
    const char *home;
    DIR *dir,*cmpdir;
    struct dirent *entry,*cmpentry;
    int m,n;

    // get HOME path; fall back to the password database when HOME is unset
    home = getenv("HOME");
    if (home == NULL) {
        struct passwd *pw = getpwuid(getuid());
        if (pw == NULL) {
            fprintf(stdout,"unable to find home directory\n");
            return -1;
        }
        home = pw->pw_dir;
    }

    // make ~/Downloads, refusing names that do not fit the buffer
    n = snprintf(path,sizeof(path),"%s%s",home,"/Downloads");
    if (n < 0 || (size_t)n >= sizeof(path)) {
        fprintf(stdout,"home directory name too long\n");
        return -1;
    }

    errno = 0;
    if (chdir(path) != 0) {
        m = errno;
        fprintf(stdout,"%d %s\n",m,strerror(m));
        return -1;
    }

    // outer stream: candidate base names
    errno = 0;
    if ((dir = opendir(path)) == NULL) {
        m = errno;
        fprintf(stdout,"%d %s\n",m,strerror(m));
        return 1;
    }

    // inner stream: opened once and rewound for every outer entry
    errno = 0;
    if ((cmpdir = opendir(path)) == NULL) {
        m = errno;
        fprintf(stdout,"%d %s\n",m,strerror(m));
        closedir(dir);
        return -1;
    }

    while ((entry = readdir(dir)) != NULL) {
        // skip ".", ".." and hidden files
        if (entry->d_name[0] == '.')
            continue;

        // the two streams are independent, so reading cmpdir leaves
        // entry untouched
        rewinddir(cmpdir);
        while ((cmpentry = readdir(cmpdir)) != NULL) {
            // skip the entry itself
            if (strcmp(entry->d_name,cmpentry->d_name) == 0)
                continue;

#ifdef DEBUG
            // trace of every pair compared
            fprintf(stdout,"%s %s\n",entry->d_name,cmpentry->d_name);
            fflush(stdout);
#endif

            if (strstr(cmpentry->d_name,entry->d_name) != NULL)
                fprintf(stdout,"%s\n",cmpentry->d_name);
        }
    }

    closedir(cmpdir);
    closedir(dir);

    return 0;
}

轉載請註明出處，本文鏈接：https://www.uj5u.com/caozuo/537442.html

標籤：CLinux

上一篇：是否可以在c的函式中傳遞結構成員？

下一篇：Unsafe.cpp(JDK原始碼)的method:staticjlong??find_field_offset中的JavaFieldStream是什么意思？