當我們從網路上重複下載同一個檔案時,檔名後面會被加上 (2)、(3)... 之類的編號。
例子
我想用 C 把這些重複的檔案刪掉,第一步是先把它們找出來並列印檔名。我寫了一些程式碼,但它不起作用。
// Question code: scans ~/Downloads and, for every entry whose name does not
// start with '.', rescans the same directory and prints each other name that
// contains the entry's name as a substring.
int main(){
const char *path;
DIR *dir;
struct dirent* entry;
if((path=getenv("HOME"))==NULL){// take the home directory from $HOME, else from the passwd entry
path = getpwuid(getuid())->pw_dir;
}
const char *downloads = "/Downloads";
// NOTE(review): path still points at the string returned by getenv() or
// getpwuid(); nothing in this block reserves room for the appended suffix.
strcat(path,downloads); // build ~/Downloads
if(chdir(path)!=0){
perror("chdir()");
return -1;
}
if((dir=opendir(path))==NULL){ // outer stream: one pass over the directory
perror("open");
return 1;
}
while((entry=readdir(dir))!=NULL){
struct dirent *cmpentry;
DIR *cmpdir;
// inner stream: the directory is opened again for every outer entry;
// no closedir() call appears anywhere in this block
if((cmpdir=opendir(path))==NULL){
perror("opendir");
return -1;
}
while((cmpentry=readdir(cmpdir))!=NULL){
// consider only pairs of different names where the outer name is not hidden
if((entry->d_name[0]!='.')&&strcmp(entry->d_name,cmpentry->d_name)!=0){
// plain substring test: is the outer name contained in the inner one?
char *ptr=strstr(cmpentry->d_name,entry->d_name);
if(ptr!=NULL)
printf("%s\n",cmpentry->d_name);
}
}
}
}
我該如何解決?
uj5u.com熱心網友回復:
一系列問題...
- path 沒有足夠的空間供 strcat 使用,所以你有 UB(未定義的行為)
- 無需使用 chdir
- 沒有 closedir 呼叫,所以對於一個大目錄,你會用完檔案描述符
- 沒有跳過 . 和 .. 條目
- 只使用 strcmp 和 strstr 是不夠的,會出現重複和/或遺漏
- 重複打開同一個目錄很慢/很浪費。最好只讀取一次目錄,並將條目保存在陣列中
一些修復:
- 將資料擷取到陣列中
- 使用輔助結構 struct root(見下文)將檔案名拆分為組成部分(例如 foo(1).pdf --> foo、(1) 和 .pdf)
- 添加了檔案長度和檔案內容的比較
這是原始代碼,帶有錯誤注釋:
// Original program from the question, reformatted, with every problem marked
// by a NOTE/BUG comment at the line it applies to.
int
main()
{
const char *path;
DIR *dir;
struct dirent *entry;
// get HOME path
if ((path = getenv("HOME")) == NULL) {
path = getpwuid(getuid())->pw_dir;
}
const char *downloads = "/Downloads";
// make ~/Downloads
// NOTE/BUG: not enough space in path (it points at the getenv()/getpwuid()
// string, so strcat appends past the end of that string)
// NOTE/BUG: path is a const
strcat(path, downloads);
// NOTE/BUG: no need to chdir as opendir is enough
if (chdir(path) != 0) {
perror("chdir()");
return -1;
}
// open directory
// NOTE/BUG: no closedir for this
if ((dir = opendir(path)) == NULL) {
perror("open");
return 1;
}
while ((entry = readdir(dir)) != NULL) {
// NOTE/BUG: no check for "." or ".."
struct dirent *cmpentry;
DIR *cmpdir;
// NOTE/BUG: no closedir for this (one stream is left open per outer entry)
if ((cmpdir = opendir(path)) == NULL) {
perror("opendir");
return -1;
}
while ((cmpentry = readdir(cmpdir)) != NULL) {
// NOTE/BUG: strcmp sense is inverted
// NOTE/BUG: strcmp wrong
if ((entry->d_name[0] != '.') &&
strcmp(entry->d_name, cmpentry->d_name) != 0) {
// substring test of the outer name inside the inner name
char *ptr = strstr(cmpentry->d_name, entry->d_name);
if (ptr != NULL)
printf("%s\n", cmpentry->d_name);
}
}
}
}
在上面的代碼中,我使用cpp條件來表示舊代碼與新代碼:
#if 0
// old code
#else
// new code
#endif
#if 1
// new code
#endif
注意:可以用 unifdef -k 處理該檔案,把這些條件編譯區塊清理掉。
這是重構後的程式碼,並已加上註解:
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <pwd.h>
#include <string.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/stat.h>
// dbgprt -- printf-style debug trace to stderr
//
// Compiled in only when DEBUG is defined to a non-zero value, which is the
// same test the "#if DEBUG" blocks in this file use.  Otherwise it expands
// to an empty statement and its arguments are not evaluated.
// Uses the standard C99 __VA_ARGS__ form rather than the GNU-only named
// variadic parameter.
#if DEBUG
#define dbgprt(...) \
    fprintf(stderr,__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif
// filename parsing control
// rootof fills in one of these for a directory entry; main compares them
// pairwise to find numbered copies of the same file.
struct root {
struct dirent root_ent; // raw directory entry (a copy)
off_t root_size; // file size
int root_paren; // 1=name (minus suffix) ends in a "(<digits>)" counter such as "(1)"
int root_dup; // 1=is a duplicate (set by main once a match is confirmed)
char *root_suf; // suffix/extension starting at the last '.' (e.g. ".pdf"); NULL if the name has no '.'
char root_core[256]; // root/core/base name: suffix and "(<digits>)" removed
};
// rootshow -- show root struct contents on the debug trace
//
// root: parsed entry to show
// who:  label naming the caller; printed at the end of the line
//
// All output goes through dbgprt, so nothing is printed unless the debug
// trace is compiled in.
void
rootshow(const struct root *root,const char *who)
{
    // root_suf is NULL for names without a '.', and passing NULL for "%s"
    // is undefined behavior -- substitute a printable placeholder
    dbgprt("rootshow: d_name='%s' root_dup=%d root_paren=%d root_core='%s' root_suf='%s' (from %s)\n",
        root->root_ent.d_name,
        root->root_dup,root->root_paren,
        root->root_core,
        (root->root_suf != NULL) ? root->root_suf : "(null)",
        who);
}
// parenstrip -- strip a trailing "(<digits>)" counter from name, in place
// RETURNS: 1=counter found and removed, 0=name left untouched
static int
parenstrip(char *name)
{
    size_t len = strlen(name);

    // rightmost char must be ")"
    // (len is 0 for names such as ".bashrc", where everything is suffix)
    if ((len == 0) || (name[len - 1] != ')'))
        return 0;

    // walk left over the digits that precede ")"
    size_t rparen = len - 1;
    size_t digits = rparen;
    while ((digits > 0) && isdigit((unsigned char) name[digits - 1]))
        --digits;

    // we got something like "()" or "X)" -- no digits at all
    if (digits == rparen)
        return 0;

    // we must have the "(" immediately before the digits
    if ((digits == 0) || (name[digits - 1] != '('))
        return 0;

    // strip "(1)"
    name[digits - 1] = 0;

    return 1;
}

// rootof -- split up a filename into components
//
// root: result; completely overwritten
// ent:  directory entry to parse (copied into root->root_ent)
// size: file size to record in root->root_size
//
// On return:
//   root_suf   points at the last '.' of root->root_ent.d_name, or is NULL
//              when the name has no '.'
//   root_paren is 1 when the name (minus suffix) ends in "(<digits>)"
//   root_core  is the name with the suffix and any "(<digits>)" removed
//   e.g. "foo(1).pdf" --> core "foo", paren 1, suffix ".pdf"
//
// NOTE: root_suf points into *root itself (not into the caller's ent, whose
// storage is reused by the next readdir), so it has to be recomputed if the
// struct is copied or moved.
void
rootof(struct root *root,struct dirent *ent,off_t size)
{
    char tail[sizeof(root->root_core)];

    memset(root,0,sizeof(*root));

    // get directory entry
    root->root_ent = *ent;

    // remember the file size
    root->root_size = size;

    // get the filename (bounded by what root_core can hold)
    snprintf(tail,sizeof(tail),"%s",root->root_ent.d_name);

    // remember and strip the extension
    char *dot = strrchr(tail,'.');
    if (dot != NULL) {
        root->root_suf = &root->root_ent.d_name[dot - tail];
        *dot = 0;
    }

    // get root/base (e.g. "foo.pdf" --> "foo")
    strcpy(root->root_core,tail);

    // e.g. "foo(1)" --> "foo"
    if (parenstrip(tail)) {
        root->root_paren = 1;
        strcpy(root->root_core,tail);
    }

#if DEBUG
    rootshow(root,"rootof");
#endif
}
// fullpath -- join a directory and a file name as "dir/tail"
//
// path: output buffer; the caller must provide room for
//       strlen(dir) + 1 + strlen(tail) + 1 bytes
// dir:  directory part
// tail: file name part
void
fullpath(char *path,const char *dir,const char *tail)
{
    size_t dirlen = strlen(dir);
    size_t taillen = strlen(tail);
    char *out = path;

    // directory part
    memcpy(out,dir,dirlen);
    out += dirlen;

    // separator
    *out++ = '/';

    // file name, including its terminating NUL
    memcpy(out,tail,taillen + 1);
}
// dirload -- load up directory into list
struct root *
dirload(const char *path,int *countp)
{
char file[1024];
struct root *list = NULL;
int count = 0;
int cap = 0;
// open directory
DIR *dirp = opendir(path);
if (dirp == NULL) {
perror("open");
exit(1);
}
while (1) {
struct dirent *ent = readdir(dirp);
if (ent == NULL)
break;
// skip over "." and ".."
const char *tail = ent->d_name;
if (tail[0] == '.') {
if (tail[1] == 0)
continue;
if ((tail[1] == '.') && (tail[2] == 0))
continue;
}
// optional -- only ordinary files
#if 1
if (ent->d_type != DT_REG)
continue;
#endif
// enlarge array
if (count >= cap) {
cap = 10;
list = realloc(list,sizeof(*list) * cap);
if (list == NULL) {
perror("realloc");
exit(1);
}
}
// get file size
struct stat st;
fullpath(file,path,ent->d_name);
if (stat(file,&st) < 0) {
perror(file);
exit(1);
}
// parse the filename
rootof(&list[count],ent,st.st_size);
count;
}
closedir(dirp);
// return count to caller
*countp = count;
return list;
}
// readfull -- read len bytes, retrying after short reads
// RETURNS: number of bytes read (less than len only at end of file),
// -1 on error with errno set by read
static ssize_t
readfull(int fd,char *buf,size_t len)
{
    size_t got = 0;

    while (got < len) {
        ssize_t xlen = read(fd,buf + got,len - got);
        if (xlen < 0)
            return -1;
        if (xlen == 0)
            break;
        got += (size_t) xlen;
    }

    return (ssize_t) got;
}

// filematch -- compare the file contents
//
// dir: directory that holds both files
// lhs: first file
// rhs: second file
//
// RETURNS: 1=match, 0=mismatch
// Files whose recorded sizes differ are a mismatch without being opened.
// A file that turns out shorter than its recorded size is a mismatch.
// Failure to open or read a file is reported on stderr and terminates the
// program.
int
filematch(const char *dir,const struct root *lhs,const struct root *rhs)
{
    char lhsfile[1024];
    char lhsbuf[4096];
    char rhsfile[1024];
    char rhsbuf[4096];

    // file sizes must match
    if (lhs->root_size != rhs->root_size)
        return 0;

    // open the LHS file
    fullpath(lhsfile,dir,lhs->root_ent.d_name);
    int fdlhs = open(lhsfile,O_RDONLY);
    if (fdlhs < 0) {
        perror(lhsfile);
        exit(1);
    }

    // open the RHS file
    fullpath(rhsfile,dir,rhs->root_ent.d_name);
    int fdrhs = open(rhsfile,O_RDONLY);
    if (fdrhs < 0) {
        perror(rhsfile);
        exit(1);
    }

    int match = 1;
    off_t resid = lhs->root_size;

    while (resid > 0) {
        // size of this chunk
        size_t want = sizeof(lhsbuf);
        if (resid < (off_t) want)
            want = (size_t) resid;

        // get LHS chunk
        ssize_t llen = readfull(fdlhs,lhsbuf,want);
        if (llen < 0) {
            perror(lhsfile);
            exit(1);
        }

        // get RHS chunk
        ssize_t rlen = readfull(fdrhs,rhsbuf,want);
        if (rlen < 0) {
            perror(rhsfile);
            exit(1);
        }

        // a file that shrank since its size was recorded is not a duplicate
        if (((size_t) llen != want) || ((size_t) rlen != want)) {
            match = 0;
            break;
        }

        // they must match
        if (memcmp(lhsbuf,rhsbuf,want) != 0) {
            match = 0;
            break;
        }

        resid -= (off_t) want;
    }

    close(fdlhs);
    close(fdrhs);

    return match;
}
// main -- report numbered download copies that duplicate their original
//
// usage: prog [directory]
// With no argument the directory is "Downloads" under $HOME (or under the
// passwd home directory when HOME is unset).
//
// For every file without a "(N)" counter, each file with the same core name
// and suffix plus a counter is compared by size and content; matches are
// printed as "<copy> is dup of <original>" and flagged in root_dup.
//
// RETURNS: 0 on success, 1 when the directory name cannot be built
int
main(int argc,char **argv)
{
    char path[1024];

    // skip over program name
    --argc;
    ++argv;

    // find the directory
    if (argc > 0) {
        if (strlen(*argv) >= sizeof(path)) {
            fprintf(stderr,"%s: path too long\n",*argv);
            return 1;
        }
        strcpy(path,*argv);
    }
    else {
        // get HOME path
        const char *home = getenv("HOME");
        if (home == NULL) {
            struct passwd *pw = getpwuid(getuid());
            if (pw == NULL) {
                fprintf(stderr,"unable to determine home directory\n");
                return 1;
            }
            home = pw->pw_dir;
        }

        // make ~/Downloads -- sizeof counts the "/" and the terminating NUL
        if (strlen(home) + sizeof("/Downloads") > sizeof(path)) {
            fprintf(stderr,"%s: path too long\n",home);
            return 1;
        }
        fullpath(path,home,"Downloads");
    }

#if DEBUG
    setlinebuf(stdout);
    setlinebuf(stderr);
#endif

    int count = 0;
    struct root *list = dirload(path,&count);

    for (int lhsidx = 0; lhsidx < count; ++lhsidx) {
        struct root *lhs = &list[lhsidx];

        // must _not_ have "(1)"
        if (lhs->root_paren)
            continue;

        rootshow(lhs,"LHS");

        for (int rhsidx = 0; rhsidx < count; ++rhsidx) {
            // skip over the same entry
            if (rhsidx == lhsidx)
                continue;

            struct root *rhs = &list[rhsidx];
            rootshow(rhs,"RHS");

            // file types must match
            if (rhs->root_ent.d_type != lhs->root_ent.d_type)
                continue;

            // must have "(1)"
            if (! rhs->root_paren)
                continue;

            // suffix must match
            // both entries must have [or _not_ have] a suffix
            if (lhs->root_suf != NULL) {
                if (rhs->root_suf == NULL)
                    continue;
                if (strcmp(lhs->root_suf,rhs->root_suf) != 0)
                    continue;
            }
            else {
                if (rhs->root_suf != NULL)
                    continue;
            }

            // core must match
            if (strcmp(lhs->root_core,rhs->root_core) != 0)
                continue;

            // contents must match
            if (! filematch(path,lhs,rhs))
                continue;

            printf("%s is dup of %s\n",
                rhs->root_ent.d_name,lhs->root_ent.d_name);

            // mark it as a removable duplicate
            rhs->root_dup = 1;
        }
    }

    free(list);

    return 0;
}
這是一個測驗perl腳本:
#!/usr/bin/perl
# dotest -- test program

master(@ARGV);
exit(0);

# master -- master control
#
# arguments:
#   [0] -- name of the program under test, relative to the current
#          directory (default: "duptest")
#
# Sets the globals $xfile (program to run) and $tstdir (scratch directory),
# then feeds the test files to dotest one at a time.
sub master
{
    my(@argv) = @_;

    # program under test, made absolute via $PWD
    $xfile = shift(@argv) // "duptest";
    $pwd = $ENV{PWD};
    $xfile = "$pwd/$xfile";

    $tstdir = "/tmp/testdir";

    # [file name, file contents] -- order matters because dotest accumulates
    my @cases = (
        ["abc","xyz"],
        ["abc.pdf","jkl"],
        ["abc(1).pdf","jkl"],
        ["abc(2)","xyz"],
        ["abc(3)","xx"],
        ["abc(3)","xzy"],
        ["def","blah"],
        ["def(3)","blah"],
        ["def.pdf","blah"],
    );

    foreach my $case (@cases) {
        dotest(@$case);
    }
}
# dotest -- add one file to the test set, rebuild the directory, run the program
#
# arguments:
#   file -- name of the file to add
#   body -- contents to write into it
#
# Every file added by this and all earlier calls is recreated under $tstdir.
# Then $xfile is run on that directory and each line it prints is echoed
# with an "XDUP" tag.  Dies if a file cannot be created or if the program
# exits non-zero or is killed by a signal.
sub dotest
{
    my($file,$body) = @_;

    printf("\n");
    printf("%s\n","-" x 80);

    # start from an empty test directory
    system("rm -fr $tstdir");
    system("mkdir -p $tstdir");

    # the set of files accumulates across calls
    push(@allfiles,[$file,$body]);

    ###@rfiles = shuffle(@allfiles);
    @rfiles = @allfiles;

    foreach $pair (@rfiles) {
        ($tail,$body) = @$pair;
        printf("dotest: FILE %s '%s'\n",$tail,$body);

        $file = sprintf("%s/%s",$tstdir,$tail);
        open($xfdst,">$file") or
            die("dotest: unable to open '$file' -- $!\n");
        print($xfdst $body);
        close($xfdst);
    }

    # run the program under test and capture its output lines
    @xfiles = (`$xfile $tstdir`);

    # $? carries the exit code in its high byte and the terminating signal
    # in its low bits, so test the whole value -- looking only at the exit
    # code would let a crash (e.g. SIGSEGV) pass as success
    $status = $?;
    $code = $status >> 8;
    die("dotest: program aborted\n")
        if ($status != 0);

    foreach $tail (@xfiles) {
        chomp($tail);
        printf("dotest: XDUP %s\n",$tail);
    }
}
這是測驗程式的輸出:
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: XDUP abc(1).pdf is dup of abc.pdf
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: FILE def.pdf 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
uj5u.com熱心網友回復:
readdir() 不會像 ls 那樣按排序後的順序回傳檔案,而是按照條目在目錄中的儲存順序回傳。下面是您程式的一個可以執行的版本,但它的行為還不正確,並不是您想要的結果,請自行修正。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>
#include <pwd.h>
// report -- print an errno value and its message in "<errno> <message>" form
static void report(int err){
    fprintf(stdout,"%d %s\n",err,strerror(err));
}

// Lists, for every non-hidden entry of ~/Downloads, the other entries whose
// names contain that entry's name as a substring.
//
// Output (stdout): one "<entry> <candidate>" trace line per pair examined,
// followed by "<candidate>" alone when the substring test succeeds.
// Errors are reported on stdout as "<errno> <message>".
// RETURNS: 0 on success, non-zero on failure.
int main(){
    int m;
    char path[256];
    const char *home;
    DIR *dir,*cmpdir;
    struct dirent *entry,*cmpentry;

    // get HOME path, falling back on the passwd database
    // (getenv returns NULL when HOME is unset, so test before copying)
    home=getenv("HOME");
    if(home==NULL){
        struct passwd *pw=getpwuid(getuid());
        if(pw==NULL){
            fprintf(stdout,"unable to determine home directory\n");
            return -1;
        }
        home=pw->pw_dir;
    }

    // make ~/Downloads without overflowing path
    m=snprintf(path,sizeof(path),"%s%s",home,"/Downloads");
    if(m<0||(size_t)m>=sizeof(path)){
        report(ENAMETOOLONG);
        return -1;
    }

    if(chdir(path)!=0){
        report(errno);
        return -1;
    }

    // outer stream: the entry being searched for
    if((dir=opendir(path))==NULL){
        report(errno);
        return 1;
    }

    // inner stream: opened once and rewound for every outer entry, instead
    // of being reopened (and never closed) each time round
    if((cmpdir=opendir(path))==NULL){
        report(errno);
        closedir(dir);
        return -1;
    }

    // readdir results stay valid across readdir calls on a different
    // stream, so the two entries can be used side by side without copying
    while((entry=readdir(dir))!=NULL){
        // skip hidden entries, which also covers "." and ".."
        if(entry->d_name[0]=='.')
            continue;

        rewinddir(cmpdir);
        while((cmpentry=readdir(cmpdir))!=NULL){
            // skip over the same entry
            if(strcmp(entry->d_name,cmpentry->d_name)==0)
                continue;

            fprintf(stdout,"%s %s\n",entry->d_name,cmpentry->d_name);fflush(stdout);

            if(strstr(cmpentry->d_name,entry->d_name)!=NULL)
                fprintf(stdout,"%s\n",cmpentry->d_name);
        }
    }

    closedir(cmpdir);
    closedir(dir);
    return 0;}
轉載請註明出處,本文鏈接:https://www.uj5u.com/caozuo/537442.html
標籤:CLinux
下一篇:Unsafe.cpp(JDK原始碼)的method:staticjlong??find_field_offset中的JavaFieldStream是什么意思?
