源码网商城,靠谱的源码在线交易网站 我的订单 购物车 帮助

源码网商城

fdupe 查找重复文件的Perl脚本代码

  • 时间:2021-06-26 22:59 编辑: 来源: 阅读:
  • 扫一扫,手机访问
摘要:fdupe 查找重复文件的Perl脚本代码
图: [img]http://files.jb51.net/file_images/article/201303/K1uf.png[/img]
[u]复制代码[/u] 代码如下:
#!/usr/bin/perl
#
# fdupe tool - finding duplicate files
#
# $Id: fdupe,v 1.7 2011/10/14 20:11:21 root Exp root $
#
# Source code Copyright (c) 1998,2011 Bernhard Schneider.
# May be used only for non-commercial purposes with
# appropriate acknowledgement of copyright.
#
# FILE :        fdupe
# DESCRIPTION : script finds duplicate Files.
# AUTHOR:       Bernhard Schneider <bernhard@neaptide.org>
# hints, corrections & ideas are welcome
#
# usage: fdupe.pl <path> <path> ...
#        find / -xdev | fdupe.pl
#
# how to select and remove duplicates:
#   redirect output to >file, edit the file and mark lines you
#   wish to move/delete with a preceding dash (-)
#   Use following script to delete marked files:
#   #!/usr/bin/perl -n
#   chomp; unlink if s/^-//;
#
# history:
# 12.05.99 - goto statement replaced with next
# 14.05.99 - minor changes
# 18.05.99 - removed confusing 'for $y'
#            included hash-search
# 20.05.99 - minor changes
# 02.03.00 - some functions rewritten, optimized for speed
# 10.01.01 - hint-fix by Ozzie |ozric at kyuzz.org|
# 05.03.02 - fixed hangups by reading block/char-Devices
# 08.09.11 - skips checking of hard links
# 14.10.11 - accept file names from stdin
#

use strict;
use warnings;

$| = 1;    # unbuffered output so progress messages appear immediately

# Regular files found so far, grouped by size: size in bytes => [ names ].
my %farray = ();

# Handle and [dev, ino] of the file most recently opened by getchunk().
# mycmp() compares its argument against this file.
my $F1;
my $statF1;

# ------------------------------
# Record one path in %$by_size.
#
#   $path    - file name to examine (not followed if it is a symlink)
#   $by_size - hash ref: size => [ names ]
#   $recurse - true: descend into directories; false: ignore them
#
# Only regular files are recorded; symlinks, sockets, pipes and devices
# are skipped.  Paths that cannot be lstat()ed are reported and skipped.
sub add_path {
    my ($path, $by_size, $recurse) = @_;

    my @st = lstat $path;
    unless (@st) {
        warn "fdupe: cannot stat $path: $!\n";
        return;
    }

    # "_" reuses the lstat() result above, so a symlink is neither -d nor -f.
    if (-d _) {
        scan($path, $by_size) if $recurse;
        return;
    }
    return unless -f _;

    # Size comes from lstat(): zero-length files get the key 0.
    push @{ $by_size->{ $st[7] } }, $path;
    return;
}

# ------------------------------
# traverse directories
#
#   $dir     - directory to walk recursively
#   $by_size - optional hash ref to fill (defaults to the global %farray)
#
# Dies if $dir cannot be opened.
sub scan {
    my ($dir, $by_size) = @_;
    $by_size = \%farray unless defined $by_size;

    opendir(my $dh, $dir) or die "($dir) $!:$@";
    my @entries = grep { !/^\.\.?$/ } readdir $dh;
    closedir $dh;

    add_path("$dir/$_", $by_size, 1) for @entries;
    return;
}

# ------------------------------
# get chunk of bytes from a file
#
#   $fsize  - expected size of the file
#   $pfname - reference to the file name
#
# Returns the first 32 bytes (or the whole file if it is shorter), or undef
# if the file cannot be opened or yields fewer bytes than expected.
# On success the file stays open as the left-hand side for mycmp().
sub getchunk {
    my ($fsize, $pfname) = @_;
    my $chunksize = 32;

    # Forget the previous file so a failure here can never leave mycmp()
    # comparing against a stale handle.
    undef $F1;
    undef $statF1;

    # Three-argument open: the name is never interpreted as a mode or pipe.
    open(my $fh, '<', $$pfname) or return;
    binmode $fh;
    my @st = stat $fh;

    my $buff;
    my $nread = read($fh, $buff, $chunksize);
    return unless defined $nread;
    return unless $nread == $chunksize || $nread == $fsize;

    $F1     = $fh;
    $statF1 = @st ? [ @st[ 0, 1 ] ] : undef;
    return $buff;
}

# ------------------------------
# compare two files
#
#   $fptr - reference to the name of the file to compare with the file
#           last opened by getchunk()
#
# Returns 0 if both files have identical contents, -1 otherwise (including
# when either file cannot be opened or read).
sub mycmp {
    my ($fptr) = @_;
    my $buffsize = 16 * 1024;

    return -1 unless $F1;
    open(my $fh2, '<', $$fptr) or return -1;
    binmode $fh2;

    # Hard links to the same inode are identical by definition; skip the
    # read.  The device must match too, since inode numbers are only unique
    # per filesystem.  The nlink test keeps platforms that report no real
    # inode numbers from matching here.
    my @st2 = stat $fh2;
    return 0
        if @st2
        && $statF1
        && $st2[3] > 1
        && $st2[0] == $statF1->[0]
        && $st2[1] == $statF1->[1];

    seek($F1, 0, 0);
    while (1) {
        my ($buffa, $buffb);
        my $nread1 = read($F1,  $buffa, $buffsize);
        my $nread2 = read($fh2, $buffb, $buffsize);
        return -1 unless defined $nread1 && defined $nread2;
        return -1 if $nread1 != $nread2 || $buffa ne $buffb;
        last unless $nread1;    # both files hit EOF together
    }
    return 0;
}

# ------------------------------
# Find groups of identical files among files of one size.
#
#   $fsize - the common size of all files in @$names
#   $names - array ref of file names
#
# Files are bucketed by their leading chunk and only compared in full with
# the first member of each group in the same bucket.  Unreadable files are
# reported and left out.  Returns a list of array refs, each holding the
# names of two or more identical files, in order of discovery.
sub find_dupes {
    my ($fsize, $names) = @_;
    my @groups;    # each element: [ refs to names of identical files ]
    my %index;     # leading chunk => [ indexes into @groups ]

    FILE:
    for my $pos (0 .. $#{$names}) {
        my $fptr  = \$names->[$pos];
        my $chunk = getchunk($fsize, $fptr);
        unless (defined $chunk) {
            warn "fdupe: cannot read $$fptr, skipped\n";
            next FILE;
        }

        for my $gi (@{ $index{$chunk} || [] }) {
            if (mycmp($groups[$gi][0]) == 0) {
                # found duplicate, collecting
                push @{ $groups[$gi] }, $fptr;
                next FILE;
            }
        }

        # nothing found, start a new group
        push @groups, [$fptr];
        push @{ $index{$chunk} }, $#groups;
    }

    return map { [ map { $$_ } @$_ ] } grep { @$_ > 1 } @groups;
}

# ------------------------------
# Main flow: collect files (from @ARGV directories when STDIN is a terminal,
# otherwise one file name per line from STDIN), then print every group of
# duplicates, largest files first.
sub run {
    print "collecting files and sizes ...\n";

    if (-t STDIN) {
        @ARGV = ('.') unless @ARGV;    # use wd if no arguments given
        scan($_) for @ARGV;
    }
    else {
        while (my $line = <STDIN>) {
            $line =~ s/[\r\n]+\z//;
            next if $line eq '';
            add_path($line, \%farray, 0);
        }
    }

    print "now comparing ...\n";

    for my $fsize (sort { $b <=> $a } keys %farray) {
        my $names = $farray{$fsize};

        # skip files with unique file size
        next if @$names < 2;

        # show found dupes for actual size
        for my $group (find_dupes($fsize, $names)) {
            print "\n size: $fsize\n\n";
            print "$_\n" for @$group;
        }
    }

    undef $F1;
    return;
}

run() unless caller;
  • 全部评论(0)
联系客服
客服电话:
400-000-3129
微信版

扫一扫进微信版
返回顶部