On Oct 2, 6:17 am, Remco Hh <re... / huijdts.nl> wrote:
> I want to search in a directory for files, matching a certain regular
> expression. The script should not return true or false, but should give
> me a list (array) of filenames which are found.

Here's my 'findfile' script that I use daily. It lets you supply a regexp
for the filename and, optionally, another for the file content, limit the
depth of the search, choose whether or not to show every match inside a
file, and so on.

(You may need to unwrap some of the longer lines after copy/paste.)

See additional notes at the end.

Slim2:/usr/local/bin phrogz$ cat findfile
#!/usr/bin/env ruby

# Usage summary. Printed when the script is run without a name regexp or
# with -h/--help. Long lines are kept unwrapped so that every option
# description stays on its own indented line when printed.
USAGE = <<ENDUSAGE
Usage:
   findfile  [-d max_depth] [-a] [-c] [-i] name_regexp [content_regexp]
   -d,--depth        the maximum depth to recurse to (defaults to no limit)
   -a,--showall      with content_regexp, show every match per file
                     (defaults to only show the first-match per file)
   -c,--usecase      with content_regexp, use case-sensitive matching
                     (defaults to case-insensitive)
   -i,--includedirs  also find directories matching name_regexp
                     (defaults to files only; incompatible with content_regexp)
   -h,--help         show some help examples
ENDUSAGE

# Worked examples. Printed after USAGE when -h/--help is given. Long lines
# are kept unwrapped so that every explanatory line keeps its "   # " prefix
# when printed.
EXAMPLES = <<ENDEXAMPLES

Examples:
   findfile foo
   # Print the path to all files with 'foo' in the name

   findfile -i foo
   # Print the path to all files and directories with 'foo' in the name

   findfile js$
   # Print the path to all files whose name ends in "js"

   findfile js$ vector
   # Print the path to all files ending in "js" with "Vector" or "vector"
   # (or "vEcTOr", "VECTOR", etc.) in the contents, and print some of the
   # first line that has that content.

   findfile js$ -c Vector
   # Like above, but must match exactly "Vector" (not 'vector' or 'VECTOR').

   findfile . vector -a
   # Print the path to every file with "Vector" (any case) in it somewhere
   # printing every line in those files (with line numbers) with that content.

   findfile -d 0 .
   # Print the path to every file that is in the current directory.

   findfile -d 1 .
   # Print the path to every file that is in the current directory or any
   # of its child directories (but no subdirectories of the children).
ENDEXAMPLES

# Positional (unflagged) arguments, in the order they are expected on the
# command line. Frozen: the parser works on a copy instead of consuming it.
UNFLAGGED_ARGS = [ :name_regexp, :content_regexp ].freeze

# Turn a findfile command line into an options hash.
#
# argv - Array of String command-line words (normally ARGV).
#
# Returns a Hash that may contain:
#   :name_regexp, :content_regexp  - the first two non-flag words, as given
#   :max_depth                     - the word following -d/--depth converted
#                                    with String#to_i, plus one (Dir.crawl
#                                    counts the starting directory as depth
#                                    0, so "-d 0" must still reach the
#                                    entries directly inside it)
#   :showall, :usecase, :includedirs, :help - true when the flag was seen
# Keys for options that were not supplied are absent. Non-flag words beyond
# the expected positional arguments are ignored.
def parse_findfile_args( argv )
  args      = {}
  remaining = UNFLAGGED_ARGS.dup
  next_arg  = remaining.first   # slot the next non-flag word will fill

  argv.each do |word|
    case word
    when '-d', '--depth'
      # The following non-flag word is the depth, not a positional regexp
      next_arg = :max_depth
    when '-a', '--showall'
      args[:showall] = true
    when '-c', '--usecase'
      args[:usecase] = true
    when '-i', '--includedirs'
      args[:includedirs] = true
    when '-h', '--help'
      args[:help] = true
    else
      if next_arg
        args[next_arg] = ( next_arg == :max_depth ) ? word.to_i + 1 : word
        remaining.delete( next_arg )
      end
      # Fall back to the next unfilled positional slot (nil once all are used)
      next_arg = remaining.first
    end
  end

  args
end

ARGS = parse_findfile_args( ARGV )

# Print the usage text and stop when help was requested or when no name
# regexp was supplied; the examples are only added for an explicit
# help request.
help_requested = ARGS[:help]
if help_requested || !ARGS[:name_regexp]
  puts USAGE
  puts EXAMPLES if help_requested
  exit
end

class Dir
  # Recursively walk +path+, yielding every file found below it.
  #
  # path                - file or directory to start from
  # max_depth           - deepest level to visit (nil means no limit); the
  #                       starting path is depth 0, its entries are depth 1
  # include_directories - when true, directories are yielded too (before
  #                       their contents)
  # depth               - current recursion level; callers normally leave
  #                       this at its default
  #
  # Yields path, depth for each file (and each directory when requested).
  #
  # Returns nil when +depth+ exceeds +max_depth+ or after a rescued
  # SystemCallError, the block's value for a plain file, and a flattened
  # Array of the children's results for a directory.
  #
  # A SystemCallError raised while handling +path+ (including one raised by
  # the block) is reported with warn and does not stop the rest of the walk.
  #
  # NOTE(review): File.directory? also answers true for symlinks that point
  # at directories, so a symlink cycle is not detected here - confirm
  # whether that matters for the intended use.
  def self.crawl( path, max_depth=nil, include_directories=false, depth=0, &block )
    return if max_depth && depth > max_depth
    begin
      if File.directory?( path )
        yield( path, depth ) if include_directories
        # Skip the self/parent entries; recursing into them would never end.
        # Plain string comparison keeps hidden files such as ".profile".
        children = Dir.entries( path ).reject{ |entry| entry == '.' || entry == '..' }
        children.collect{ |child|
          Dir.crawl( path+'/'+child, max_depth, include_directories, depth+1, &block )
        }.flatten
      else
        yield( path, depth )
      end
    rescue SystemCallError => the_error
      warn "ERROR: #{the_error}"
    end
  end

end

# Main search: walk the tree below the current directory, print every path
# whose basename matches the name regexp (and, when a content regexp was
# given, whose contents match it too), then print a one-line summary.
start_time = Time.new

# File names are always matched case-insensitively.
name_match = Regexp.new( ARGS[:name_regexp], true )

# The content pattern also captures up to 20 characters of context on each
# side of the hit; it is case-insensitive unless -c/--usecase was given.
# It stays nil when no content regexp was supplied.
content_match = ARGS[:content_regexp] &&
  Regexp.new( ".{0,20}#{ARGS[:content_regexp]}.{0,20}", !ARGS[:usecase] )

file_count = 0
matching_count = 0

# Directories are only reported when no content regexp is in play.
include_dirs = ARGS[:includedirs] && !content_match

Dir.crawl( '.', ARGS[:max_depth], include_dirs ){ |file_path, _depth|
  if File.split( file_path )[ 1 ] =~ name_match
    if content_match
      if ARGS[:showall]
        # Print the path once, then every matching line beneath it
        shown_file = false
        IO.readlines( file_path ).each_with_index{ |line_text, line_index|
          if match = line_text[content_match]
            unless shown_file
              puts file_path
              matching_count += 1
              shown_file = true
            end
            # each_with_index counts from 0; report 1-based line numbers
            puts( ( "%5d:  " % ( line_index + 1 ) ) + match )
          end
        }
        puts " " if shown_file
      elsif IO.read( file_path ) =~ content_match
        # Only the first hit in the file is shown, with its context
        puts file_path, "  #{$~}", " "
        matching_count += 1
      end
    else
      puts file_path
      matching_count += 1
    end
  end
  # Every yielded path counts as examined, whether or not it matched
  file_count += 1
}
elapsed = Time.new - start_time
puts "Found #{matching_count} file#{matching_count==1?'':'s'} (out of #{file_count}) in #{elapsed} seconds"



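You do have to watch out for shell escaping of the regexp: either escape
characters as needed or quote your regexp. (In the first example below the
shell strips the unquoted backslash, so the script actually searches for
names containing a plain "d".)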

Slim2:/usr/local/bin phrogz$ findfile \d
./findfile
./index_gem_repository.rb
./p4d
./rdoc
./rdoc-osa
./svnadmin
./svndumpfilter
./update_rubygems
Found 8 files (out of 40) in 0.001228 seconds

Slim2:/usr/local/bin phrogz$ findfile \\d
./p4
./p4d
./rot13
./sqlite3
Found 4 files (out of 40) in 0.001088 seconds

Slim2:/usr/local/bin phrogz$ findfile \\d$
./p4
./rot13
./sqlite3
Found 3 files (out of 40) in 0.001118 seconds

Slim2:/usr/local/bin phrogz$ findfile "\d$"
./p4
./rot13
./sqlite3
Found 3 files (out of 40) in 0.001298 seconds