For speed fanatics only.
I racked my brain trying to come up with a way to make
this faster. The best method I could find produced
only a modest 14% speedup with my test data.
## Read, parse, and create CSV records.
## 2005-02-03
## Added a faster mode.

# The program conforms to the CSV specification at this site:
# http://www.creativyst.com/Doc/Articles/CSV/CSV01.htm
# The only extension is that you can change the field separator.
# To use a field separator other than a comma (for example,
# a semicolon), call:
#   ";".is_fs
#
# After a record has been read and parsed,
# $csv_s contains the record in raw string format.
#
# If $csv_error_check == true, fields will be checked
# for improperly escaped double quotes.
#
# If $csv_fast == true, a slightly faster parser will be used.
# Differences: the CSV file cannot contain the character 1.chr,
# and an empty line will be parsed to [] instead of [""].
z
# CSV helpers added to Array.
class Array
  # Render the array as a single CSV record (no trailing newline).
  #
  # Every element is converted with to_s. A field is wrapped in double
  # quotes, with embedded double quotes doubled, when it contains the
  # field separator, a double quote, a line break (LF or CR), or leading
  # or trailing whitespace.
  #
  # The separator is read from $csv_fs; when that is still nil it is
  # initialised to "," through String#is_fs.
  #
  # Returns the record as a String.
  def to_csv
    ','.is_fs if $csv_fs.nil?
    map do |item|
      field = item.to_s
      # \A and \z anchor the whole field; embedded line breaks are caught
      # by the character class, including a bare carriage return.
      if field.include?($csv_fs) || field =~ /\A\s|\s\z|["\r\n]/
        field = '"' + field.gsub('"', '""') + '"'
      end
      field
    end.join($csv_fs)
  end

  # Return a copy of the array in which every doubled double quote ("")
  # inside each string element has been collapsed to a single one (").
  def unescape
    map { |field| field.gsub('""', '"') }
  end
end
z
# CSV parsing helpers added to String.
class String
  # Install self as the CSV field separator and build the matching
  # field regexp.
  #
  # Sets $csv_fs to self and $csv_re to a regexp that matches one field
  # followed by the separator. Capture group 1 holds the content of a
  # double-quoted field (embedded quotes are assumed to be escaped as "");
  # capture group 2 holds an unquoted field.
  #
  # The separator is escaped with Regexp.escape, so regexp metacharacters
  # and separators longer than one character are accepted.
  #
  # Raises ArgumentError if self is empty.
  # Returns the new regexp.
  def is_fs
    raise ArgumentError, 'field separator must not be empty' if empty?

    $csv_fs = self
    sep = Regexp.escape($csv_fs)
    # Optional whitespace around a field. The lookahead keeps the padding
    # from consuming the separator itself when the separator is a
    # whitespace character such as a tab.
    pad = "(?:(?!#{sep})\\s)*"
    $csv_re = %r{
      #{pad}
      (?:
        "( [^"]* (?: "" [^"]* )* )" |
        ( .*? )
      )
      #{pad}
      #{sep}
    }mx
  end

  # Split self, one raw CSV record, into an array of field strings.
  #
  # Uses the separator in $csv_fs, defaulting it to "," when unset.
  #
  # When $csv_fast is true, each field is rewritten to end with 1.chr,
  # doubled quotes are collapsed over the whole string, and the result is
  # split on 1.chr. In this mode the record must not contain 1.chr, and an
  # empty record yields [] instead of [""].
  #
  # Otherwise the record is scanned field by field and the doubled quotes
  # are collapsed with Array#unescape.
  def parse_string
    ','.is_fs if $csv_fs.nil?

    # Terminate the last field so every field is followed by a separator.
    terminated = self + $csv_fs

    if $csv_fast
      marked = terminated.gsub($csv_re, '\1\2' + "\1")
      # Drop the marker after the last field before splitting.
      marked.gsub('""', '"')[0..-2].split("\1", -1)
    else
      terminated.scan($csv_re).flatten.compact.unescape
    end
  end
end
z
z
# Read the next CSV record from +file+ and return it parsed.
#
# Lines are accumulated into $csv_s until the text contains an even number
# of double quotes, so a quoted field may span several lines. The trailing
# line terminator is then removed and the record is parsed with
# String#parse_string; $csv_s keeps the raw record text.
#
# End of input is detected by +gets+ returning nil rather than by +eof?+.
# On ARGF, +eof?+ is true at the end of every individual input file, which
# would stop reading after the first of several files.
#
# file - any object whose +gets+ returns the next line, or nil at the end
#        of input (an IO, a StringIO, ARGF).
#
# Returns the array of fields, or nil when no input is left.
# Raises RuntimeError if the input ends inside an unterminated quoted field.
def get_rec(file)
  $csv_s = ''.dup
  loop do
    line = file.gets
    if line.nil?
      raise 'The csv file is malformed.' unless $csv_s.empty?

      return nil
    end
    $csv_s << line
    # An even quote count means every quoted field has been closed.
    break if $csv_s.count('"').even?
  end
  $csv_s.chomp!
  $csv_s.parse_string
end
z
z
# Demo driver: read CSV records from ARGF using the fast parser and, for
# each one, print a divider, the raw record text, the parsed fields, and
# the fields serialised back to CSV.
$csv_fast = true

rec = get_rec(ARGF)
while rec
  puts '----------------'
  puts $csv_s
  p rec
  puts rec.to_csv
  rec = get_rec(ARGF)
end