Robert Feldt wrote:
> On Mon, 19 Mar 2001, Michael Neumann wrote:
> 
> > bzip2 uses BWT (I don't recall what that stands for).
> > You can use BWT in combination with RLE which results in 
> > better compression as RLE alone.
> > 
> BWT is Burrows-Wheeler transform.
> 
> > I've implemented BWT some time ago in Ruby, but the whole program
> > including RLE was about 100 lines of code.
> > 
> > If you're interested I'll send you the sourcecode.
> >  
> I'd be interested; please send it to me.

The source is below.
BWT only is very short, but it makes no sense to use BWT alone, because
it do not compress. 

> > Another way could be to use what I call "bit table" compression,
> > where I generate an array of the most frequently used characters
> > and compress the whole data with a reduced bit-size (e.g. 5 bit). 
> > 
> Sounds like Huffman-coding to me?

Only that I do not create a tree.



#######################################



#!/usr/local/bin/ruby

#
# BWT compression
# 



class String  
  # rotates the string left, returns copy of the string
  def rol(n)
    self[n,size] + self[0,n]
  end
end # class String


##############################


def bwt_compress(str)
  arr = (0...(str.size)).to_a

  arr.sort! { |a,b|
    str.rol(a) <=> str.rol(b)
  }
  start_with = arr.index(0)

  l = arr.collect {|i| str.rol(i)[-1,1]}.join("")
  [start_with, l]
end



def generate_transformation_vector(l,f)
  l = l.split("")
  f = f.split("")

  t = []
  f.each_with_index {|ele,i|
    t[i] = l.index(f[i])
    l[t[i]] = nil
  }
  return t
end


def bwt_decompress(start_with, l)
  str = ""
  i = start_with
  f = l.split("").sort.join("")
  t = generate_transformation_vector(l,f)
  
  while str.size != l.size do
     str += f[i,1]
     i = t[i]
  end
  return str
end

##############################

#
# escape_char had to be an Integer
#
def rle_compress(escape_char, str)
  last_char = nil
  buf_size = 0
  ret_str = ""

  for i in 0...(str.size) do
       
    if buf_size == 255 then
      ret_str += escape_char.chr + buf_size.chr + last_char.chr
      buf_size = 0
      last_char = nil
    end
    
    if str[i] == last_char then
      buf_size += 1
    else
      if buf_size == 0 then
      elsif buf_size > 3 or last_char == escape_char then
        ret_str += escape_char.chr + buf_size.chr + last_char.chr
      else
        buf_size.times { ret_str += last_char.chr }      
      end
      buf_size = 1
      last_char = str[i]
    end 

  end
  
  if buf_size > 0 then
    if buf_size > 3 or last_char == escape_char then
      ret_str += escape_char.chr + buf_size.chr + last_char.chr
    else
      ret_str += last_char.chr
    end
  end 
  ret_str
end


def rle_decompress(escape_char, str)
  ret_str = ""
  i = 0
  while i < str.size do
    if str[i] == escape_char then
      ret_str += str[i+2].chr * str[i+1]
      i += 3
    else
      ret_str += str[i].chr
      i += 1
    end
  end
  ret_str
end


########################




def compress(str,f)
  start_with, l = bwt_compress(str)

  f.puts start_with
  f.print l#rle_compress(255, l)
end


def decompress(f)
  start_with = f.gets.to_i  
  cp = f.read

  bwt_decompress( start_with, cp)#rle_decompress(255, cp)) 
end

mode = ARGV.shift
from_file = ARGV.shift
to_file   = ARGV.shift


case mode
when "compress"
  File.open(to_file, "w+") {|f|
    compress( File::readlines(from_file).to_s, f )
  }
when "decompress"
  File.open(to_file, "w+") {|f|
    f.print decompress(File::open(from_file))
  }
end



--
Michael Neumann