On 8/21/06, Bil Kleb <Bil.Kleb / nasa.gov> wrote:
> Here is an example used for computing correlations.
> It's almost a direct translation of several hundred
> lines of Fortran 77.
>
> If you can bare to look, please help, even it is simply
> to say "yo bonehead, why didn't you just require 'some_library'?"

# Returns an array with correlation and y-normalized correlation^2
# Assumes two nested arrays of variables and samples.  For example,
# here's a case with 3 samples of 3 input variables and 2 output variables:
#
#   x = [ [a1,a2,a3], [b1,b2,b3], [c1,c2,c3] ]
#   y = [ [r1,r2,r3], [s1,s2,s3] ]

class Array
  def sum
    if block_given?
      inject(0) {|s, i| s + yield(i)}
    else
      inject(0) {|s, i| s + i}
    end
  end

  def sums(&blk)
    map {|i| i.sum(&blk)}
  end

  def map2d_with_indices
    a = []
    each_with_index {|x,i|
      a[i] = []
      x.each_with_index {|y,j| a[i][j] = yield [y,i,j] }
    }
    a
  end
end

def correlation(x,y)

  fail 'size arrays unequal' if x.first.size != y.first.size # lame

  n = x.first.size

  sum_x = x.sums
  sum_x_sq = x.sums {|e| e**2}

  sum_y = y.sums
  sum_y_sq = y.sums {|e| e**2}

  sum_xy = x.map { y.map { 0.0 } }

  0.upto(n-1) do |k|
    x.each_index do |i|
      y.each_index do |j|
        sum_xy[i][j] += x[i][k]*y[j][k]
      end
    end
  end

  corr = sum_xy.map2d_with_indices {|sumxy, i, j|
      dx = n*sum_x_sq[i] - sum_x[i]**2
      dy = n*sum_y_sq[j] - sum_y[j]**2
      (dx*dy).zero? ? 0.0 :
	( n*sumxy - sum_x[i]*sum_y[j] ) / Math.sqrt(dx*dy)
  }

  sum_corr_sq_y = corr.transpose.sums {|e| e**2}

  corr_sq = corr.map2d_with_indices {|e, i, j|
    sum_corr_sq_y[j].zero? ? 0.0 : (e**2)/sum_corr_sq_y[j]
  }

  [corr, corr_sq]
end