Here are *two* solutions.
The first one was my final attempt before looking at Dominik Bathon's
code:
$time ./sample2 5_000_000 1_000_000_000 >big.txt
done
real 0m45.417s
user 0m42.871s
sys 0m0.830s
That means that it's about 7 seconds faster than Dominik's code on my
machine :-)
$ head big.txt
46
117
267
628
885
1152
1202
1293
1327
1528
$ tail big.txt
999998089
999998134
999998217
999998746
999998822
999998890
999998939
999999029
999999059
999999329
$ wc -l big.txt
5000000 big.txt
## CODE ##
#!/usr/bin/ruby -w
STDOUT.sync = false
GC.disable
num,ceil = ARGV.map { |s| s.to_i }
keep = {}
warn "init keeplist"
# just dumping the values in and then checking the length
# of the hash is *much* faster than checking if there already
# is a value in there.
while keep.length < num
keep[rand(ceil)] = true
end
warn "converting to array"
keep2 = keep.keys.sort
# note that, even with GC disabled, it's still faster to
# convert integers to strings before printing than to rely on
# the automatic conversion
warn "converting to strings"
keep3 = []
while i = keep2.shift
keep3 << i.to_s
end
warn "printing"
puts keep3
warn "done"
## END ##
I've also made a more optimized version based on some observations from
Dominik Bathon's solution:
$ time ./sample3 5_000_000 1_000_000_000 >big.txt
init keeplist
converting to array
writing
real 0m29.010s
user 0m27.775s
sys 0m0.990s
$ head big.txt
926
1118
1274
1607
1740
1870
1909
2059
2268
2291
$ tail big.txt
999997896
999998081
999998251
999998367
999998762
999998765
999998847
999999195
999999347
999999615
$ wc -l big.txt
5000000 big.txt
Warning: this code takes up almost 500 Mb to run!
## CODE ##
#!/usr/bin/ruby -w
STDOUT.sync = false
GC.disable
num,ceil = ARGV.map { |s| s.to_i }
keep = {}
warn "init keeplist"
# heavily unrolled loops :-)
# shaves of quite a bit of time
smpl = num - 100
while keep.length <= smpl
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
keep[rand(ceil)] = true
end
while keep.length < num
keep[rand(ceil)] = true
end
warn "converting to array"
keep2 = keep.keys.sort
$,="\n"
e =""
warn "writing"
while keep2.length >= 100
STDOUT.write [
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
keep2.shift.to_s,
e];
end
puts keep2 unless keep2.empty?;
exit;
## END ##
Joost.