Issue #4447 has been updated by Yui NARUSE.

Category set to M17N
Status changed from Open to Assigned
Assignee set to Yukihiro Matsumoto

This request sounds reasonable.
A patch is following:

 diff --git a/string.c b/string.c
 index 23784ab..cea9028 100644
 --- a/string.c
 +++ b/string.c
 @@ -3987,6 +3987,95 @@ rb_str_setbyte(VALUE str, VALUE index, VALUE value)
 return value;
 }
 
 +static VALUE
 +str_byte_substr(VALUE str, long beg, long len)
 +{
 +	char *p, *s = RSTRING_PTR(str), *e = s + RSTRING_LEN(str);
 +	VALUE str2;
 +	if (beg > RSTRING_LEN(str)) return Qnil;
 +	if (beg < 0) {
 +	    beg += RSTRING_LEN(str);
 +	    if (beg < 0) return Qnil;
 +	}
 +	if (beg + len > RSTRING_LEN(str))
 +	    len = RSTRING_LEN(str) - beg;
 +	if (len <= 0) {
 +	    len = 0;
 +	    p = 0;
 +	}
 +	else
 +	    p = s + beg;
 +
 +	if (len > RSTRING_EMBED_LEN_MAX && beg + len == RSTRING_LEN(str)) {
 +	    str2 = rb_str_new4(str);
 +	    str2 = str_new3(rb_obj_class(str2), str2);
 +	    RSTRING(str2)->as.heap.ptr += RSTRING(str2)->as.heap.len - len;
 +	    RSTRING(str2)->as.heap.len = len;
 +	}
 +	else {
 +	    str2 = rb_str_new5(str, p, len);
 +	    OBJ_INFECT(str2, str);
 +	}
 +
 +	return str2;
 +}
 +
 +static VALUE
 +str_byte_aref(VALUE str, VALUE indx)
 +{
 +    long idx;
 +    switch (TYPE(indx)) {
 +      case T_FIXNUM:
 +	idx = FIX2LONG(indx);
 +
 +      num_index:
 +	str = str_byte_substr(str, idx, 1);
 +	if (!NIL_P(str) && RSTRING_LEN(str) == 0) return Qnil;
 +	return str;
 +
 +      default:
 +	/* check if indx is Range */
 +	{
 +	    long beg, len = RSTRING_LEN(str);
 +	    VALUE tmp;
 +
 +	    switch (rb_range_beg_len(indx, &beg, &len, len, 0)) {
 +	      case Qfalse:
 +		break;
 +	      case Qnil:
 +		return Qnil;
 +	      default:
 +		tmp = str_byte_substr(str, beg, len);
 +		return tmp;
 +	    }
 +	}
 +	idx = NUM2LONG(indx);
 +	goto num_index;
 +    }
 +    return Qnil;		/* not reached */
 +}
 +
 +/*
 + *  call-seq:
 + *     str.byteslice() -> new_str
 + *
 + *     "hello".byteslice(1)     #=> "e"
 + *     "hello".byteslice(1, 2)  #=> "el"
 + *     "\u3042".byteslice(1, 2) #=> "\x81\x82"
 + */
 +
 +static VALUE
 +rb_str_byteslice(int argc, VALUE *argv, VALUE str)
 +{
 +    if (argc == 2) {
 +	return str_byte_substr(str, NUM2LONG(argv[0]), NUM2LONG(argv[1]));
 +    }
 +    if (argc != 1) {
 +	rb_raise(rb_eArgError, "wrong number of arguments (%d for 1..2)", argc);
 +    }
 +    return str_byte_aref(str, argv[0]);
 +}
 +
 /*
 *  call-seq:
 *     str.reverse   -> new_str
 @@ -7649,6 +7738,7 @@ Init_String(void)
 rb_define_method(rb_cString, "chr", rb_str_chr, 0);
 rb_define_method(rb_cString, "getbyte", rb_str_getbyte, 1);
 rb_define_method(rb_cString, "setbyte", rb_str_setbyte, 2);
 +    rb_define_method(rb_cString, "byteslice", rb_str_byteslice, -1);
 
 rb_define_method(rb_cString, "to_i", rb_str_to_i, -1);
 rb_define_method(rb_cString, "to_f", rb_str_to_f, 0);
 diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
 index c5d3a53..f18c814 100644
 --- a/test/ruby/test_string.rb
 +++ b/test/ruby/test_string.rb
 @@ -1944,4 +1944,33 @@ class TestString < Test::Unit::TestCase
 assert_equal(S("hello world"), a)
 assert_equal(S("hello "), b)
 end
 +
 +  def b(str)
 +    str.force_encoding(Encoding::ASCII_8BIT)
 +  end
 +
 +  def test_byteslice
 +    assert_equal(b("h"), "hello".byteslice(0))
 +    assert_equal(nil, "hello".byteslice(5))
 +    assert_equal(b("o"), "hello".byteslice(-1))
 +    assert_equal(nil, "hello".byteslice(-6))
 +
 +    assert_equal(b(""), "hello".byteslice(0, 0))
 +    assert_equal(b("hello"), "hello".byteslice(0, 6))
 +    assert_equal(b("hello"), "hello".byteslice(0, 6))
 +    assert_equal(b(""), "hello".byteslice(5, 1))
 +    assert_equal(b("o"), "hello".byteslice(-1, 6))
 +    assert_equal(nil, "hello".byteslice(-6, 1))
 +
 +    assert_equal(b("h"), "hello".byteslice(0..0))
 +    assert_equal(b(""), "hello".byteslice(5..0))
 +    assert_equal(b("o"), "hello".byteslice(4..5))
 +    assert_equal(nil, "hello".byteslice(6..0))
 +    assert_equal(b(""), "hello".byteslice(-1..0))
 +    assert_equal(b("llo"), "hello".byteslice(-3..5))
 +
 +    assert_equal(b("\x81"), "\u3042".byteslice(1))
 +    assert_equal(b("\x81\x82"), "\u3042".byteslice(1, 2))
 +    assert_equal(b("\x81\x82"), "\u3042".byteslice(1..2))
 +  end
 end
----------------------------------------
Feature #4447: add String#byteslice() method
http://redmine.ruby-lang.org/issues/4447

Author: Suraj Kurapati
Status: Assigned
Priority: Normal
Assignee: Yukihiro Matsumoto
Category: M17N
Target version: 


Please add a String#byteslice() method to the Ruby 1.9 core API.

Without that method, I am forced to *inefficiently* perform byte-based
string slicing by (1) unpacking the entire string into an Array (with
String#unpack or worse: my_string.bytes.to_a) then (2) slicing that 
Array and finally (3) joining the sliced Array into a string (with 
Array#pack or worse: my_array.map(&:chr).join), all as shown below:

  class String
    unless method_defined? :byteslice
      ##
      # Does the same thing as String#slice but
      # operates on bytes instead of characters.
      #
      def byteslice(*args)
        unpack('C*').slice(*args).pack('C*')
      end
    end
  end

Thanks for your consideration.


-- 
http://redmine.ruby-lang.org