StringScannerからMatchDataがとれない(2)
StringScanner#[] は,マッチした部分文字列をインデックスでは参照できるけど,名前付きキャプチャの名前では参照できなかったので,名前でも参照できるようにしてみた.
(2009-10-25修正):マッチしてない部分の名前での参照がバグってたので修正.
--- orig_strscan.c 2009-10-18 18:39:10.000000000 +0900 +++ strscan.c 2009-10-25 00:26:44.000000000 +0900 @@ -36,6 +36,7 @@ /* the regexp register; legal only when MATCHED_P(s) */ struct re_registers regs; + VALUE regexp; }; #define MATCHED_P(s) ((s)->flags & FLAG_MATCHED) @@ -160,6 +161,7 @@ strscan_mark(struct strscanner *p) { rb_gc_mark(p->str); + rb_gc_mark(p->regexp); } static void @@ -173,12 +175,13 @@ strscan_s_allocate(VALUE klass) { struct strscanner *p; - + p = ALLOC(struct strscanner); MEMZERO(p, struct strscanner, 1); CLEAR_MATCH_STATUS(p); onig_region_init(&(p->regs)); p->str = Qnil; + p->regexp = Qnil; return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p); } @@ -230,6 +233,7 @@ if (self != orig) { self->flags = orig->flags; self->str = orig->str; + self->regexp = orig->regexp; self->prev = orig->prev; self->curr = orig->curr; onig_region_copy(&self->regs, &orig->regs); @@ -449,6 +453,7 @@ } MATCHED(p); + p->regexp = regex; p->prev = p->curr; if (succptr) { p->curr += p->regs.end[0]; @@ -955,20 +960,55 @@ * s.post_match # -> "1975 14:39" * s.pre_match # -> "" */ + +static int +name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end) +{ + int num; + + num = onig_name_to_backref_number(RREGEXP(regexp)->ptr, + (const unsigned char* )name, (const unsigned char* )name_end, regs); + if (num >= 1) { + return num; + } + else { + VALUE s = rb_str_new(name, (long )(name_end - name)); + rb_raise(rb_eIndexError, "undefined group name reference: %s", + StringValuePtr(s)); + } +} + static VALUE strscan_aref(VALUE self, VALUE idx) { struct strscanner *p; - long i; + long i = 0; GET_SCANNER(self, p); if (! 
MATCHED_P(p)) return Qnil; - - i = NUM2LONG(idx); - if (i < 0) - i += p->regs.num_regs; - if (i < 0) return Qnil; - if (i >= p->regs.num_regs) return Qnil; + + const char *s; + switch (TYPE(idx)) { + case T_FIXNUM: + i = FIX2LONG(idx); + if (i < 0) + i += p->regs.num_regs; + if (i < 0) return Qnil; + if (i >= p->regs.num_regs) return Qnil; + break; + case T_SYMBOL: + s = rb_id2name(SYM2ID(idx)); + goto name_to_backref; + break; + case T_STRING: + s = StringValuePtr(idx); + + name_to_backref: + i = name_to_backref_number(&p->regs, p->regexp, s, s + strlen(s)); + break; + default: + rb_raise(rb_eTypeError, "wrong argument type %s (expected Fixnum, Symbol or String)", rb_obj_classname(idx)); + } if (p->regs.beg[i] == -1) return Qnil; return extract_range(p, p->prev + p->regs.beg[i],
# Usage example: after a successful scan, a named capture group can be
# looked up by its name (given as a Symbol) through StringScanner#[].
scanner = StringScanner.new("hoge")
scanner.scan(/.(?<k>.)./)
puts scanner[:k].inspect  # => "o"
re.c からコードを一部コピペする羽目になった.適当に書いたので,ちゃんとできているかはよくわからない.