MatchData is the type of the special variable $~, and is the type of the object returned by Regexp#match and Regexp#last_match. It encapsulates all the results of a pattern match, results normally accessed through the special variables $&, $', $`, $1, $2, and so on. MatchData is also known as MatchingData.
Match Reference—MatchData acts as an array, and may be accessed using the normal array indexing techniques. mtch[0] is equivalent to the special variable $&, and returns the entire matched string. mtch[1], mtch[2], and so on return the values of the matched backreferences (portions of the pattern between parentheses).
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m[0] #=> "HX1138" m[1, 2] #=> ["H", "X"] m[1..3] #=> ["H", "X", "113"] m[-3, 2] #=> ["X", "113"]
/*
 *  call-seq:
 *     mtch[i]               => obj
 *     mtch[start, length]   => array
 *     mtch[range]           => array
 *
 *  Match Reference---A <code>MatchData</code> object can be indexed like an
 *  array. <i>mtch</i>[0] is the same value as the special variable
 *  <code>$&</code>: the whole matched string. <i>mtch</i>[1],
 *  <i>mtch</i>[2], ... give the matched backreferences (the parts of the
 *  pattern enclosed in parentheses).
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m[0]       #=> "HX1138"
 *     m[1, 2]    #=> ["H", "X"]
 *     m[1..3]    #=> ["H", "X", "113"]
 *     m[-3, 2]   #=> ["X", "113"]
 */

static VALUE
match_aref(argc, argv, match)
    int argc;
    VALUE *argv;
    VALUE match;
{
    VALUE index, length;

    rb_scan_args(argc, argv, "11", &index, &length);

    /* A lone, non-negative Fixnum is looked up directly in the match. */
    if (NIL_P(length) && FIXNUM_P(index) && FIX2INT(index) >= 0) {
        return rb_reg_nth_match(FIX2INT(index), match);
    }

    /* Every other argument form is forwarded, unchanged, to rb_ary_aref
     * on the array produced by match_to_a. */
    return rb_ary_aref(argc, argv, match_to_a(match));
}
Returns the offset of the start of the nth element of the match array in the string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.begin(0) #=> 1 m.begin(2) #=> 2
/* * call-seq: * mtch.begin(n) => integer * * Returns the offset of the start of the <em>n</em>th element of the match * array in the string. * * m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m.begin(0) #=> 1 * m.begin(2) #=> 2 */ static VALUE match_begin(match, n) VALUE match, n; { int i = NUM2INT(n); if (i < 0 || RMATCH(match)->regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (RMATCH(match)->regs->beg[i] < 0) return Qnil; return INT2FIX(RMATCH(match)->regs->beg[i]); }
Returns the array of captures; equivalent to mtch.to_a[1..-1].
f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures f1 #=> "H" f2 #=> "X" f3 #=> "113" f4 #=> "8"
/*
 *  call-seq:
 *     mtch.captures   => array
 *
 *  Gives the captures as an array; the same as
 *  <code>mtch.to_a[1..-1]</code>.
 *
 *     f1,f2,f3,f4 = /(.)(.)(\d+)(\d)/.match("THX1138.").captures
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 *     f4    #=> "8"
 */
static VALUE
match_captures(match)
    VALUE match;
{
    /* match_array is asked to begin at index 1 (match_to_a passes 0). */
    VALUE groups = match_array(match, 1);

    return groups;
}
Returns the offset of the character immediately following the end of the nth element of the match array in the string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.end(0) #=> 7 m.end(2) #=> 3
/* * call-seq: * mtch.end(n) => integer * * Returns the offset of the character immediately following the end of the * <em>n</em>th element of the match array in the string. * * m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m.end(0) #=> 7 * m.end(2) #=> 3 */ static VALUE match_end(match, n) VALUE match, n; { int i = NUM2INT(n); if (i < 0 || RMATCH(match)->regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (RMATCH(match)->regs->beg[i] < 0) return Qnil; return INT2FIX(RMATCH(match)->regs->end[i]); }
Returns a printable version of mtch.
puts /.$/.match("foo").inspect #=> #<MatchData "o"> puts /(.)(.)(.)/.match("foo").inspect #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o"> puts /(.)(.)?(.)/.match("fo").inspect #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
/*
 * call-seq:
 *    mtch.inspect   => str
 *
 * Builds a printable representation of <i>mtch</i>: the class name, the
 * whole match, then each numbered group as <code>n:value</code>
 * (<code>nil</code> for a group with no value).
 *
 *     puts /.$/.match("foo").inspect
 *     #=> #<MatchData "o">
 *
 *     puts /(.)(.)(.)/.match("foo").inspect
 *     #=> #<MatchData "foo" 1:"f" 2:"o" 3:"o">
 *
 *     puts /(.)(.)?(.)/.match("fo").inspect
 *     #=> #<MatchData "fo" 1:"f" 2:nil 3:"o">
 *
 */

static VALUE
match_inspect(VALUE match)
{
    const char *klass_name = rb_obj_classname(match);
    struct re_registers *regs = RMATCH(match)->regs;
    int total = regs->num_regs;
    VALUE out;
    int group;

    out = rb_str_buf_new2("#<");
    rb_str_buf_cat2(out, klass_name);

    for (group = 0; group < total; group++) {
        VALUE piece;

        rb_str_buf_cat2(out, " ");

        /* Element 0 (the whole match) carries no "n:" label. */
        if (group > 0) {
            /* Three decimal digits per byte over-estimates an int;
             * two more bytes cover the ':' and the terminating NUL. */
            char label[sizeof(group)*3+2];

            snprintf(label, sizeof(label), "%d:", group);
            rb_str_buf_cat2(out, label);
        }

        piece = rb_reg_nth_match(group, match);
        if (NIL_P(piece)) {
            rb_str_buf_cat2(out, "nil");
        }
        else {
            rb_str_buf_append(out, rb_str_inspect(piece));
        }
    }

    rb_str_buf_cat2(out, ">");
    return out;
}
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
/* * call-seq: * mtch.length => integer * mtch.size => integer * * Returns the number of elements in the match array. * * m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m.length #=> 5 * m.size #=> 5 */ static VALUE match_size(match) VALUE match; { return INT2FIX(RMATCH(match)->regs->num_regs); }
Returns a two-element array containing the beginning and ending offsets of the nth match.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.offset(0) #=> [1, 7] m.offset(4) #=> [6, 7]
/* * call-seq: * mtch.offset(n) => array * * Returns a two-element array containing the beginning and ending offsets of * the <em>n</em>th match. * * m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m.offset(0) #=> [1, 7] * m.offset(4) #=> [6, 7] */ static VALUE match_offset(match, n) VALUE match, n; { int i = NUM2INT(n); if (i < 0 || RMATCH(match)->regs->num_regs <= i) rb_raise(rb_eIndexError, "index %d out of matches", i); if (RMATCH(match)->regs->beg[i] < 0) return rb_assoc_new(Qnil, Qnil); return rb_assoc_new(INT2FIX(RMATCH(match)->regs->beg[i]), INT2FIX(RMATCH(match)->regs->end[i])); }
Returns the portion of the original string after the current match. Equivalent to the special variable $'.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.post_match #=> ": The Movie"
/*
 *  call-seq:
 *     mtch.post_match   => str
 *
 *  Gives the part of the original string that follows the current match.
 *  Same value as the special variable <code>$'</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
 *     m.post_match   #=> ": The Movie"
 */

VALUE
rb_reg_match_post(match)
    VALUE match;
{
    VALUE source, tail;
    long after;

    /* nil in, or a whole-match start of -1: nothing to return. */
    if (NIL_P(match) || RMATCH(match)->BEG(0) == -1) {
        return Qnil;
    }

    source = RMATCH(match)->str;
    after = RMATCH(match)->END(0);

    /* Everything from the end of the whole match to the end of the string. */
    tail = rb_str_substr(source, after, RSTRING(source)->len - after);
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(tail);
    }
    return tail;
}
Returns the portion of the original string before the current match. Equivalent to the special variable $`.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.pre_match #=> "T"
/*
 *  call-seq:
 *     mtch.pre_match   => str
 *
 *  Gives the part of the original string that precedes the current match.
 *  Same value as the special variable <code>$`</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.pre_match   #=> "T"
 */

VALUE
rb_reg_match_pre(match)
    VALUE match;
{
    VALUE head;

    /* nil in, or a whole-match start of -1: nothing to return. */
    if (NIL_P(match) || RMATCH(match)->BEG(0) == -1) {
        return Qnil;
    }

    /* Everything from the start of the string up to the whole match. */
    head = rb_str_substr(RMATCH(match)->str, 0, RMATCH(match)->BEG(0));
    if (OBJ_TAINTED(match)) {
        OBJ_TAINT(head);
    }
    return head;
}
Returns an array containing match strings for which block gives true. MatchData#select will be removed from Ruby 1.9.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") p m.select{|x| /X/ =~ x} #=> ["HX1138", "X"]
/* * call-seq: * mtch.select{|obj| block} => array * * Returns an array containing match strings for which <em>block</em> * gives <code>true</code>. MatchData#select will be removed from Ruby 1.9. * * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") * p m.select{|x| /X/ =~ x} #=> ["HX1138", "X"] */ static VALUE match_select(argc, argv, match) int argc; VALUE *argv; VALUE match; { if (argc > 0) { rb_raise(rb_eArgError, "wrong number of arguments (%d for 0)", argc); } else { struct re_registers *regs = RMATCH(match)->regs; VALUE target = RMATCH(match)->str; VALUE result = rb_ary_new(); int i; int taint = OBJ_TAINTED(match); for (i=0; i<regs->num_regs; i++) { VALUE str = rb_str_substr(target, regs->beg[i], regs->end[i]-regs->beg[i]); if (taint) OBJ_TAINT(str); if (RTEST(rb_yield(str))) { rb_ary_push(result, str); } } return result; } }
Returns the number of elements in the match array.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.length #=> 5 m.size #=> 5
/* * call-seq: * mtch.length => integer * mtch.size => integer * * Returns the number of elements in the match array. * * m = /(.)(.)(\d+)(\d)/.match("THX1138.") * m.length #=> 5 * m.size #=> 5 */ static VALUE match_size(match) VALUE match; { return INT2FIX(RMATCH(match)->regs->num_regs); }
Returns a frozen copy of the string passed in to match.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.string #=> "THX1138."
/*
 *  call-seq:
 *     mtch.string   => str
 *
 *  Gives a frozen copy of the string that was passed in to
 *  <code>match</code>.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.string   #=> "THX1138."
 */

static VALUE
match_string(match)
    VALUE match;
{
    /* The stored string is handed back as-is; it is expected to be
     * frozen already (it is not frozen here). */
    VALUE subject = RMATCH(match)->str;

    return subject;
}
Returns the array of matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_a #=> ["HX1138", "H", "X", "113", "8"]
Because to_a is called when expanding *variable, there's a useful assignment shortcut for extracting matched fields. This is slightly slower than accessing the fields directly (as an intermediate array is generated).
all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138.")) all #=> "HX1138" f1 #=> "H" f2 #=> "X" f3 #=> "113"
/*
 *  call-seq:
 *     mtch.to_a   => anArray
 *
 *  Gives the array of matches.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_a   #=> ["HX1138", "H", "X", "113", "8"]
 *
 *  Since <code>to_a</code> is invoked when expanding
 *  <code>*</code><em>variable</em>, it offers a handy assignment
 *  shortcut for pulling out the matched fields. It is slightly slower
 *  than reading the fields directly, because an intermediate array is
 *  built.
 *
 *     all,f1,f2,f3 = *(/(.)(.)(\d+)(\d)/.match("THX1138."))
 *     all   #=> "HX1138"
 *     f1    #=> "H"
 *     f2    #=> "X"
 *     f3    #=> "113"
 */

static VALUE
match_to_a(match)
    VALUE match;
{
    /* match_array is asked to begin at index 0, i.e. the whole match. */
    VALUE everything = match_array(match, 0);

    return everything;
}
Returns the entire matched string.
m = /(.)(.)(\d+)(\d)/.match("THX1138.") m.to_s #=> "HX1138"
/*
 *  call-seq:
 *     mtch.to_s   => str
 *
 *  Gives the entire matched string.
 *
 *     m = /(.)(.)(\d+)(\d)/.match("THX1138.")
 *     m.to_s   #=> "HX1138"
 */

static VALUE
match_to_s(match)
    VALUE match;
{
    VALUE matched = rb_reg_last_match(match);

    /* Always answer with a string: substitute an empty one for nil. */
    if (NIL_P(matched)) {
        matched = rb_str_new(0,0);
    }

    /* The result is tainted if either the match object or the string it
     * was matched against is tainted. */
    if (OBJ_TAINTED(match) || OBJ_TAINTED(RMATCH(match)->str)) {
        OBJ_TAINT(matched);
    }
    return matched;
}
Uses each index to access the matching values, returning an array of the corresponding matches.
m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") m.to_a #=> ["HX1138", "H", "X", "113", "8"] m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"]
/* * call-seq: * mtch.values_at([index]*) => array * * Uses each <i>index</i> to access the matching values, returning an array of * the corresponding matches. * * m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie") * m.to_a #=> ["HX1138", "H", "X", "113", "8"] * m.values_at(0, 2, -2) #=> ["HX1138", "X", "113"] */ static VALUE match_values_at(argc, argv, match) int argc; VALUE *argv; VALUE match; { return rb_values_at(match, RMATCH(match)->regs->num_regs, argc, argv, match_entry); }