Schemeの数値リテラルにマッチする正規表現(3)
テストの仕方に問題があって,ある文字列にきっちりマッチするかどうかで確かめてたんだけど,トークンとして抜き出してくるにはそれでは不適切なことがわかった.そこで肯定先読みを使って,区切り文字が続いてるかどうかを調べるようにした.
それから,問題のなさそうな範囲で欲張り(greedy)マッチを強欲(possessive)マッチに変えておいた.その所為で結構伸びた.
2503bytes.
/(?:(?:(?:#b(?:#[ei])?|(?:#[ei])?#b)(?:(?:[+-]?(?:(?:[01]++#*+)|(?:[01]++#*+\/[0 1]++#*+)))|(?:[+-]?(?:(?:[01]++#*+)|(?:[01]++#*+\/[01]++#*+))@[+-]?(?:(?:[01]++# *+)|(?:[01]++#*+\/[01]++#*+)))|(?:[+-]?(?:(?:[01]++#*+)|(?:[01]++#*+\/[01]++#*+) )[+-](?:(?:[01]++#*+)|(?:[01]++#*+\/[01]++#*+))?i)|(?:[+-](?:(?:[01]++#*+)|(?:[0 1]++#*+\/[01]++#*+))?+i)))|(?:(?:#o(?:#[ei])?|(?:#[ei])?#o)(?:(?:[+-]?(?:(?:[0-7 ]++#*+)|(?:[0-7]++#*+\/[0-7]++#*+)))|(?:[+-]?(?:(?:[0-7]++#*+)|(?:[0-7]++#*+\/[0 -7]++#*+))@[+-]?(?:(?:[0-7]++#*+)|(?:[0-7]++#*+\/[0-7]++#*+)))|(?:[+-]?(?:(?:[0- 7]++#*+)|(?:[0-7]++#*+\/[0-7]++#*+))[+-](?:(?:[0-7]++#*+)|(?:[0-7]++#*+\/[0-7]++ #*+))?i)|(?:[+-](?:(?:[0-7]++#*+)|(?:[0-7]++#*+\/[0-7]++#*+))?+i)))|(?:(?:(?:#d) ?(?:#[ei])?|(?:#[ei])?(?:#d)?)(?:(?:[+-]?(?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]+ +#*+)|(?:(?:[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[ 0-9]++)?+)|(?:[0-9]++\.[0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+ (?:[esfdl][+-]?[0-9]++)?+))))|(?:[+-]?(?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]++#* +)|(?:(?:[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[0-9 ]++)?+)|(?:[0-9]++\.[0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+(?: [esfdl][+-]?[0-9]++)?+)))@[+-]?(?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]++#*+)|(?:( ?:[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+) |(?:[0-9]++\.[0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+(?:[esfdl] [+-]?[0-9]++)?+))))|(?:[+-]?(?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]++#*+)|(?:(?:[ 0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(? 
:[0-9]++\.[0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+(?:[esfdl][+- ]?[0-9]++)?+)))[+-](?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]++#*+)|(?:(?:[0-9]++#*+ (?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:[0-9]++\ .[0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+(?:[esfdl][+-]?[0-9]++ )?+)))?i)|(?:[+-](?:(?:[0-9]++#*+)|(?:[0-9]++#*+\/[0-9]++#*+)|(?:(?:[0-9]++#*+(? :[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:[0-9]++\.[ 0-9]*+#*+(?:[esfdl][+-]?[0-9]++)?+)|(?:\.[0-9]++#++\.#*+(?:[esfdl][+-]?[0-9]++)? +)))?+i)))|(?:(?:#x(?:#[ei])?|(?:#[ei])?#x)(?:(?:[+-]?(?:(?:\h++#*+)|(?:\h++#*+\ /\h++#*+)))|(?:[+-]?(?:(?:\h++#*+)|(?:\h++#*+\/\h++#*+))@[+-]?(?:(?:\h++#*+)|(?: \h++#*+\/\h++#*+)))|(?:[+-]?(?:(?:\h++#*+)|(?:\h++#*+\/\h++#*+))[+-](?:(?:\h++#* +)|(?:\h++#*+\/\h++#*+))?i)|(?:[+-](?:(?:\h++#*+)|(?:\h++#*+\/\h++#*+))?+i))))(? =[\t-\r "();\[\]{-}]|\z)/i
適当に名前付きキャプチャを入れて,(?:...) を (...) に変えれば短くなる予定.
# Builds `re`, a case-insensitive Regexp that matches one Scheme numeric
# literal (R5RS section 7.1.1 <number>) when it is followed by a delimiter
# character or by the end of the string.
#
# The pattern is assembled from small fragments, once per radix
# (index 0 = binary, 1 = octal, 2 = decimal, 3 = hexadecimal), and the four
# per-radix patterns are then joined into a single alternation.
# Possessive quantifiers (`++`, `*+`, `?+`) are used where giving characters
# back could never produce a match.

# --- Fragments shared by every radix (kept as source strings) ---------------
exponent_marker = /[esfdl]/.source
sign = /[+-]?/.source
exactness = /(?:#[ei])?/.source
radix_ary = [/#b/, /#o/, /(?:#d)?/, /#x/].map(&:source)
digit = /[0-9]/.source
digit_ary = [/[01]/, /[0-7]/, /[0-9]/, /\h/].map(&:source)
suffix = /(?:#{exponent_marker}#{sign}#{digit}++)?+/.source
# Positive lookahead: the literal must be followed by a delimiter or by the
# end of the string, so a number is only recognised as a complete token.
la_separator = /(?=[\t-\r "();\[\]{-}]|\z)/.source

# --- One complete <num R> pattern per radix ----------------------------------
ary = Array.new(4) {|i|
  # <prefix R>: radix and exactness markers in either order.
  r_prefix = /#{radix_ary[i]}#{exactness}|#{exactness}#{radix_ary[i]}/
  # <uinteger R>: digits followed by any number of '#' placeholders.
  r_nosign_integer = /#{digit_ary[i]}++#*+/
  # <decimal 10>: only the decimal radix has a fractional/exponent notation.
  r_float = if 2 == i
    Regexp.union(
      /#{r_nosign_integer.source}#{suffix}/,
      /\.#{digit}++#*+#{suffix}/,
      /#{digit}++\.#{digit}*+#*+#{suffix}/,
      # R5RS: <digit 10>+ #+ . #* <suffix>.  This alternative must NOT begin
      # with a dot; a stray leading `\.` here used to reject "12##." and
      # accept ".12##.##".
      /#{digit}++#++\.#*+#{suffix}/
    )
  end
  # <ureal R>: integer | rational | decimal (decimal only when defined).
  # A distinct local name is used so the outer `ary` is not overwritten
  # while the block is still running.
  ureal_forms = [r_nosign_integer, /#{r_nosign_integer.source}\/#{r_nosign_integer.source}/]
  ureal_forms << r_float if r_float
  r_nosign_real = Regexp.union(ureal_forms)
  r_real = /#{sign}#{r_nosign_real}/
  # <complex R>: real | polar | rectangular | pure imaginary.
  r_complex = Regexp.union(
    r_real,
    /#{r_real.source}@#{r_real.source}/,
    /#{r_real.source}[+-]#{r_nosign_real}?i/,
    /[+-]#{r_nosign_real}?+i/
  )
  /#{r_prefix}#{r_complex}/
}

# Interpolating Regexp objects and Regexp.union embed groups as `(?-mix:...)`,
# which would switch IGNORECASE off inside them.  Stripping "-mix" turns each
# into a plain `(?:...)` so the outer flag applies to the whole pattern.
re = Regexp.new("(?:#{Regexp.union(ary).source.gsub(/-mix/, '')})#{la_separator}", Regexp::IGNORECASE)