R5RSの数値リテラルっぽいものをGaucheのparser.pegでパーズした(2)
前回から虚部の i
の代わりに!
も使えるようになったのとr進数(r>1)が使えるようになった.あと $try
が足りない所があったので足したり.最早,R5RSの数値リテラルっぽいものではなくなってきている.
前回の36進数の虚部が汚いという問題.コメントも頂いたのだけど,構文の汚い部分がそのまま汚いコードとして現れているのと綺麗にしようとすると大掛かりになるので,今回はとりあえず現状の方向でということに.
(use parser.peg) (use util.match) (use srfi-1) (use srfi-13) (use srfi-42) (define-syntax $$ (syntax-rules () ((_ x ...) ($ x ... $)))) (define-syntax $seq0 (syntax-rules () ((_ p ps ...) ($do (temp p) (ps) ... ($return temp))))) (define ($>> p value) ($<< (^_ value) p)) (define (peg-parse-success? parse seq) (not (values-ref (parse seq) 0))) (define parse-number (let* ((sharp ($c #\#)) (dot ($c #\.)) (exactness-prefix ($optional ($seq sharp ($or ($>> ($c #\e) exact) ($>> ($c #\i) inexact))))) (default-exactly ($$ .$ ($$ cons #f))) (sign ($or ($>> ($c #\+) +) ($>> ($c #\-) -))) (sign~ ($optional sign +)) (char-radix ($<< (^c (case c ((#\b) 2) ((#\o) 8) ((#\d) 10) ((#\x) 16) ((#\z) 36))) ($one-of #[bodxz]))) (generic-radix ($<< ($$ string->number $ list->string) ($seq0 ($many ($one-of #[0-9])) ($c #\r)))) (exp-marker ($do (marker ($one-of #[esfdl^])) ($return (if (char=? #\^ marker) #f inexact)))) (i ($one-of #[!i])) (all-digit-string (string-append (string-ec (: c #\0 #\9) c) (string-ec (: c #\a #\z) c))) (digit->integer+ (^c (or (digit->integer c) (+ 10 (- (char->ucs c) (char->ucs #\a)))))) (string->number+ (^ (str radix) (string-fold (^ (c r) (+ (* r radix) (digit->integer+ c))) 0 str))) ($$number-body (^ (radix) (let* ((digit-char-set (string->char-set (string-take all-digit-string (min radix 36)))) (->number (^s ((if (< 36 radix) string->number+ string->number) (list->string s) radix))) (digit ($one-of digit-char-set)) (digit* ($<< ->number ($many digit))) (digit+ ($<< ->number ($many1 digit))) (dot-digits->number (^l (let1 s (list->string l) (/ (string->number s radix) (expt radix (string-size s)))))) (dot-digit* ($<< dot-digits->number ($seq dot ($many digit)))) (dot-digit+ ($<< dot-digits->number ($seq dot ($many1 digit)))) (exp-suffix ($optional ($<< cons exp-marker ($<< (cut <> <>) sign~ digit+)))) (decimal ($or ($do (dec dot-digit+) (e exp-suffix) ($return (cons inexact (* dec (expt radix (or (and e (cdr e)) 0)))))) ($do (int digit+) (dec ($optional dot-digit*)) (e exp-suffix) ($return (cons (and (or dec (and e (car e))) inexact) (* (+ int (or dec 0)) (expt radix (or (and e (cdr e)) 0)))))))) (urational ($<< (default-exactly /) digit+ ($seq ($c #\/) digit+))) (ureal ($or ($try urational) decimal)) (real ($do (signed sign~) (p ureal) ($return (cons (car p) (signed (cdr p)))))) (imag-part-contains-i-as-digit ($do (rest ($many anychar)) (if (null? rest) ($fail #`"expecting ,digit-char-set") (if (peg-parse-success? i (last-pair rest)) ($return (peg-run-parser ($optional ureal '(#f . 1)) (drop-right rest 1))) ($fail "expecting #[!i]"))))) (complex ($or ($try ($do (r real) (($c #\@)) (t real) ($return (cons (or (car r) (car t)) (make-polar (cdr r) (cdr t)))))) ($try ($do (x ($optional ($try real) '(#f . 0))) (signed sign) (y (if (< (- (char->ucs #\i) (char->ucs #\a)) radix) imag-part-contains-i-as-digit ($seq0 ($optional ureal '(#f . 1)) i))) ($return (cons (or (car x) (car y)) (make-rectangular (cdr x) (signed (cdr y))))))) real))) ($seq0 complex eof)))) (radix-prefix ($or ($seq sharp char-radix) generic-radix)) (prefix ($or ($try ($<< xcons radix-prefix exactness-prefix)) ($<< cons exactness-prefix ($optional ($try radix-prefix) 10)))) (number ($do (er prefix) (match er ((exactness . radix) (unless (< 1 radix) (error "bad radix" radix)) ($do (body ($$number-body radix)) ($return (match body ((default-exactness . n) ((or exactness default-exactness identity) n)))))))))) (cut peg-parse-string number <>)))
いい感じにキ*ガイじみてきた:
(parse-number "18rh") ; => 17 (parse-number "18r+h") ; => 17 (parse-number "18r+i") ; => 0.0+1.0i (parse-number "19r+i") ; => 18 (parse-number "19r0+i") ; => 0.0+1.0i (parse-number "19r+!") ; => 0.0+1.0i (parse-number "60r1^a") ; => 604661760000000000 (parse-number "#e60r1^a") ; => 604661760000000000 (parse-number "60r#e1^a") ; => 604661760000000000 (parse-number "#i60r1^a") ; => 6.0466176e17 (parse-number "60r#i1^a") ; => 6.0466176e17
$$number-body
の中で毎回パーザ作ってるのが気持ち悪いけど,重いようだったらキャッシュすればいいんじゃないかな.そんな感じで細部はまだ作り込んでない.
120行くらいあるだろ…数値リテラルのパーザなんだぜ,これ…
だが待ってほしい,マジキチ構文な割にはパーザの可読性が保たれているのではないだろうか?これはつまり parser.peg の力なのではないか?