From 9ec04b3e4be60b330ab0699ba4dc13892a11c631 Mon Sep 17 00:00:00 2001 From: Doug Coleman Date: Fri, 17 Jun 2016 15:00:42 -0700 Subject: [PATCH] modern.lexer: encapsulate some state in an lexer object. --- core/modern/lexer/authors.txt | 1 + core/modern/lexer/lexer-tests.factor | 13 +++ core/modern/lexer/lexer.factor | 115 +++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 core/modern/lexer/authors.txt create mode 100644 core/modern/lexer/lexer-tests.factor create mode 100644 core/modern/lexer/lexer.factor diff --git a/core/modern/lexer/authors.txt b/core/modern/lexer/authors.txt new file mode 100644 index 0000000000..7c1b2f2279 --- /dev/null +++ b/core/modern/lexer/authors.txt @@ -0,0 +1 @@ +Doug Coleman diff --git a/core/modern/lexer/lexer-tests.factor b/core/modern/lexer/lexer-tests.factor new file mode 100644 index 0000000000..c8361c5758 --- /dev/null +++ b/core/modern/lexer/lexer-tests.factor @@ -0,0 +1,13 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: tools.test modern.lexer ; +in: modern.lexer.tests + +{ T{ slice f 0 8 "dinosaur" } f } [ + "dinosaur" lex-til-whitespace [ drop ] 2dip +] unit-test + +{ f f } [ + "dinosaur" + [ lex-til-whitespace 2drop ] [ lex-til-whitespace ] bi [ drop ] 2dip +] unit-test \ No newline at end of file diff --git a/core/modern/lexer/lexer.factor b/core/modern/lexer/lexer.factor new file mode 100644 index 0000000000..192f9feab0 --- /dev/null +++ b/core/modern/lexer/lexer.factor @@ -0,0 +1,115 @@ +! Copyright (C) 2016 Doug Coleman. +! See http://factorcode.org/license.txt for BSD license. +USING: accessors constructors kernel math sequences +sequences.extras slots.syntax ; +in: modern.lexer + +TUPLE: modern-lexer n string stack ; +CONSTRUCTOR: modern-lexer ( string -- obj ) + 0 >>n + V{ } clone >>stack ; inline + +: >lexer< ( lexer -- n string ) slots[ n string ] ; + +:: slice-til-either ( n string tokens -- n'/f string slice/f ch/f ) + n [ + n string '[ tokens member? ] find-from + dup "\s\r\n" member? [ + :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch + ] if + ] [ + f string f f + ] if ; inline + +:: lex-til-either ( lexer tokens -- lexer slice/f ch/f ) + lexer >lexer< tokens slice-til-either :> ( n' string' slice ch ) + lexer + n' >>n + slice ch ; + + +:: slice-til-separator-inclusive ( n string tokens -- n' string slice/f ch/f ) + n string '[ tokens member? ] find-from [ dup [ 1 + ] when ] dip :> ( n' ch ) + n' string + n n' string ? + ch ; inline + +:: lex-til-separator-inclusive ( lexer tokens -- lexer slice/f ch/f ) + lexer >lexer< tokens slice-til-separator-inclusive :> ( n' string' slice ch ) + lexer + n' >>n + slice ch ; + + +: slice-til-separator-exclusive ( n string tokens -- n' string slice/f ch/f ) + slice-til-separator-inclusive dup [ + [ [ 1 - ] change-to ] dip + ] when ; + +:: lex-til-separator-exclusive ( lexer tokens -- lexer slice/f ch/f ) + lexer >lexer< tokens slice-til-separator-exclusive :> ( n' string' slice ch ) + lexer + n' >>n + slice ch ; + +! Don't include the whitespace in the slice +:: slice-til-whitespace ( n string -- n'/f string slice/f ch/f ) + n [ + n string [ "\s\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + f string f f + ] if ; inline + +:: lex-til-whitespace ( lexer -- lexer slice/f ch/f ) + lexer >lexer< slice-til-whitespace :> ( n' string' slice ch ) + lexer + n' >>n + slice ch ; + + +: merge-lex-til-whitespace ( lexer slice -- lexer slice' ) + [ lex-til-whitespace drop ] dip merge-slices ; + + +:: slice-til-eol ( n string -- n'/f string slice/f ch/f ) + n [ + n string '[ "\r\n" member? ] find-from :> ( n' ch ) + n' string + n n' string ? + ch + ] [ + f string f f + ] if ; inline + +:: lex-til-eol ( lexer -- lexer slice/f ch/f ) + lexer >lexer< slice-til-eol :> ( n' string' slice ch ) + lexer + n' >>n + slice ch ; + + +ERROR: subseq-expected-but-got-eof n string expected ; + +:: slice-til-string ( n string search -- n' string payload closing ) + search string n start* :> n' + n' [ n string search subseq-expected-but-got-eof ] unless + n' search length + string + n n' string ? + n' dup search length + string ? ; + +:: lex-til-string ( lexer search -- lexer payload closing ) + lexer >lexer< search slice-til-string :> ( n' string' payload closing ) + lexer + n' >>n + payload closing ;