! factor/core/syntax/lexer.facts

IN: parser
USING: help sequences math strings kernel ;

IN: help
! Shared note used by help entries in this file (it appears bare at the end
! of several HELP: bodies). Discards the value on top of the stack and
! passes a fixed warning string to $notes instead.
: $parsing-note
    drop ! the incoming value is not used
    "This word should only be called from parsing words."
    $notes ;

HELP: lexer
{ $var-description "Stores the current " { $link lexer } " instance." }
{ $class-description "An object for tokenizing parser input. It has the following slots:"
    { $list
        { { $link lexer-text } " - the lines being parsed; an array of strings" }
        { { $link lexer-line } " - the line number being parsed; unlike most indices, this is 1-based for friendlier error reporting and integration with text editors" }
        { { $link lexer-column } " - the current column position, zero-based" }
    }
"Custom lexing can be implemented by delegating a tuple to an instance of this class and implementing the " { $link skip-word } " and " { $link skip-blank } " generic words." } ;

HELP: <lexer>
{ $values { "text" "a sequence of strings" } }
{ $description "Creates a new lexer for tokenizing the given sequence of lines." } ;

HELP: next-line
{ $values { "lexer" lexer } }
{ $description "Advances the lexer to the next input line, discarding the remainder of the current line." } ;

HELP: file
{ $var-description "Stores the file name being parsed. This is the input parameter to " { $link parse-stream } "." } ;

HELP: parse-error
{ $error-description "Thrown when the parser encounters invalid input. A parse error wraps an underlying error and holds the file being parsed, line number, and column number." } ;

HELP: line-text
{ $values { "lexer" lexer } }
{ $description "Outputs the text of the line being parsed." } ;

HELP: save-location
{ $values { "word" "a word" } }
{ $description "Sets the " { $snippet "\"file\"" } " and " {  $snippet "\"line\"" } " word properties to the current parser location." }
$parsing-note ;

HELP: skip
{ $values { "i" "a starting index" } { "seq" "a sequence" } { "quot" "a quotation with stack effect " { $snippet "( elt -- ? )" } } { "n" integer } }
{ $description "Variant of " { $link find* } " that outputs the length of the sequence instead of -1 if no elements satisfy the predicate." } ;

HELP: skip-blank
{ $values { "lexer" lexer } }
{ $contract "Skips whitespace characters." }
{ $notes "Custom lexers can implement this generic word." } ;

HELP: skip-word
{ $values { "lexer" lexer } }
{ $contract
    "Skips until the end of the current token."
    $nl
    "The default implementation treats a single " { $snippet "\"" } " as a word by itself; otherwise it searches forward until a whitespace character or the end of the line."
}
{ $notes "Custom lexers can implement this generic word." } ;

HELP: scan
{ $values { "token" "a " { $link string } " or " { $link f } } }
{ $description "Reads the next token from parser input. Tokens are delimited by whitespace, with the exception that " { $snippet "\"" } " is treated like a single token even when not followed by whitespace." }
$parsing-note ;

HELP: bad-escape
{ $error-description "This error is thrown if the parser encounters an invalid escape code following a backslash (" { $snippet "\\" } ") in a string literal. See " { $link "escape" } " for a list of valid escape codes." } ;

HELP: escape
{ $values { "escape" "a single-character escape" } { "ch" "a character" } }
{ $description "Converts from a single-character escape code to the corresponding character." }
{ $examples { $example "CHAR: n escape CHAR: \\n = ." "t" } } ;

HELP: parse-string
{ $values { "str" "a new " { $link string } } }
{ $description "Parses the line until a quote (\"), interpreting escape codes along the way." }
$parsing-note ;

HELP: still-parsing?
{ $values { "lexer" lexer } { "?" "a boolean" } }
{ $description "Outputs " { $link f } " if end of input has been reached, " { $link t } " otherwise." } ;