37 lexer
%--------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et
%--------------------------------------------------%
% Copyright (C) 1993-2000, 2003-2008, 2011-2012 The University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%--------------------------------------------------%
%
% File: lexer.m.
% Main author: fjh.
% Stability: high.
%
% Lexical analysis. This module defines the representation of tokens
% and exports predicates for reading in tokens from an input stream.
%
% See ISO Prolog 6.4. Also see the comments at the top of parser.m.
%
%--------------------------------------------------%
%--------------------------------------------------%
:- module lexer.
:- interface.
:- import_module char.
:- import_module io.
%--------------------------------------------------%
% The tokens recognised by the lexer. Where a constructor has a trailing
% comment in quotes, the quoted text is the source text that the token
% stands for.
%
:- type token
---> name(string)
; variable(string)
; integer(int)
; big_integer(string) % an integer that does not fit in int
; float(float)
; string(string) % "...."
; implementation_defined(string) % $name
; open % '('
; open_ct % '(' without any preceding whitespace
; close % ')'
; open_list % '['
; close_list % ']'
; open_curly % '{'
; close_curly % '}'
; ht_sep % '|'
; comma % ','
; end % '.'
; junk(char) % junk character in the input stream
; error(string) % some other invalid token
; io_error(io.error) % error reading from the input stream
; eof % end-of-file
; integer_dot(int). % internal only: the lexer will never return this.
% The integer_dot/1 token is used internally in the lexer to keep the
% grammar LL(1), so that only one character of pushback is needed.
% The lexer converts integer_dot/1 tokens to integer/1 tokens before
% returning them, so callers never see integer_dot/1.
% The context recorded for every token: the number of the line on
% which the token occurred.
%
:- type token_context == int. % line number
% A list of tokens in which each cell carries the token, its
% token_context, and the rest of the list; token_nil ends the list.
% This "fat list" representation is more efficient than a list of pairs.
%
:- type token_list
---> token_cons(token, token_context, token_list)
; token_nil.
% get_token_list(Tokens, !IO):
%
% Read a list of tokens from the current input stream.
% Keep reading until we encounter either an `end' token
% (i.e. a full stop followed by whitespace) or the end-of-file.
%
:- pred get_token_list(token_list::out, io::di, io::uo) is det.
% The type `offset' represents a zero-based offset into a string
% (i.e. offset 0 is the start of the string).
%
:- type offset == int.
% string_get_token_list_max(String, MaxOffset, Tokens,
% InitialPos, FinalPos):
%
% Scan a list of tokens from String, starting at the current offset
% specified by InitialPos. Keep scanning until we either encounter
% an `end' token (i.e. a full stop followed by whitespace) or reach
% MaxOffset. (MaxOffset must be =< the length of String.)
% Return the tokens scanned in Tokens, and return the position one
% character past the end of the last token in FinalPos.
%
:- pred string_get_token_list_max(string::in, offset::in, token_list::out,
posn::in, posn::out) is det.
% string_get_token_list(String, Tokens, InitialPos, FinalPos):
%
% Calls string_get_token_list_max with MaxOffset = the length of String;
% the other arguments have the same meaning as for that predicate.
%
:- pred string_get_token_list(string::in, token_list::out,
posn::in, posn::out) is det.
% token_to_string(Token, String):
%
% Convert Token to a human-readable string, String, describing the token.
%
:- pred token_to_string(token::in, string::out) is det.
%--------------------------------------------------%
%--------------------------------------------------%