Next: , Previous: lazy, Up: Top


37 lexer

     %--------------------------------------------------%
     % vim: ft=mercury ts=4 sw=4 et
     %--------------------------------------------------%
     % Copyright (C) 1993-2000, 2003-2008, 2011-2012 The University of Melbourne.
     % This file may only be copied under the terms of the GNU Library General
     % Public License - see the file COPYING.LIB in the Mercury distribution.
     %--------------------------------------------------%
     %
     % File: lexer.m.
     % Main author: fjh.
     % Stability: high.
     %
     % Lexical analysis.  This module defines the representation of tokens
     % and exports predicates for reading in tokens from an input stream.
     %
     % See ISO Prolog 6.4.  Also see the comments at the top of parser.m.
     %
     %--------------------------------------------------%
     %--------------------------------------------------%
     
     :- module lexer.
     :- interface.
     
     :- import_module char.
     :- import_module io.
     
     %--------------------------------------------------%
     
     :- type token
         --->    name(string)
         ;       variable(string)
         ;       integer(int)        % integer that fits in int
         ;       big_integer(string) % integer that does not fit in int
         ;       float(float)
         ;       string(string)      % "...."
         ;       implementation_defined(string) % $name
         ;       open                % '('
         ;       open_ct             % '(' without any preceding whitespace
         ;       close               % ')'
         ;       open_list           % '['
         ;       close_list          % ']'
         ;       open_curly          % '{'
         ;       close_curly         % '}'
         ;       ht_sep              % '|'
         ;       comma               % ','
         ;       end                 % '.' followed by whitespace
         ;       junk(char)          % junk character in the input stream
         ;       error(string)       % some other invalid token
         ;       io_error(io.error) % error reading from the input stream
         ;       eof                 % end-of-file
         ;       integer_dot(int).   % Never returned by the lexer.
                                     % The integer_dot/1 token is used
                                     % only internally in the lexer, to
                                     % keep the grammar LL(1) so that only
                                     % one character of pushback is needed.
                                     % The lexer converts integer_dot/1
                                     % tokens to integer/1 tokens before
                                     % returning them.
     
         % The context recorded for every token: the number of the line
         % on which the token occurred.
         %
     :- type token_context == int.   % line number
     
         % A list of tokens, each stored together with its context.
         % This "fat list" representation, in which each cell holds the
         % token and its context directly, is more efficient than a list
         % of pairs.
         %
     :- type token_list
         --->    token_cons(token, token_context, token_list)
         ;       token_nil.
     
         % Read a list of tokens from the current input stream.
         % Keep reading until we encounter either an `end' token
         % (i.e. a full stop followed by whitespace) or end-of-file.
         %
     :- pred get_token_list(token_list::out, io::di, io::uo) is det.
     
         % An `offset' is a zero-based offset into a string.
         %
     :- type offset == int.
     
         % string_get_token_list_max(String, MaxOffset, Tokens,
         %   InitialPos, FinalPos):
         %
         % Scan a list of tokens from a string, starting at the current offset
         % specified by InitialPos. Keep scanning until we either encounter
         % an `end' token (i.e. a full stop followed by whitespace) or
         % reach MaxOffset. (MaxOffset must be =< the length of the string.)
         % Return the tokens scanned in Tokens, and return the position one
         % character past the end of the last token in FinalPos.
         %
     :- pred string_get_token_list_max(string::in, offset::in, token_list::out,
         posn::in, posn::out) is det.
     
         % string_get_token_list(String, Tokens, InitialPos, FinalPos):
         %
         % Calls string_get_token_list_max above with MaxOffset set to
         % the length of String.
         %
     :- pred string_get_token_list(string::in, token_list::out,
         posn::in, posn::out) is det.
     
         % token_to_string(Token, String):
         %
         % Convert a token to a human-readable string describing the token.
         %
     :- pred token_to_string(token::in, string::out) is det.
     
     %--------------------------------------------------%
     %--------------------------------------------------%