Next: , Previous: , Up: Top   [Contents]


15 char

%--------------------------------------------------%
% vim: ft=mercury ts=4 sw=4 et wm=0 tw=0
%--------------------------------------------------%
% Copyright (C) 1994-2008, 2011 The University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%--------------------------------------------------%
%
% File: char.m.
% Main author: fjh.
% Stability: high.
%
% This module defines some predicates that manipulate characters.
%
% The set of characters which are supported and the mapping from
% characters to integer values are both implementation-dependent.
%
% Originally we used `character' rather than `char' for the type name
% because `char' was used by NU-Prolog to mean something different.
% But now we use `char' and the use of `character' is discouraged.
%
%--------------------------------------------------%
%--------------------------------------------------%

:- module char.
:- interface.

:- import_module enum.
:- import_module list.
:- import_module pretty_printer.

%--------------------------------------------------%

    % A Unicode code point.
    %
    % `char' is an equivalence type: it is simply another name for the
    % type `character'.
    %
:- type char == character.

    % Characters can be used wherever an instance of the `enum' typeclass
    % is required.
    %
:- instance enum(character).

    % The function `char.to_int'/1 and the predicate mode
    % `char.to_int(in, out)' convert a character to its corresponding
    % numerical code (integer value).
    %
    % `char.to_int(out, in)' converts an integer value to a character value.
    % It fails for integer values outside of the Unicode range.
    %
    % `char.to_int(in, in)' checks that the given character and integer
    % correspond to each other; this mode is implied by the other two.
    %
    % Be aware that there is no guarantee that characters can be written to
    % files or to the standard output or standard error streams. Files using an
    % 8-bit national character set would only be able to represent a subset of
    % all possible code points. Currently, the Mercury standard library can
    % only read and write UTF-8 text files, so the entire range is supported
    % (excluding surrogate and noncharacter code points).
    %
    % Note that '\0' is not accepted as a Mercury null character literal.
    % Instead, a null character can be created using `char.det_from_int(0)'.
    % Null characters are not allowed in Mercury strings in C grades.
    %
:- func char.to_int(char) = int.
:- pred char.to_int(char, int).
:- mode char.to_int(in, out) is det.
:- mode char.to_int(in, in) is semidet.    % implied
:- mode char.to_int(out, in) is semidet.

    % char.from_int(Int, Char):
    %
    % Converts an integer to its corresponding character, if any.
    % Fails if there is no corresponding character.
    % This is a more expressive name for the reverse mode of char.to_int,
    % i.e. `char.to_int(out, in)'.
    %
:- pred char.from_int(int::in, char::out) is semidet.

    % Converts an integer to its corresponding character.
    % Aborts (instead of failing) if there is no corresponding character.
    % Available both as a predicate and as a function.
    %
:- pred char.det_from_int(int::in, char::out) is det.
:- func char.det_from_int(int) = char.

    % Returns the maximum numerical character code, i.e. the largest
    % integer value that corresponds to a character.
    % Available both as a function and as a predicate.
    %
:- func char.max_char_value = int.
:- pred char.max_char_value(int::out) is det.

    % Returns the minimum numerical character code, i.e. the smallest
    % integer value that corresponds to a character.
    % Available both as a function and as a predicate.
    %
:- func char.min_char_value = int.
:- pred char.min_char_value(int::out) is det.

    % Convert a character to uppercase.
    % Note that this only converts letters (a-z) in the ASCII range;
    % lowercase letters outside the ASCII range are not converted.
    % Available both as a function and as a predicate.
    %
:- func char.to_upper(char) = char.
:- pred char.to_upper(char::in, char::out) is det.

    % Convert a character to lowercase.
    % Note that this only converts letters (A-Z) in the ASCII range;
    % uppercase letters outside the ASCII range are not converted.
    % Available both as a function and as a predicate.
    %
:- func char.to_lower(char) = char.
:- pred char.to_lower(char::in, char::out) is det.

    % char.lower_upper(Lower, Upper) is true iff
    % Lower is a lowercase letter (a-z) and Upper is the corresponding
    % uppercase letter (A-Z) in the ASCII range.
    %
    % The (in, out) mode maps a lowercase letter to its uppercase
    % counterpart; the (out, in) mode maps an uppercase letter to its
    % lowercase counterpart. Each mode fails if its input is not an ASCII
    % letter of the required case.
    %
:- pred char.lower_upper(char, char).
:- mode char.lower_upper(in, out) is semidet.
:- mode char.lower_upper(out, in) is semidet.

    % True iff the character is a whitespace character in the ASCII range,
    % i.e. a space, tab, newline, carriage return, form-feed, or vertical tab.
    % Whitespace characters outside the ASCII range are not recognised.
    %
:- pred char.is_whitespace(char::in) is semidet.

    % True iff the character is an uppercase letter (A-Z) in the ASCII range.
    % Uppercase letters outside the ASCII range are not recognised.
    %
:- pred char.is_upper(char::in) is semidet.

    % True iff the character is a lowercase letter (a-z) in the ASCII range.
    % Lowercase letters outside the ASCII range are not recognised.
    %
:- pred char.is_lower(char::in) is semidet.

    % True iff the character is a letter (A-Z, a-z) in the ASCII range.
    % Letters outside the ASCII range are not recognised.
    %
:- pred char.is_alpha(char::in) is semidet.

    % True iff the character is a letter (A-Z, a-z) or a digit (0-9)
    % in the ASCII range.
    % Letters and digits outside the ASCII range are not recognised.
    %
:- pred char.is_alnum(char::in) is semidet.

    % True iff the character is a letter (A-Z, a-z) or an underscore (_)
    % in the ASCII range.
    % Letters outside the ASCII range are not recognised.
    %
:- pred char.is_alpha_or_underscore(char::in) is semidet.

    % True iff the character is a letter (A-Z, a-z), a digit (0-9) or an
    % underscore (_) in the ASCII range.
    % Letters and digits outside the ASCII range are not recognised.
    %
:- pred char.is_alnum_or_underscore(char::in) is semidet.

    % True iff the character is a decimal digit (0-9) in the ASCII range.
    % Digits outside the ASCII range are not recognised.
    %
:- pred char.is_digit(char::in) is semidet.

    % True iff the character is a binary digit (0 or 1) in the ASCII range.
    % Any other character, including the digits 2-9, is rejected.
    %
:- pred char.is_binary_digit(char::in) is semidet.

    % True iff the character is an octal digit (0-7) in the ASCII range.
    % Any other character, including the digits 8 and 9, is rejected.
    %
:- pred char.is_octal_digit(char::in) is semidet.

    % True iff the character is a hexadecimal digit (0-9, a-f, A-F)
    % in the ASCII range.
    %
:- pred char.is_hex_digit(char::in) is semidet.

    % char.is_hex_digit(Char, Int):
    %
    % Two-argument version of the test above, which also produces an
    % integer.
    % NOTE(review): presumably Int is the numeric value (0-15) of the
    % hexadecimal digit Char, and the call fails if Char is not a
    % hexadecimal digit -- confirm against the implementation.
    %
:- pred char.is_hex_digit(char, int).
:- mode char.is_hex_digit(in, out) is semidet.

    % char.int_to_hex_char(Int, HexChar):
    %
    % Convert an integer 0-15 to a hexadecimal digit (0-9, A-F)
    % in the ASCII range. The letters produced are uppercase.
    % NOTE(review): the predicate is semidet; presumably it fails for
    % integers outside the range 0-15 -- confirm against the implementation.
    %
:- pred char.int_to_hex_char(int, char).
:- mode char.int_to_hex_char(in, out) is semidet.

    % char.digit_to_int(Char, Int):
    %
    % Succeeds iff Char is a decimal digit (0-9) or a letter (a-z or A-Z).
    % Int is the character's value as a digit: 0-9 for decimal digits,
    % 10-35 for letters. Fails for any other character.
    %
:- pred char.digit_to_int(char::in, int::out) is semidet.

    % char.int_to_digit(Int, DigitChar):
    %
    % True iff `Int' is an integer in the range 0-35 and
    % `DigitChar' is a decimal digit or uppercase letter
    % whose value as a digit is `Int'.
    %
    % The (in, out) mode converts an integer to its digit character;
    % the (out, in) mode converts a digit character back to its integer
    % value. Lowercase letters are not accepted as `DigitChar'.
    %
:- pred char.int_to_digit(int, char).
:- mode char.int_to_digit(in, out) is semidet.
:- mode char.int_to_digit(out, in) is semidet.

    % Returns a decimal digit or uppercase letter corresponding to the value.
    % Calls error/1 (instead of failing) if the integer is not in the
    % range 0-35.
    % Available both as a function and as a predicate.
    %
:- func char.det_int_to_digit(int) = char.
:- pred char.det_int_to_digit(int::in, char::out) is det.

    % Convert a char to a value of type `pretty_printer.doc', for
    % formatting with the `pretty_printer' module.
    %
:- func char.char_to_doc(char) = pretty_printer.doc.

    % char.to_utf8(Char, CodeUnits):
    %
    % Encode a Unicode code point in UTF-8.
    % Fails for surrogate code points.
    % NOTE(review): presumably each element of the resulting list is one
    % UTF-8 code unit (byte), in order -- confirm against the implementation.
    %
:- pred char.to_utf8(char::in, list(int)::out) is semidet.

    % char.to_utf16(Char, CodeUnits):
    %
    % Encode a Unicode code point in UTF-16 (native endianness).
    % Fails for surrogate code points.
    % NOTE(review): presumably each element of the resulting list is one
    % 16-bit UTF-16 code unit, in order -- confirm against the implementation.
    %
:- pred char.to_utf16(char::in, list(int)::out) is semidet.

    % char.is_surrogate(Char):
    %
    % Succeeds iff `Char' is a Unicode surrogate code point.
    % In UTF-16, a code point with a scalar value greater than 0xffff
    % is encoded with a pair of surrogate code points.
    %
:- pred char.is_surrogate(char::in) is semidet.

    % char.is_noncharacter(Char):
    %
    % Succeeds iff `Char' is a Noncharacter code point.
    % Sixty-six code points are not used to encode characters.
    % These code points should not be used for interchange, but may be used
    % internally.
    %
:- pred char.is_noncharacter(char::in) is semidet.

%--------------------------------------------------%
%--------------------------------------------------%


Next: , Previous: , Up: Top   [Contents]