diff --git a/source/lex.tex b/source/lex.tex index 6668556b8e..2c4cc3394c 100644 --- a/source/lex.tex +++ b/source/lex.tex @@ -20,6 +20,8 @@ \indextext{pointer literal|see{literal, pointer}} \indextext{user-defined literal|see{literal, user-defined}} \indextext{file, source|see{source file}} +\indextext{null character|see{character, null}} +\indextext{null wide character|see{wide-character, null}} \rSec1[lex.separate]{Separate translation} @@ -27,7 +29,7 @@ \indextext{conventions!lexical|(}% \indextext{compilation!separate|(}% The text of the program is kept in units called -\indextext{source file}\term{source files} in this International +\defnx{source files}{source file} in this International Standard. A source file together with all the headers~(\ref{headers}) and source files included~(\ref{cpp.include}) via the preprocessing directive \tcode{\#include}, less any source lines skipped by any of the @@ -56,7 +58,6 @@ occur, although in practice different phases might be folded together.} \begin{enumerate} -\indextext{source file}% \indextext{character!source file}% \indextext{character set!basic source}% \item Physical source file characters are mapped, in an @@ -174,8 +175,7 @@ \pnum \indextext{character set|(}% -\indextext{character set!basic source}% -The \term{basic source character set} consists of 96 characters: the space character, +The \defnx{basic source character set}{character set!basic source} consists of 96 characters: the space character, the control characters representing horizontal tab, vertical tab, form feed, and new-line, plus the following 91 graphical characters:\footnote{The glyphs for the members of the basic source character set are intended to @@ -229,17 +229,18 @@ \grammarterm{universal-character-name}.} \pnum -The \term{basic execution character set} and the \term{basic -execution wide-character set} shall each contain all the members of the +The \defnx{basic execution character set}{character set!basic execution} and the +\defnx{basic 
execution wide-character set}{wide-character set!basic execution} +shall each contain all the members of the basic source character set, plus control characters representing alert, -backspace, and carriage return, plus a \term{null character} -(respectively, \term{null wide character}), whose value is 0. +backspace, and carriage return, plus a \defnx{null character}{character!null} +(respectively, \defnx{null wide character}{wide-character!null}), whose value is 0. For each basic execution character set, the values of the members shall be non-negative and distinct from one another. In both the source and execution basic character sets, the value of each character after \tcode{0} in the above list of decimal digits shall be one greater -than the value of the previous. The \term{execution character set} -and the \term{execution wide-character set} are +than the value of the previous. The \defnx{execution character set}{character set!execution} +and the \defnx{execution wide-character set}{wide-character set!execution} are \impldef{execution character set and execution wide-character set} supersets of the basic execution character set and the basic execution wide-character @@ -930,26 +931,22 @@ \pnum \indextext{literal!\idxcode{unsigned}}% \indextext{literal!\idxcode{long}}% -\indextext{literal!integer}% -\indextext{literal!binary}% -\indextext{literal!octal}% -\indextext{literal!decimal}% -\indextext{literal!hexadecimal}% \indextext{literal!base~of integer}% -An \term{integer literal} is a sequence of digits that has no period +An \defnx{integer literal}{literal!integer} is a sequence of digits that has no period or exponent part, with optional separating single quotes that are ignored when determining its value. An integer literal may have a prefix that specifies its base and a suffix that specifies its type. The lexically first digit of the sequence of digits is the most significant. 
-A \term{binary} integer literal (base two) begins with +A \defnx{binary integer literal}{literal!binary} (base two) begins with \tcode{0b} or \tcode{0B} and consists of a sequence of binary digits. -An \term{octal} integer -literal (base eight) begins with the digit \tcode{0} and consists of a +An \defnx{octal integer literal}{literal!octal} +(base eight) begins with the digit \tcode{0} and consists of a sequence of octal digits.\footnote{The digits \tcode{8} and \tcode{9} are not octal digits. } -A \term{decimal} -integer literal (base ten) begins with a digit other than \tcode{0} and +A \defnx{decimal integer literal}{literal!decimal} +(base ten) begins with a digit other than \tcode{0} and consists of a sequence of decimal digits. -A \term{hexadecimal} integer literal (base sixteen) begins with +A \defnx{hexadecimal integer literal}{literal!hexadecimal} +(base sixteen) begins with \tcode{0x} or \tcode{0X} and consists of a sequence of hexadecimal digits, which include the decimal digits and the letters \tcode{a} through \tcode{f} and \tcode{A} through \tcode{F} with decimal values @@ -1358,10 +1355,8 @@ The integer and fraction parts both consist of a sequence of decimal (base ten) digits if there is no prefix, or hexadecimal (base sixteen) digits if the prefix is \tcode{0x} or \tcode{0X}. -\indextext{literal!decimal floating}% -The literal is a \term{decimal floating literal} in the former case and -\indextext{literal!hexadecimal floating}% -a \term{hexadecimal floating literal} in the latter case. +The literal is a \defnx{decimal floating literal}{literal!decimal floating} in the former case and +a \defnx{hexadecimal floating literal}{literal!hexadecimal floating} in the latter case. Optional separating single quotes in a \grammarterm{digit-sequence} or \grammarterm{hexadecimal-digit-sequence} are ignored when determining its value. @@ -1558,7 +1553,7 @@ also referred to as narrow string literals. 
A narrow string literal has type \indextext{literal!string!type~of}% -``array of \term{n} \tcode{const char}'', where \term{n} is the size of +``array of \placeholder{n} \tcode{const char}'', where \placeholder{n} is the size of the string as defined below, and has static storage duration~(\ref{basic.stc}). @@ -1573,7 +1568,7 @@ \indextext{prefix!\idxcode{u}}% such as \tcode{u"asdf"}, is a \tcode{char16_t} string literal. A \tcode{char16_t} string literal has -type ``array of \term{n} \tcode{const char16_t}'', where \term{n} is the +type ``array of \placeholder{n} \tcode{const char16_t}'', where \placeholder{n} is the size of the string as defined below; it is initialized with the given characters. A single \grammarterm{c-char} may produce more than one \tcode{char16_t} character in the form of @@ -1585,7 +1580,7 @@ \indextext{prefix!\idxcode{U}}% such as \tcode{U"asdf"}, is a \tcode{char32_t} string literal. A \tcode{char32_t} string literal has -type ``array of \term{n} \tcode{const char32_t}'', where \term{n} is the +type ``array of \placeholder{n} \tcode{const char32_t}'', where \placeholder{n} is the size of the string as defined below; it is initialized with the given characters. @@ -1598,8 +1593,8 @@ \indextext{\idxcode{wchar_t}}% \indextext{literal!string!wide}% \indextext{prefix!\idxcode{L}}% -A wide string literal has type ``array of \term{n} \tcode{const -wchar_t}'', where \term{n} is the size of the string as defined below; it +A wide string literal has type ``array of \placeholder{n} \tcode{const +wchar_t}'', where \placeholder{n} is the size of the string as defined below; it is initialized with the given characters. 
\pnum @@ -1654,13 +1649,12 @@ \pnum \indextext{\idxcode{0}|seealso{zero,~null}}% \indextext{\idxcode{0}!string terminator}% -\indextext{\idxcode{0}!null~character}% +\indextext{\idxcode{0}!null~character|see{character, null}}% After any necessary concatenation, in translation phase 7~(\ref{lex.phases}), \tcode{'\textbackslash 0'} is appended to every string literal so that programs that scan a string can find its end. \pnum -\indextext{encoding!multibyte}% Escape sequences and \grammarterm{universal-character-name}{s} in non-raw string literals have the same meaning as in character literals~(\ref{lex.ccon}), except that the single quote \tcode{'} is representable either by itself or by the escape sequence @@ -1670,7 +1664,7 @@ \tcode{char16_t} string literal may yield a surrogate pair. \indextext{string!\idxcode{sizeof}}% In a narrow string literal, a \grammarterm{universal-character-name} may map to more -than one \tcode{char} element due to \term{multibyte encoding}. The +than one \tcode{char} element due to \defnx{multibyte encoding}{encoding!multibyte}. The size of a \tcode{char32_t} or wide string literal is the total number of escape sequences, \grammarterm{universal-character-name}{s}, and other characters, plus one for the terminating \tcode{U'\textbackslash 0'} or @@ -1786,93 +1780,93 @@ \pnum A \grammarterm{user-defined-literal} is treated as a call to a literal operator or literal operator template~(\ref{over.literal}). To determine the form of this call for a -given \grammarterm{user-defined-literal} \term{L} with \grammarterm{ud-suffix} \term{X}, -the \grammarterm{literal-operator-id} whose literal suffix identifier is \term{X} is -looked up in the context of \term{L} using the rules for unqualified name -lookup~(\ref{basic.lookup.unqual}). Let \term{S} be the set of declarations found by -this lookup. \term{S} shall not be empty. 
+given \grammarterm{user-defined-literal} \placeholder{L} with \grammarterm{ud-suffix} \placeholder{X}, +the \grammarterm{literal-operator-id} whose literal suffix identifier is \placeholder{X} is +looked up in the context of \placeholder{L} using the rules for unqualified name +lookup~(\ref{basic.lookup.unqual}). Let \placeholder{S} be the set of declarations found by +this lookup. \placeholder{S} shall not be empty. \pnum -If \term{L} is a \grammarterm{user-defined-integer-literal}, let \term{n} be the literal -without its \grammarterm{ud-suffix}. If \term{S} contains a literal operator with -parameter type \tcode{unsigned long long}, the literal \term{L} is treated as a call of +If \placeholder{L} is a \grammarterm{user-defined-integer-literal}, let \placeholder{n} be the literal +without its \grammarterm{ud-suffix}. If \placeholder{S} contains a literal operator with +parameter type \tcode{unsigned long long}, the literal \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@\term{n}@ULL) +operator "" @\placeholder{X}@(@\placeholder{n}@ULL) \end{codeblock} -Otherwise, \term{S} shall contain a raw literal operator or a literal operator -template~(\ref{over.literal}) but not both. If \term{S} contains a raw literal operator, -the literal \term{L} is treated as a call of the form +Otherwise, \placeholder{S} shall contain a raw literal operator or a literal operator +template~(\ref{over.literal}) but not both. If \placeholder{S} contains a raw literal operator, +the literal \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@"\term{n}{"}@) +operator "" @\placeholder{X}@(@"\placeholder{n}{"}@) \end{codeblock} -Otherwise (\term{S} contains a literal operator template), \term{L} is treated as a call +Otherwise (\placeholder{S} contains a literal operator template), \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@<'@$c_1$@', '@$c_2$@', ... 
'@$c_k$@'>() +operator "" @\placeholder{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>() \end{codeblock} -where \term{n} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence +where \placeholder{n} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence $c_1c_2...c_k$ can only contain characters from the basic source character set. \end{note} \pnum -If \term{L} is a \grammarterm{user-defined-floating-literal}, let \term{f} be the -literal without its \grammarterm{ud-suffix}. If \term{S} contains a literal operator -with parameter type \tcode{long double}, the literal \term{L} is treated as a call of +If \placeholder{L} is a \grammarterm{user-defined-floating-literal}, let \placeholder{f} be the +literal without its \grammarterm{ud-suffix}. If \placeholder{S} contains a literal operator +with parameter type \tcode{long double}, the literal \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@\term{f}@L) +operator "" @\placeholder{X}@(@\placeholder{f}@L) \end{codeblock} -Otherwise, \term{S} shall contain a raw literal operator or a literal operator -template~(\ref{over.literal}) but not both. If \term{S} contains a raw literal operator, -the \term{literal} \term{L} is treated as a call of the form +Otherwise, \placeholder{S} shall contain a raw literal operator or a literal operator +template~(\ref{over.literal}) but not both. If \placeholder{S} contains a raw literal operator, +the \grammarterm{literal} \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@"\term{f}{"}@) +operator "" @\placeholder{X}@(@"\placeholder{f}{"}@) \end{codeblock} -Otherwise (\term{S} contains a literal operator template), \term{L} is treated as a call +Otherwise (\placeholder{S} contains a literal operator template), \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@<'@$c_1$@', '@$c_2$@', ... 
'@$c_k$@'>() +operator "" @\placeholder{X}@<'@$c_1$@', '@$c_2$@', ... '@$c_k$@'>() \end{codeblock} -where \term{f} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence +where \placeholder{f} is the source character sequence $c_1c_2...c_k$. \begin{note} The sequence $c_1c_2...c_k$ can only contain characters from the basic source character set. \end{note} \pnum -If \term{L} is a \grammarterm{user-defined-string-literal}, let \term{str} be the -literal without its \grammarterm{ud-suffix} and let \term{len} be +If \placeholder{L} is a \grammarterm{user-defined-string-literal}, let \placeholder{str} be the +literal without its \grammarterm{ud-suffix} and let \placeholder{len} be the number of -code units in \term{str} (i.e., its length excluding the terminating +code units in \placeholder{str} (i.e., its length excluding the terminating null character). - The literal \term{L} is treated as a call of the form + The literal \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@\term{str}{}@, @\term{len}{}@) +operator "" @\placeholder{X}@(@\placeholder{str}{}@, @\placeholder{len}{}@) \end{codeblock} \pnum -If \term{L} is a \grammarterm{user-defined-character-literal}, let \term{ch} be the +If \placeholder{L} is a \grammarterm{user-defined-character-literal}, let \placeholder{ch} be the literal without its \grammarterm{ud-suffix}. -\term{S} shall contain a literal operator~(\ref{over.literal}) whose only parameter has -the type of \term{ch} and the -literal \term{L} is treated as a call +\placeholder{S} shall contain a literal operator~(\ref{over.literal}) whose only parameter has +the type of \placeholder{ch} and the +literal \placeholder{L} is treated as a call of the form \begin{codeblock} -operator "" @\term{X}@(@\term{ch}{}@) +operator "" @\placeholder{X}@(@\placeholder{ch}{}@) \end{codeblock} \pnum