A.8 Regular Expressions
Pattern[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Disjunction[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
Alternative[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Alternative[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] | Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Alternative[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
[empty]
Alternative[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] Term[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Term[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
Assertion[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Atom[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups]
Atom[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] Quantifier
Assertion[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
^
$
\b
\B
(?= Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
(?! Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
(?<= Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
(?<! Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
Quantifier ::
QuantifierPrefix
QuantifierPrefix ?
QuantifierPrefix ::
*
+
?
{ DecimalDigits[~Sep] }
{ DecimalDigits[~Sep] ,}
{ DecimalDigits[~Sep] , DecimalDigits[~Sep] }
Atom[UnicodeMode, UnicodeSetsMode, NamedCaptureGroups] ::
PatternCharacter
.
\ AtomEscape[?UnicodeMode, ?NamedCaptureGroups]
CharacterClass[?UnicodeMode, ?UnicodeSetsMode]
( GroupSpecifier[?UnicodeMode]_opt Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
(?: Disjunction[?UnicodeMode, ?UnicodeSetsMode, ?NamedCaptureGroups] )
SyntaxCharacter :: one of ^ $ \ . * + ? ( ) [ ] { } |
PatternCharacter ::
SourceCharacter but not SyntaxCharacter
AtomEscape[UnicodeMode, NamedCaptureGroups] ::
DecimalEscape
CharacterClassEscape[?UnicodeMode]
CharacterEscape[?UnicodeMode]
[+NamedCaptureGroups] k GroupName[?UnicodeMode]
CharacterEscape[UnicodeMode] ::
ControlEscape
c AsciiLetter
0 [lookahead ∉ DecimalDigit]
HexEscapeSequence
RegExpUnicodeEscapeSequence[?UnicodeMode]
IdentityEscape[?UnicodeMode]
ControlEscape :: one of f n r t v
GroupSpecifier[UnicodeMode] ::
? GroupName[?UnicodeMode]
GroupName[UnicodeMode] ::
< RegExpIdentifierName[?UnicodeMode] >
RegExpIdentifierName[UnicodeMode] ::
RegExpIdentifierStart[?UnicodeMode]
RegExpIdentifierName[?UnicodeMode] RegExpIdentifierPart[?UnicodeMode]
RegExpIdentifierStart[UnicodeMode] ::
IdentifierStartChar
\ RegExpUnicodeEscapeSequence[+UnicodeMode]
[~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
RegExpIdentifierPart[UnicodeMode] ::
IdentifierPartChar
\ RegExpUnicodeEscapeSequence[+UnicodeMode]
[~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
RegExpUnicodeEscapeSequence[UnicodeMode] ::
[+UnicodeMode] u HexLeadSurrogate \u HexTrailSurrogate
[+UnicodeMode] u HexLeadSurrogate
[+UnicodeMode] u HexTrailSurrogate
[+UnicodeMode] u HexNonSurrogate
[~UnicodeMode] u Hex4Digits
[+UnicodeMode] u{ CodePoint }
UnicodeLeadSurrogate ::
any Unicode code point in the inclusive interval from U+D800 to U+DBFF
UnicodeTrailSurrogate ::
any Unicode code point in the inclusive interval from U+DC00 to U+DFFF
Each \u HexTrailSurrogate for which the choice of associated u HexLeadSurrogate is ambiguous shall be associated with the nearest possible u HexLeadSurrogate that would otherwise have no corresponding \u HexTrailSurrogate.
HexLeadSurrogate ::
Hex4Digits but only if the MV of Hex4Digits is in the inclusive interval from 0xD800 to 0xDBFF
HexTrailSurrogate ::
Hex4Digits but only if the MV of Hex4Digits is in the inclusive interval from 0xDC00 to 0xDFFF
HexNonSurrogate ::
Hex4Digits but only if the MV of Hex4Digits is not in the inclusive interval from 0xD800 to 0xDFFF
IdentityEscape[UnicodeMode] ::
[+UnicodeMode] SyntaxCharacter
[+UnicodeMode] /
[~UnicodeMode] SourceCharacter but not UnicodeIDContinue
DecimalEscape ::
NonZeroDigit DecimalDigits[~Sep]_opt [lookahead ∉ DecimalDigit]
CharacterClassEscape[UnicodeMode] ::
d
D
s
S
w
W
[+UnicodeMode] p{ UnicodePropertyValueExpression }
[+UnicodeMode] P{ UnicodePropertyValueExpression }
UnicodePropertyValueExpression ::
UnicodePropertyName = UnicodePropertyValue
LoneUnicodePropertyNameOrValue
UnicodePropertyName ::
UnicodePropertyNameCharacters
UnicodePropertyNameCharacters ::
UnicodePropertyNameCharacter UnicodePropertyNameCharacters_opt
UnicodePropertyValue ::
UnicodePropertyValueCharacters
LoneUnicodePropertyNameOrValue ::
UnicodePropertyValueCharacters
UnicodePropertyValueCharacters ::
UnicodePropertyValueCharacter UnicodePropertyValueCharacters_opt
UnicodePropertyValueCharacter ::
UnicodePropertyNameCharacter
DecimalDigit
UnicodePropertyNameCharacter ::
AsciiLetter
_
CharacterClass[UnicodeMode, UnicodeSetsMode] ::
[ [lookahead ≠ ^] ClassContents[?UnicodeMode, ?UnicodeSetsMode] ]
[^ ClassContents[?UnicodeMode, ?UnicodeSetsMode] ]
ClassContents[UnicodeMode, UnicodeSetsMode] ::
[empty]
[~UnicodeSetsMode] NonemptyClassRanges[?UnicodeMode]
[+UnicodeSetsMode] ClassSetExpression
NonemptyClassRanges[UnicodeMode] ::
ClassAtom[?UnicodeMode]
ClassAtom[?UnicodeMode] NonemptyClassRangesNoDash[?UnicodeMode]
ClassAtom[?UnicodeMode] - ClassAtom[?UnicodeMode] ClassContents[?UnicodeMode, ~UnicodeSetsMode]
NonemptyClassRangesNoDash[UnicodeMode] ::
ClassAtom[?UnicodeMode]
ClassAtomNoDash[?UnicodeMode] NonemptyClassRangesNoDash[?UnicodeMode]
ClassAtomNoDash[?UnicodeMode] - ClassAtom[?UnicodeMode] ClassContents[?UnicodeMode, ~UnicodeSetsMode]
ClassAtom[UnicodeMode] ::
-
ClassAtomNoDash[?UnicodeMode]
ClassAtomNoDash[UnicodeMode] ::
SourceCharacter but not one of \ or ] or -
\ ClassEscape[?UnicodeMode]
ClassEscape[UnicodeMode] ::
b
[+UnicodeMode] -
CharacterClassEscape[?UnicodeMode]
CharacterEscape[?UnicodeMode]
ClassSetExpression ::
ClassUnion
ClassIntersection
ClassSubtraction
ClassUnion ::
ClassSetRange ClassUnion_opt
ClassSetOperand ClassUnion_opt
ClassIntersection ::
ClassSetOperand && [lookahead ≠ &] ClassSetOperand
ClassIntersection && [lookahead ≠ &] ClassSetOperand
ClassSubtraction ::
ClassSetOperand -- ClassSetOperand
ClassSubtraction -- ClassSetOperand
ClassSetRange ::
ClassSetCharacter - ClassSetCharacter
ClassSetOperand ::
NestedClass
ClassStringDisjunction
ClassSetCharacter
NestedClass ::
[ [lookahead ≠ ^] ClassContents[+UnicodeMode, +UnicodeSetsMode] ]
[^ ClassContents[+UnicodeMode, +UnicodeSetsMode] ]
\ CharacterClassEscape[+UnicodeMode]
ClassStringDisjunction ::
\q{ ClassStringDisjunctionContents }
ClassStringDisjunctionContents ::
ClassString
ClassString | ClassStringDisjunctionContents
ClassString ::
[empty]
NonEmptyClassString
NonEmptyClassString ::
ClassSetCharacter NonEmptyClassString_opt
ClassSetCharacter ::
[lookahead ∉ ClassSetReservedDoublePunctuator] SourceCharacter but not ClassSetSyntaxCharacter
\ CharacterEscape[+UnicodeMode]
\ ClassSetReservedPunctuator
\b
ClassSetReservedDoublePunctuator :: one of && !! ## $$ %% ** ++ ,, .. :: ;; << == >> ?? @@ ^^ `` ~~
ClassSetSyntaxCharacter :: one of ( ) [ ] { } / - \ |
ClassSetReservedPunctuator :: one of & - ! # % , : ; < = > @ ` ~