Package org.jsoup.parser
Class Tokeniser
- java.lang.Object
-
- org.jsoup.parser.Tokeniser
-
final class Tokeniser extends java.lang.Object
Reads the input stream into tokens.
-
-
Field Summary
Fields
Modifier and Type | Field | Description
(package private) Token.Character
charPending
private java.lang.StringBuilder
charsBuilder
private java.lang.String
charsString
private int[]
codepointHolder
(package private) Token.Comment
commentPending
(package private) java.lang.StringBuilder
dataBuffer
(package private) Token.Doctype
doctypePending
private Token
emitPending
(package private) Token.EndTag
endPending
private ParseErrorList
errors
private boolean
isEmitPending
private java.lang.String
lastStartTag
private int[]
multipointHolder
private static char[]
notCharRefCharsSorted
private CharacterReader
reader
(package private) static char
replacementChar
(package private) Token.StartTag
startPending
private TokeniserState
state
(package private) Token.Tag
tagPending
(package private) static int[]
win1252Extensions
(package private) static int
win1252ExtensionsStart
-
Constructor Summary
Constructors
Constructor | Description
Tokeniser(CharacterReader reader, ParseErrorList errors)
-
Method Summary
All Methods | Instance Methods | Concrete Methods
Modifier and Type | Method | Description
(package private) void
advanceTransition(TokeniserState state)
(package private) java.lang.String
appropriateEndTagName()
private void
characterReferenceError(java.lang.String message)
(package private) int[]
consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute)
(package private) void
createCommentPending()
(package private) void
createDoctypePending()
(package private) Token.Tag
createTagPending(boolean start)
(package private) void
createTempBuffer()
(package private) boolean
currentNodeInHtmlNS()
(package private) void
emit(char c)
(package private) void
emit(char[] chars)
(package private) void
emit(int[] codepoints)
(package private) void
emit(java.lang.String str)
(package private) void
emit(Token token)
(package private) void
emitCommentPending()
(package private) void
emitDoctypePending()
(package private) void
emitTagPending()
(package private) void
eofError(TokeniserState state)
(package private) void
error(java.lang.String errorMsg)
(package private) void
error(TokeniserState state)
(package private) TokeniserState
getState()
(package private) boolean
isAppropriateEndTagToken()
(package private) Token
read()
(package private) void
transition(TokeniserState state)
(package private) java.lang.String
unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
-
-
-
Field Detail
-
replacementChar
static final char replacementChar
- See Also:
- Constant Field Values
-
notCharRefCharsSorted
private static final char[] notCharRefCharsSorted
-
win1252ExtensionsStart
static final int win1252ExtensionsStart
- See Also:
- Constant Field Values
-
win1252Extensions
static final int[] win1252Extensions
-
reader
private final CharacterReader reader
-
errors
private final ParseErrorList errors
-
state
private TokeniserState state
-
emitPending
private Token emitPending
-
isEmitPending
private boolean isEmitPending
-
charsString
private java.lang.String charsString
-
charsBuilder
private java.lang.StringBuilder charsBuilder
-
dataBuffer
java.lang.StringBuilder dataBuffer
-
tagPending
Token.Tag tagPending
-
startPending
Token.StartTag startPending
-
endPending
Token.EndTag endPending
-
charPending
Token.Character charPending
-
doctypePending
Token.Doctype doctypePending
-
commentPending
Token.Comment commentPending
-
lastStartTag
private java.lang.String lastStartTag
-
codepointHolder
private final int[] codepointHolder
-
multipointHolder
private final int[] multipointHolder
-
-
Constructor Detail
-
Tokeniser
Tokeniser(CharacterReader reader, ParseErrorList errors)
-
-
Method Detail
-
read
Token read()
-
emit
void emit(Token token)
-
emit
void emit(java.lang.String str)
-
emit
void emit(char[] chars)
-
emit
void emit(int[] codepoints)
-
emit
void emit(char c)
-
getState
TokeniserState getState()
-
transition
void transition(TokeniserState state)
-
advanceTransition
void advanceTransition(TokeniserState state)
-
consumeCharacterReference
int[] consumeCharacterReference(java.lang.Character additionalAllowedCharacter, boolean inAttribute)
-
createTagPending
Token.Tag createTagPending(boolean start)
-
emitTagPending
void emitTagPending()
-
createCommentPending
void createCommentPending()
-
emitCommentPending
void emitCommentPending()
-
createDoctypePending
void createDoctypePending()
-
emitDoctypePending
void emitDoctypePending()
-
createTempBuffer
void createTempBuffer()
-
isAppropriateEndTagToken
boolean isAppropriateEndTagToken()
-
appropriateEndTagName
java.lang.String appropriateEndTagName()
-
error
void error(TokeniserState state)
-
eofError
void eofError(TokeniserState state)
-
characterReferenceError
private void characterReferenceError(java.lang.String message)
-
error
void error(java.lang.String errorMsg)
-
currentNodeInHtmlNS
boolean currentNodeInHtmlNS()
-
unescapeEntities
java.lang.String unescapeEntities(boolean inAttribute)
Utility method to consume reader and unescape entities found within.
- Parameters:
- inAttribute - if the text to be unescaped is in an attribute
- Returns:
- unescaped string from reader
-
-