class Lexbor::Tokenizer

Defined in:

lexbor/tokenizer.cr

Constant Summary

# Tokenizer callback invoked by lexbor for every emitted token.
# Tracks whether the stream is currently inside an <svg> subtree and
# switches the tokenizer's state machine between the SVG and HTML
# namespaces accordingly, then hands the token to the user's State.
CALLBACK = ->(tkz : Lexbor::Lib::HtmlTokenizerT, token : Lexbor::Lib::HtmlTokenT, ctx : ::Pointer(Void)) do
  id = token.value.tag_id

  unless ctx.null?
    st = ctx.as(Lexbor::Tokenizer::State)
    # CLOSE_FLAG bit unset means this token opens a tag (or is text).
    opening = (token.value.type_ & CLOSE_FLAG).to_i == 0

    # Entering <svg> turns the SVG namespace on; </svg> turns it off.
    st.svg_namespace = opening if id == Lexbor::Lib::TagIdT::LXB_TAG_SVG

    if opening
      if st.svg_namespace
        # Text tokens keep the tokenizer's current state untouched.
        unless id == Lexbor::Lib::TagIdT::LXB_TAG__TEXT
          Lexbor::Lib.html_tokenizer_set_state_by_tag(tkz, false, id, Lexbor::Lib::NsIdT::LXB_NS_SVG)
        end
      else
        Lexbor::Lib.html_tokenizer_set_state_by_tag(tkz, false, id, Lexbor::Lib::NsIdT::LXB_NS_HTML)
      end
    end

    st.on_token(Token.new(st, token.value))
  end

  token
end
# Variant of CALLBACK that discards text tokens made up entirely of
# ASCII whitespace before any namespace handling or user dispatch.
CALLBACK_WO_WHITESPACE_TOKENS = ->(tkz : Lexbor::Lib::HtmlTokenizerT, token : Lexbor::Lib::HtmlTokenT, ctx : ::Pointer(Void)) do
  id = token.value.tag_id

  if id == Lexbor::Lib::TagIdT::LXB_TAG__TEXT
    start = token.value.begin_
    bytes = Slice.new(start, token.value.end_ - start)
    # Skip the token entirely when every byte is ASCII whitespace.
    return token if bytes.all? { |b| b.unsafe_chr.ascii_whitespace? }
  end

  unless ctx.null?
    st = ctx.as(Lexbor::Tokenizer::State)
    # CLOSE_FLAG bit unset means this token opens a tag (or is text).
    opening = (token.value.type_ & CLOSE_FLAG).to_i == 0

    # Entering <svg> turns the SVG namespace on; </svg> turns it off.
    st.svg_namespace = opening if id == Lexbor::Lib::TagIdT::LXB_TAG_SVG

    if opening
      if st.svg_namespace
        # Text tokens keep the tokenizer's current state untouched.
        unless id == Lexbor::Lib::TagIdT::LXB_TAG__TEXT
          Lexbor::Lib.html_tokenizer_set_state_by_tag(tkz, false, id, Lexbor::Lib::NsIdT::LXB_NS_SVG)
        end
      else
        Lexbor::Lib.html_tokenizer_set_state_by_tag(tkz, false, id, Lexbor::Lib::NsIdT::LXB_NS_HTML)
      end
    end

    st.on_token(Token.new(st, token.value))
  end

  token
end
# Bit mask used to test whether a token closes a tag.
# NOTE(review): the original expression OR'ed LXB_HTML_TOKEN_TYPE_CLOSE
# with itself — a no-op for a flags enum (X | X == X) — so the redundant
# operand is dropped; the resulting value is identical. If the second
# operand was meant to be LXB_HTML_TOKEN_TYPE_CLOSE_SELF (self-closing
# tags), that would change behavior — confirm against the lexbor C
# library's lxb_html_token_type_t definition before making that change.
CLOSE_FLAG = Lexbor::Lib::HtmlTokenTypeT::LXB_HTML_TOKEN_TYPE_CLOSE

Constructors

Instance Method Summary

Constructor Detail

def self.new(state, skip_whitespace_tokens : Bool = false) #

[View source]

Instance Method Detail

def finalize #

[View source]
def free #

[View source]
def parse(state, str : String) #

[View source]
def parse(state, slice : Slice) #

[View source]
def tags : Lexbor::Lib::HashT #

[View source]
def tkz : Lexbor::Lib::HtmlTokenizerT #

[View source]