struct Hyoki::Document

Defined in:

hyoki.cr

Constant Summary

ASCII_WORD_REGEX = /\A[[:ascii:]]+\z/
LINE_REGEX = /([^\r\n]*?)(\r\n|\r|\n)|(.+)/
TSV_ESCAPE = {"\n" => "\\n", "\t" => "\\t", "\r" => "\\r", "\\" => "\\\\"}
TSV_ESCAPE_REGEX = Regex.new(TSV_ESCAPE.keys.map do |k| "(?:#{Regex.escape(k)})" end.join("|"))
TSV_HEADER_HETERONYMS = ["surface", "source", "line", "character", "yomi", "surface", "excerpt"].join("\t")
TSV_HEADER_VARIANTS = ["lexical form yomi", "source", "line", "character", "lexical form", "surface", "excerpt"].join("\t")

Constructors

Instance Method Summary

Constructor Detail

def self.new(source_ios : Array(IO), mecab_dict_dir = nil) #

[View source]
def self.new(string : String, mecab_dict_dir = nil) #

[View source]

Instance Method Detail

def excerpt(morpheme, context_length, highlight = nil) #

[View source]
def heteronyms(lines, sort_order, include_ascii) : ReportItems #

Returns an associative list mapping each surface expression to its heteronyms: words with the same spelling but different pronunciations.


[View source]
def items_to_markdown(items, excerpt_context_length, highlight, &) #

[View source]
def items_to_text(items, excerpt_context_length, highlight, &) #

[View source]
def items_to_tsv(items, excerpt_context_length, highlight, header, &) #

[View source]
def lines : Array(Hyoki::Document::Line) #

[View source]
def markup_as_markdown_inline_code(string) #

[View source]
def report(type = ReportType::Variants, format = ReportFormat::Text, excerpt_context_length = 5, sort_order = SortOrder::Alphabetical, highlight = false, header = nil, include_ascii = true) #

[View source]
def report_heteronyms(format, excerpt_context_length, sort_order, highlight, header, include_ascii) #

[View source]
def report_variants(format, excerpt_context_length, sort_order, highlight, header, include_ascii) #

[View source]
def variants(lines, yomi_parser, sort_order, include_ascii) : ReportItems #

Returns an associative list mapping each yomi (of the dictionary form) to its variants: words with the same pronunciation but different spellings.


[View source]