module Lexbor::Utils::DetectEncoding
Defined in:
lexbor/utils/detect_encoding.cr
Constant Summary
-
# Lookup table from extra encoding-name spellings to `LibEncoding::EncodingT`
# values. The table is filled once, when the constant is initialized; the
# macro loops below expand at compile time into plain `aliases[...] = ...`
# assignments. Keys include non-standard spellings such as "windos" and
# "uft-8" (presumably to tolerate mislabelled pages -- confirm with callers).
ADDITIONAL_ENCODING_ALIASES_LIST =
  begin
    aliases = {} of String => LibEncoding::EncodingT

    # Windows code pages: every prefix x separator x code-page combination,
    # e.g. "windows-1251", "cp1251", "win 1251", "window=1251".
    {% for prefix in ["windows", "win", "cp", "windows-cp", "windos", "window"] of ::String %}
      {% for sep in ["_", "-", " ", "=", ""] %}
        {% for code in [1250, 1251, 1252, 1254, 1255, 1256, 1257, 1258] %}
          aliases["{{ prefix.id }}{{ sep.id }}{{ code }}"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_{{ code }}
        {% end %}
      {% end %}
    {% end %}

    # Spellings that all resolve to UTF-8.
    {% for key in ["unicode", "utf", "uft-8", "utf_8", "uft8"] %}
      aliases[{{ key }}] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8
    {% end %}

    aliases["ansi"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_1252
    aliases["koi8u"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_KOI8_U
    aliases["koi8r"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_KOI8_R

    # Spellings that all resolve to IBM866.
    {% for key in ["cp-866", "ibm-866", "dos-866", "dos866"] %}
      aliases[{{ key }}] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_IBM866
    {% end %}

    aliases["maccyrillic"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_X_MAC_CYRILLIC
    # "iso-88591" is mapped to WINDOWS_1252 here, not to an ISO_8859_* member.
    aliases["iso-88591"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_1252

    # ISO-8859 parts 2 through 8: "iso" + separator + "8859" + separator + part,
    # e.g. "iso-8859-2", "iso8859_5", "iso 88597".
    {% for part in (2..8) %}
      {% for sep in ["_", "-", " ", "=", ""] %}
        {% for sep2 in ["_", "-", ""] %}
          aliases["iso{{ sep.id }}8859{{ sep2.id }}{{ part }}"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_ISO_8859_{{ part }}
        {% end %}
      {% end %}
    {% end %}

    aliases.rehash
    aliases
  end
-
META_CHECK_LIMIT_BYTES =
8 * 1024
Class Method Summary
- .assoc_encoding(name : String) : LibEncoding::EncodingT | Nil
- .assoc_encoding_with_additionals(name : String) : LibEncoding::EncodingT | Nil
- .detect_bom(slice : Slice) : BomEncoding | Nil
- .find_encodings_in_meta(slice : Slice) : Array(String)
- .find_encodings_in_meta_raw(slice : Slice, &)
-
.find_in_header_value(slice : Slice) : String | Nil
find_in_header_value("text/html; charset=Windows-1251") => "Windows-1251"
-
.html_detect_encoding_and_convert(content : String, content_type : String | Nil = nil, default : String | LibEncoding::EncodingT | Nil = nil, from : String | LibEncoding::EncodingT | Nil = nil, to : String | LibEncoding::EncodingT = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8, replace : String | Nil = "")
Helper method: detects the encoding from the BOM, the Content-Type header, and the meta tags, then converts the HTML page. For usage, see detect_encoding_spec.cr.
Class Method Detail
def self.assoc_encoding_with_additionals(name : String) : LibEncoding::EncodingT | Nil
#
def self.find_in_header_value(slice : Slice) : String | Nil
#
find_in_header_value("text/html; charset=Windows-1251") => "Windows-1251"
def self.html_detect_encoding_and_convert(content : String, content_type : String | Nil = nil, default : String | LibEncoding::EncodingT | Nil = nil, from : String | LibEncoding::EncodingT | Nil = nil, to : String | LibEncoding::EncodingT = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8, replace : String | Nil = "")
#
Helper method: detects the encoding from the BOM, the Content-Type header, and the meta tags, then converts the HTML page. For usage, see detect_encoding_spec.cr.