module Lexbor::Utils::DetectEncoding

Defined in:

lexbor/utils/detect_encoding.cr

Constant Summary

ADDITIONAL_ENCODING_ALIASES_LIST = begin h = {} of String => LibEncoding::EncodingT {% for name in ["windows", "win", "cp", "windows-cp", "windos", "window"] of ::String %} {% for suffix in ["_", "-", " ", "=", ""] %} {% for i in [1250, 1251, 1252, 1254, 1255, 1256, 1257, 1258] %} h["{{ name.id }}{{ suffix.id }}{{ i }}"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_{{ i }} {% end %} {% end %} {% end %} h["unicode"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8 h["utf"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8 h["uft-8"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8 h["utf_8"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8 h["uft8"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8 h["ansi"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_1252 h["koi8u"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_KOI8_U h["koi8r"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_KOI8_R h["cp-866"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_IBM866 h["ibm-866"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_IBM866 h["dos-866"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_IBM866 h["dos866"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_IBM866 h["maccyrillic"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_X_MAC_CYRILLIC h["iso-88591"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_WINDOWS_1252 {% for i in ( 2..8) %} {% for suffix in ["_", "-", " ", "=", ""] %} {% for suffix2 in ["_", "-", ""] %} h["iso{{ suffix.id }}8859{{ suffix2.id }}{{ i }}"] = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_ISO_8859_{{ i }} {% end %} {% end %} {% end %} h.rehash h end
META_CHECK_LIMIT_BYTES = 8 * 1024

Class Method Summary

Class Method Detail

def self.assoc_encoding(name : String) : LibEncoding::EncodingT | Nil #

[View source]
def self.assoc_encoding_with_additionals(name : String) : LibEncoding::EncodingT | Nil #

[View source]
def self.detect_bom(slice : Slice) : BomEncoding | Nil #

[View source]
def self.find_encodings_in_meta(slice : Slice) : Array(String) #

[View source]
def self.find_encodings_in_meta_raw(slice : Slice, &) #

[View source]
def self.find_in_header_value(slice : Slice) : String | Nil #

Example: find_in_header_value("text/html; charset=Windows-1251") # => "Windows-1251"


[View source]
def self.html_detect_encoding_and_convert(content : String, content_type : String | Nil = nil, default : String | LibEncoding::EncodingT | Nil = nil, from : String | LibEncoding::EncodingT | Nil = nil, to : String | LibEncoding::EncodingT = Lexbor::LibEncoding::EncodingT::LXB_ENCODING_UTF_8, replace : String | Nil = "") #

Helper method: detects the encoding from the BOM, the Content-Type header, and the meta tag, then converts the HTML page. For usage, see detect_encoding_spec.cr.


[View source]