module Pangu

Overview

Paranoid text spacing for good readability: automatically inserts whitespace between CJK (Chinese, Japanese, Korean) characters and half-width characters (alphabetical letters, numerical digits and symbols).

require "pangu"

Pangu.spacing("當你凝視著bug,bug也凝視著你")
=> "當你凝視著 bug,bug 也凝視著你"

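# When the argument is a path to a file, the file's contents are spaced instead
# (this example presumably assumes the file contains "與PM戰鬥的人,應當小心自己不要成為PM"):
Pangu.spacing("path/to/file.txt")
=> "與 PM 戰鬥的人,應當小心自己不要成為 PM"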

Defined in:

pangu.cr

Constant Summary

ANS_CJK = /([A-Za-zΑ-Ωα-ω0-9`~\$%\^&\*\-=\+\\\|\/!;:,\.\?\x{00a1}-\x{00ff}\x{2022}\x{2026}\x{2027}\x{2150}-\x{218f}])([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
ANS_OPERATOR_CJK = /([A-Za-zΑ-Ωα-ω0-9])([\+\-\*\/=&\\|<>])([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
BRACKET_CJK = /([\)\]\}>\x{201d}<])([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
CJK_ANS = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])([A-Za-zΑ-Ωα-ω0-9`\$%\^&\*\-=\+\\\|\/@\x{00a1}-\x{00ff}\x{2022}\x{2027}\x{2150}-\x{218f}])/i
CJK_BRACKET = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])([\(\[\{<\x{201c}>])/i
CJK_BRACKET_CJK = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])([\(\[\{<\x{201c}]+(.*?)[\)\]\}>\x{201d}]+)([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
CJK_HASH = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])(#(\S+))/i
CJK_OPERATOR_ANS = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])([\+\-\*\/=&\\|<>])([A-Za-zΑ-Ωα-ω0-9])/i
CJK_QUOTE = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])(["\'])/i
FIX_BRACKET = /([\(\[\{<\x{201c}]+)(\s*)(.+?)(\s*)([\)\]\}>\x{201d}]+)/i
FIX_QUOTE = /(["\'\(\[\{<\x{201c}]+)(\s*)(.+?)(\s*)(["\'\)\]\}>\x{201d}]+)/i
FIX_SINGLE_QUOTE = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])( )(\')([A-Za-zΑ-Ωα-ω])/i
FIX_SYMBOL = /([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])([~!;:,\.\?\x{2026}])([A-Za-zΑ-Ωα-ω0-9])/i
HASH_CJK = /((\S+)#)([\x{2e80}-\x{2eff}\x{2f00}-\x{2fdf}\x{3040}-\x{309f}\x{30a0}-\x{30ff}\x{3100}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
QUOTE_CJK = /(["\'])([\x{3040}-\x{312f}\x{3200}-\x{32ff}\x{3400}-\x{4dbf}\x{4e00}-\x{9fff}\x{f900}-\x{faff}])/i
VERSION = "0.1.0"

Class Method Summary

Class Method Detail

def self.spacing(text_or_path) #

[View source]
def self.spacing_file(path) #

[View source]
def self.spacing_text(text : String) #

[View source]