peterkahl/charset-from-string

This package is abandoned and no longer maintained. No replacement package was suggested.

Identifies the predominant script (character set, language) in a string.

v3.1 2020-11-09 16:13 UTC

This package is auto-updated.

Last update: 2021-01-02 17:57:03 UTC


README

Downloads | Downloads per Month | License

If this project has business value for you, then don't hesitate to support me with a small donation.

Identifies the predominant script (character set, language) in a string. This library is capable of identifying the following scripts:

Arabic
Armenian
Bengali
Burmese
CJK
Cyrillic
Devanagari
Georgian
Greek
Gujarati
Hebrew
Japanese
Khmer
Korean
Lao
Latin
Malayalam
Sinhala
Tamil
Thai
Tibetan

Usage

use peterkahl\CharsetFromString\CharsetFromString;

echo CharsetFromString::getCharset('قراءة الكتب أو المجلات')."\n"; # ARABIC
echo CharsetFromString::getCharset('לקרוא ספרים או כתבי-עת')."\n"; # HEBREW
echo CharsetFromString::getCharset('Кто боится чёрной работы, тот нам не нужен.')."\n"; # CYRILLIC
echo CharsetFromString::getCharset('Lex iniusta non est lex.')."\n"; # LATIN
echo CharsetFromString::getCharset('어디든 가치가 있는 곳으로 가려면 지름길은 없다.')."\n"; # KOREAN
echo CharsetFromString::getCharset('二兎を追う者は一兎をも得ず。')."\n"; # JAPANESE
echo CharsetFromString::getCharset('中共下台。香港獨立。')."\n"; # CJK
echo CharsetFromString::getCharset('เพื่อนกินหาง่าย เพื่อนตายหายาก')."\n"; # THAI
echo CharsetFromString::getCharset('ຜູ້ທີ່ໄລ່ສອງກະຕ່າຍຈະບໍ່ໄດ້ຮັບກະຕ່າຍໂຕ ໜຶ່ງ')."\n"; # LAO
echo CharsetFromString::getCharset('អ្នកដែលដេញទន្សាយពីរក្បាលមិនទទួលបានទន្សាយមួយទេ')."\n"; # KHMER
echo CharsetFromString::getCharset('ཁྱེད་རང་བོད་སྐད་ཤེས་ཀྱི་ཡོད་པས།')."\n"; # TIBETAN
echo CharsetFromString::getCharset('Ο καλός άνθρωπος δεν παθαίνει τίποτα κακό, ούτε ζωντανός ούτε και νεκρός.')."\n"; # GREEK