<?php
/**
 * word2uni
 * This code is a part of aCAPTCHA project, This copyright notice MUST stay intact for use
 * @package aCAPTCHA
 * @author Abd Allatif Eymsh, Albaraa Hassan
 * @copyright (c) 2021
 * @param string $text
 * @license http://opensource.org/licenses/gpl-license.php GNU General Public License v2
 */

/**
 * Shape every Arabic word of a phrase and reverse the word order.
 *
 * Text consisting only of ASCII letters and spaces (with at least one letter)
 * is returned untouched. Anything else is split on single spaces; each token
 * made up solely of characters in U+0600..U+06FF is passed through
 * word2uni(), every other token is kept verbatim, and the resulting tokens
 * are joined back together in reverse order.
 *
 * @param string $text Phrase to convert.
 * @return string The converted phrase.
 */
function text2uni($text)
{
    // Pure Latin phrase: nothing to shape, nothing to reorder.
    if (preg_match("/(^(?=.*[a-zA-Z])(?=.*[a-zA-Z]?)[ a-zA-Z]+$)/", $text)) {
        return $text;
    }

    $shaped = array_map(
        function ($token) {
            // Only tokens that are Arabic-block characters from start to end are shaped.
            $is_arabic = preg_match(
                "/(^(?=.*[\x{0600}-\x{06ff}])(?=.*[\x{0600}-\x{06ff}]?)[\x{0600}-\x{06ff}]+$)/u",
                $token
            );

            return $is_arabic ? word2uni($token) : $token;
        },
        explode(' ', $text)
    );

    // Words are emitted last-to-first.
    return implode(' ', array_reverse($shaped));
}
/**
 * Replace each letter of one Arabic word with its positional presentation
 * form and return the glyphs in reverse order.
 *
 * Every letter is mapped to its beginning / middle / end glyph depending on
 * whether the previous letter connects to it; the glyph list is then reversed
 * (presumably so a left-to-right renderer without Arabic shaping draws the
 * word correctly - confirm against the caller).
 *
 * Behaviour kept from the original implementation:
 *  - input of at most two bytes is returned unchanged;
 *  - the two-letter word lam + alef becomes the single ligature glyph;
 *  - lam + alef is merged into a ligature only when the alef is the last or
 *    the second-to-last letter of the word;
 *  - in words of four or more letters, a connecting second-to-last letter
 *    that follows a non-connecting one is left unshaped when the word ends
 *    with hamza.
 *
 * Fixes over the original:
 *  - the word is split into UTF-8 characters instead of assuming that every
 *    character occupies exactly two bytes;
 *  - characters that have no entry in the form table (diacritics, digits, ...)
 *    are emitted unchanged and treated as non-connecting, instead of being
 *    dropped with an "undefined index" warning;
 *  - alef with hamza below is now part of the table.
 *
 * @param string $word A single word without spaces.
 * @return string The shaped, reversed word; $word itself when it is at most
 *                two bytes long, is a single character, or is not valid UTF-8.
 */
function word2uni($word)
{
    // Zero or one two-byte letter: nothing to join, nothing to reverse.
    if (strlen($word) <= 2) {
        return $word;
    }

    // Presentation forms per letter. An entry WITHOUT a 'beginning' form is a
    // letter that never connects to the letter following it.
    static $forms = array(
        'ا' => array('middle' => 'ﺎ', 'end' => 'ﺎ', 'isolated' => 'ا'),
        'ؤ' => array('middle' => 'ﺅ', 'end' => 'ﺅ', 'isolated' => 'ؤ'),
        'ء' => array('middle' => 'ﺀ', 'end' => 'ﺀ', 'isolated' => 'ء'),
        'أ' => array('middle' => 'ﺄ', 'end' => 'ﺄ', 'isolated' => 'أ'),
        'آ' => array('middle' => 'ﺂ', 'end' => 'ﺂ', 'isolated' => 'آ'),
        // Alef with hamza below (final form U+FE88) was missing from the original table.
        'إ' => array('middle' => 'ﺈ', 'end' => 'ﺈ', 'isolated' => 'إ'),
        'ى' => array('middle' => 'ﻰ', 'end' => 'ﻰ', 'isolated' => 'ى'),
        'ب' => array('beginning' => 'ﺑ', 'middle' => 'ﺒ', 'end' => 'ﺐ', 'isolated' => 'ب'),
        'ت' => array('beginning' => 'ﺗ', 'middle' => 'ﺘ', 'end' => 'ﺖ', 'isolated' => 'ت'),
        'ث' => array('beginning' => 'ﺛ', 'middle' => 'ﺜ', 'end' => 'ﺚ', 'isolated' => 'ث'),
        'ج' => array('beginning' => 'ﺟ', 'middle' => 'ﺠ', 'end' => 'ﺞ', 'isolated' => 'ج'),
        'ح' => array('beginning' => 'ﺣ', 'middle' => 'ﺤ', 'end' => 'ﺢ', 'isolated' => 'ح'),
        'خ' => array('beginning' => 'ﺧ', 'middle' => 'ﺨ', 'end' => 'ﺦ', 'isolated' => 'خ'),
        'د' => array('middle' => 'ﺪ', 'end' => 'ﺪ', 'isolated' => 'د'),
        'ذ' => array('middle' => 'ﺬ', 'end' => 'ﺬ', 'isolated' => 'ذ'),
        'ر' => array('middle' => 'ﺮ', 'end' => 'ﺮ', 'isolated' => 'ر'),
        'ز' => array('middle' => 'ﺰ', 'end' => 'ﺰ', 'isolated' => 'ز'),
        'س' => array('beginning' => 'ﺳ', 'middle' => 'ﺴ', 'end' => 'ﺲ', 'isolated' => 'س'),
        'ش' => array('beginning' => 'ﺷ', 'middle' => 'ﺸ', 'end' => 'ﺶ', 'isolated' => 'ش'),
        'ص' => array('beginning' => 'ﺻ', 'middle' => 'ﺼ', 'end' => 'ﺺ', 'isolated' => 'ص'),
        'ض' => array('beginning' => 'ﺿ', 'middle' => 'ﻀ', 'end' => 'ﺾ', 'isolated' => 'ض'),
        'ط' => array('beginning' => 'ﻃ', 'middle' => 'ﻄ', 'end' => 'ﻂ', 'isolated' => 'ط'),
        'ظ' => array('beginning' => 'ﻇ', 'middle' => 'ﻈ', 'end' => 'ﻆ', 'isolated' => 'ظ'),
        'ع' => array('beginning' => 'ﻋ', 'middle' => 'ﻌ', 'end' => 'ﻊ', 'isolated' => 'ع'),
        'غ' => array('beginning' => 'ﻏ', 'middle' => 'ﻐ', 'end' => 'ﻎ', 'isolated' => 'غ'),
        'ف' => array('beginning' => 'ﻓ', 'middle' => 'ﻔ', 'end' => 'ﻒ', 'isolated' => 'ف'),
        'ق' => array('beginning' => 'ﻗ', 'middle' => 'ﻘ', 'end' => 'ﻖ', 'isolated' => 'ق'),
        'ك' => array('beginning' => 'ﻛ', 'middle' => 'ﻜ', 'end' => 'ﻚ', 'isolated' => 'ك'),
        'ل' => array('beginning' => 'ﻟ', 'middle' => 'ﻠ', 'end' => 'ﻞ', 'isolated' => 'ل'),
        'م' => array('beginning' => 'ﻣ', 'middle' => 'ﻤ', 'end' => 'ﻢ', 'isolated' => 'م'),
        'ن' => array('beginning' => 'ﻧ', 'middle' => 'ﻨ', 'end' => 'ﻦ', 'isolated' => 'ن'),
        'ه' => array('beginning' => 'ﻫ', 'middle' => 'ﻬ', 'end' => 'ﻪ', 'isolated' => 'ه'),
        'و' => array('middle' => 'ﻮ', 'end' => 'ﻮ', 'isolated' => 'و'),
        'ي' => array('beginning' => 'ﻳ', 'middle' => 'ﻴ', 'end' => 'ﻲ', 'isolated' => 'ي'),
        'ئ' => array('beginning' => 'ﺋ', 'middle' => 'ﺌ', 'end' => 'ﺊ', 'isolated' => 'ئ'),
        'ة' => array('middle' => 'ﺔ', 'end' => 'ﺔ', 'isolated' => 'ة'),
    );

    // Work on whole characters rather than on byte pairs.
    $chars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
    if ($chars === false || count($chars) < 2) {
        // Invalid UTF-8, or a single multi-byte character: leave untouched.
        return $word;
    }

    // True when the letter connects to the letter that follows it.
    $joinsNext = function ($ch) use ($forms) {
        return isset($forms[$ch]['beginning']);
    };

    // Requested form of a letter, or the letter itself when the table has none.
    $form = function ($ch, $position) use ($forms) {
        return isset($forms[$ch][$position]) ? $forms[$ch][$position] : $ch;
    };

    $count = count($chars);

    // Two-letter words follow their own rules.
    if ($count === 2) {
        $first = $chars[0];
        $second = $chars[1];

        if ($first === 'ل' && $second === 'ا') {
            return 'ﻻ';
        }

        if (!$joinsNext($first) || $second === 'ء') {
            // No connection between the two letters: both stay isolated.
            $pair = array($form($first, 'isolated'), $form($second, 'isolated'));
        } else {
            $pair = array($form($first, 'beginning'), $form($second, 'end'));
        }

        return implode('', array_reverse($pair));
    }

    $glyphs = array();
    $lastIndex = $count - 1;
    // The second-to-last letter only gets special handling in words of 4+ letters.
    $penultimate = $count >= 4 ? $count - 2 : -1;

    foreach ($chars as $i => $ch) {
        $prevJoins = $i > 0 && $joinsNext($chars[$i - 1]);
        $isLamAlef = $prevJoins && $ch === 'ا' && $chars[$i - 1] === 'ل';

        if ($i === 0) {
            // 'beginning' falls back to the raw letter for non-connecting letters.
            $glyphs[] = $form($ch, 'beginning');
        } elseif ($i === $lastIndex) {
            if (!$prevJoins) {
                $glyphs[] = $ch;
            } elseif ($isLamAlef) {
                // Replace the lam glyph already emitted with the ligature.
                $glyphs[count($glyphs) - 1] = 'ﻼ';
            } elseif ($joinsNext($ch)) {
                $glyphs[] = $form($ch, 'end');
            } else {
                $glyphs[] = $form($ch, 'middle');
            }
        } elseif ($i === $penultimate) {
            if ($prevJoins) {
                if ($isLamAlef) {
                    $glyphs[count($glyphs) - 1] = 'ﻼ';
                } else {
                    $glyphs[] = $form($ch, 'middle');
                }
            } elseif ($joinsNext($ch) && $chars[$lastIndex] === 'ء') {
                // A trailing hamza never connects, so this letter stays unshaped.
                $glyphs[] = $ch;
            } else {
                $glyphs[] = $form($ch, 'beginning');
            }
        } else {
            $glyphs[] = $prevJoins ? $form($ch, 'middle') : $form($ch, 'beginning');
        }
    }

    return implode('', array_reverse($glyphs));
}
?>