Source for file unicode_data.php
Documentation is available at unicode_data.php
//============================================================+
// File name : unicode_data.php
// Last Update : 2010-10-18
// Author : Nicola Asuni - Tecnick.com S.r.l - Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
// License : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
// -------------------------------------------------------------------
// Copyright (C) 2008-2010 Nicola Asuni - Tecnick.com S.r.l.
// This file is part of TCPDF software library.
// TCPDF is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
// TCPDF is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
// You should have received a copy of the GNU Lesser General Public License
// along with TCPDF. If not, see <http://www.gnu.org/licenses/>.
// See LICENSE.TXT file for more information.
// -------------------------------------------------------------------
// Description : Unicode data for TCPDF library.
//============================================================+
// Efthimios Mavrogeorgiadis
* Unicode data class for TCPDF library.
* @copyright 2008-2010 Nicola Asuni - Tecnick.com S.r.l (www.tecnick.com) Via Della Pace, 11 - 09044 - Quartucciu (CA) - ITALY - www.tecnick.com - info@tecnick.com
* @package com.tecnick.tcpdf
* @link http://www.tcpdf.org
* @license http://www.gnu.org/copyleft/lesser.html LGPL
* @since 2.1.000 (2008-01-08)
* This is a PHP class containing Unicode data for TCPDF library
* @name TCPDF_UNICODE_DATA
* @package com.tecnick.tcpdf
* @author Nicola Asuni - info@tecnick.com
* @link http://www.tcpdf.org
* @license http://www.gnu.org/copyleft/lesser.html LGPL
* @var Unicode code for Left-to-Right Mark
* @var Unicode code for Right-to-Left Mark
* @var Unicode code for Left-to-Right Embedding
* @var Unicode code for Right-to-Left Embedding
* @var Unicode code for Pop Directional Format
* @var Unicode code for Left-to-Right Override
* @var Unicode code for Right-to-Left Override
* @var Pattern to test RTL (Right-To-Left) strings using regular expressions.
| \xD7[\x80\x83\x86\x90-\xAA\xB0-\xB4] # R
| \xDF[\x80-\xAA\xB4\xB5\xBA] # R
| \xEF\xAC[\x9D\x9F\xA0-\xA8\xAA-\xB6\xB8-\xBC\xBE] # R
| \xEF\xAD[\x80\x81\x83\x84\x86-\x8F] # R
| \xF0\x90\xA0[\x80-\x85\x88\x8A-\xB5\xB7\xB8\xBC\xBF] # R
| \xF0\x90\xA4[\x80-\x99] # R
| \xF0\x90\xA8[\x80\x90-\x93\x95-\x97\x99-\xB3] # R
| \xF0\x90\xA9[\x80-\x87\x90-\x98] # R
| \xE2\x80[\xAB\xAE] # RLE & RLO
* @var Pattern to test Arabic strings using regular expressions.
* Source: http://www.w3.org/International/questions/qa-forms-utf-8
\xD8[\x80-\x83\x8B\x8D\x9B\x9E\x9F\xA1-\xBA] # AL
| \xD9[\x80-\x8A\xAD-\xAF\xB1-\xBF] # AL
| \xDB[\x80-\x95\x9D\xA5\xA6\xAE\xAF\xBA-\xBF] # AL
| \xDC[\x80-\x8D\x90\x92-\xAF] # AL
| \xDE[\x80-\xA5\xB1] # AL
| \xEF\xAD[\x90-\xBF] # AL
| \xEF\xAE[\x80-\xB1] # AL
| \xEF\xAF[\x93-\xBF] # AL
| \xEF[\xB0-\xB3][\x80-\xBF] # AL
| \xEF\xB4[\x80-\xBD] # AL
| \xEF\xB5[\x90-\xBF] # AL
| \xEF\xB6[\x80-\x8F\x92-\xBF] # AL
| \xEF\xB7[\x80-\x87\xB0-\xBC] # AL
| \xEF\xB9[\xB0-\xB4\xB6-\xBF] # AL
| \xEF\xBA[\x80-\xBF] # AL
| \xEF\xBB[\x80-\xBC] # AL
| \xD9[\xA0-\xA9\xAB\xAC] # AN
* @var Array of Unicode types
* @var Mirror unicode characters.
* For information on bidi mirroring, see UAX #9: Bidirectional Algorithm,
* at http://www.unicode.org/unicode/reports/tr9/
* @var Arabic shape substitutions: char code => (isolated, final, initial, medial)
1570=> array(65153, 65154, 65153, 65154),
1571=> array(65155, 65156, 65155, 65156),
1572=> array(65157, 65158),
1573=> array(65159, 65160, 65159, 65160),
1574=> array(65161, 65162, 65163, 65164),
1575=> array(65165, 65166, 65165, 65166),
1576=> array(65167, 65168, 65169, 65170),
1577=> array(65171, 65172),
1578=> array(65173, 65174, 65175, 65176),
1579=> array(65177, 65178, 65179, 65180),
1580=> array(65181, 65182, 65183, 65184),
1581=> array(65185, 65186, 65187, 65188),
1582=> array(65189, 65190, 65191, 65192),
1583=> array(65193, 65194, 65193, 65194),
1584=> array(65195, 65196, 65195, 65196),
1585=> array(65197, 65198, 65197, 65198),
1586=> array(65199, 65200, 65199, 65200),
1587=> array(65201, 65202, 65203, 65204),
1588=> array(65205, 65206, 65207, 65208),
1589=> array(65209, 65210, 65211, 65212),
1590=> array(65213, 65214, 65215, 65216),
1591=> array(65217, 65218, 65219, 65220),
1592=> array(65221, 65222, 65223, 65224),
1593=> array(65225, 65226, 65227, 65228),
1594=> array(65229, 65230, 65231, 65232),
1601=> array(65233, 65234, 65235, 65236),
1602=> array(65237, 65238, 65239, 65240),
1603=> array(65241, 65242, 65243, 65244),
1604=> array(65245, 65246, 65247, 65248),
1605=> array(65249, 65250, 65251, 65252),
1606=> array(65253, 65254, 65255, 65256),
1607=> array(65257, 65258, 65259, 65260),
1608=> array(65261, 65262, 65261, 65262),
1609=> array(65263, 65264, 64488, 64489),
1610=> array(65265, 65266, 65267, 65268),
1649=> array(64336, 64337),
1657=> array(64358, 64359, 64360, 64361),
1658=> array(64350, 64351, 64352, 64353),
1659=> array(64338, 64339, 64340, 64341),
1662=> array(64342, 64343, 64344, 64345),
1663=> array(64354, 64355, 64356, 64357),
1664=> array(64346, 64347, 64348, 64349),
1667=> array(64374, 64375, 64376, 64377),
1668=> array(64370, 64371, 64372, 64373),
1670=> array(64378, 64379, 64380, 64381),
1671=> array(64382, 64383, 64384, 64385),
1672=> array(64392, 64393),
1676=> array(64388, 64389),
1677=> array(64386, 64387),
1678=> array(64390, 64391),
1681=> array(64396, 64397),
1688=> array(64394, 64395, 64394, 64395),
1700=> array(64362, 64363, 64364, 64365),
1702=> array(64366, 64367, 64368, 64369),
1705=> array(64398, 64399, 64400, 64401),
1709=> array(64467, 64468, 64469, 64470),
1711=> array(64402, 64403, 64404, 64405),
1713=> array(64410, 64411, 64412, 64413),
1715=> array(64406, 64407, 64408, 64409),
1722=> array(64414, 64415),
1723=> array(64416, 64417, 64418, 64419),
1726=> array(64426, 64427, 64428, 64429),
1728=> array(64420, 64421),
1729=> array(64422, 64423, 64424, 64425),
1733=> array(64480, 64481),
1734=> array(64473, 64474),
1735=> array(64471, 64472),
1736=> array(64475, 64476),
1737=> array(64482, 64483),
1739=> array(64478, 64479),
1740=> array(64508, 64509, 64510, 64511),
1744=> array(64484, 64485, 64486, 64487),
1746=> array(64430, 64431),
1747=> array(64432, 64433)
* @var Arabic laa (lam-alef ligature) substitutions: char code => (isolated, final, initial, medial)
1570 => array(65269, 65270, 65269, 65270),
1571 => array(65271, 65272, 65271, 65272),
1573 => array(65273, 65274, 65273, 65274),
1575 => array(65275, 65276, 65275, 65276)
* @var Array of character substitutions for sequences of two diacritic symbols.
* Putting the combining mark and character in the same glyph allows us to avoid the two marks overlapping each other in an illegible manner.
* second NSM char code => substitution char
1612=> 64606, # Shadda + Dammatan
1613=> 64607, # Shadda + Kasratan
1614=> 64608, # Shadda + Fatha
1615=> 64609, # Shadda + Damma
1616=> 64610 # Shadda + Kasra
* @var Array of character substitutions from UTF-8 Unicode to Latin1
8249=> 139, # guilsinglleft
8250=> 155, # guilsinglright
8222=> 132, # quotedblbase
8220=> 147, # quotedblleft
8221=> 148, # quotedblright
8218=> 130, # quotesinglbase
} // --- END OF CLASS ---
//============================================================+
//============================================================+
|