Funkce html_entity_decode od PHP 5 podporuje vícebajtové znakové stránky, tedy i UTF-8. Pokud potřebujeme řetězec v tomto kódování převést v PHP 4, musíme si funkci napsat sami. Pro získání převodní tabulky by se dala použít funkce get_html_translation_table, ta ale vrátí pouze znaky z kódování ISO-8859-1. Pokud máme zájem o všechny znaky, nezbývá nic jiného, než je získat přímo z HTML specifikace:
<?php
/**
 * UTF-8 counterpart of chr(): encodes one Unicode code point as a byte string.
 *
 * Written without PHP 5+ syntax on purpose, because the script exists to
 * back-fill UTF-8 entity decoding on PHP 4.
 *
 * @param int $code Unicode code point (a numeric string is accepted too)
 * @return string|false UTF-8 byte sequence, or false when $code is not a
 *                      valid Unicode scalar value (non-numeric, negative,
 *                      above U+10FFFF, or a UTF-16 surrogate)
 */
function chr_utf8($code)
{
    // Range-check before casting so huge values cannot wrap around.
    if (!is_numeric($code) || $code < 0 || $code > 0x10FFFF) {
        return false;
    }
    $code = (int) $code;

    // Surrogates U+D800..U+DFFF are not encodable in well-formed UTF-8.
    if ($code >= 0xD800 && $code <= 0xDFFF) {
        return false;
    }

    if ($code < 128) {
        // 1 byte: 0xxxxxxx
        return chr($code);
    }
    if ($code < 2048) {
        // 2 bytes: 110xxxxx 10xxxxxx
        return chr(192 | ($code >> 6))
            . chr(128 | ($code & 63));
    }
    if ($code < 65536) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(224 | ($code >> 12))
            . chr(128 | (($code >> 6) & 63))
            . chr(128 | ($code & 63));
    }
    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(240 | ($code >> 18))
        . chr(128 | (($code >> 12) & 63))
        . chr(128 | (($code >> 6) & 63))
        . chr(128 | ($code & 63));
}

/**
 * preg_replace_callback() callback that turns one HTML entity into UTF-8.
 *
 * Expects the capture groups of '&(#([xX]?))?([0-9a-zA-Z]+);':
 *   [0] whole match, [1] '#' plus optional hex marker for numeric references,
 *   [2] the hex marker ('x' / 'X') or '', [3] digits or the entity name.
 *
 * Anything that cannot be decoded (unknown name, malformed digits, invalid
 * code point) is returned unchanged. Returning false here would make
 * preg_replace_callback() substitute an empty string and silently delete
 * text from the input.
 *
 * @param array $matches match array as described above
 * @return string UTF-8 character, or the original matched text when the
 *                entity cannot be decoded
 */
function html_entity_replace($matches)
{
    // Entity name => code point; the same set the original switch handled.
    static $entities = array(
        // Latin-1 entities
        'nbsp' => 160, 'iexcl' => 161, 'cent' => 162, 'pound' => 163,
        'curren' => 164, 'yen' => 165, 'brvbar' => 166, 'sect' => 167,
        'uml' => 168, 'copy' => 169, 'ordf' => 170, 'laquo' => 171,
        'not' => 172, 'shy' => 173, 'reg' => 174, 'macr' => 175,
        'deg' => 176, 'plusmn' => 177, 'sup2' => 178, 'sup3' => 179,
        'acute' => 180, 'micro' => 181, 'para' => 182, 'middot' => 183,
        'cedil' => 184, 'sup1' => 185, 'ordm' => 186, 'raquo' => 187,
        'frac14' => 188, 'frac12' => 189, 'frac34' => 190, 'iquest' => 191,
        'Agrave' => 192, 'Aacute' => 193, 'Acirc' => 194, 'Atilde' => 195,
        'Auml' => 196, 'Aring' => 197, 'AElig' => 198, 'Ccedil' => 199,
        'Egrave' => 200, 'Eacute' => 201, 'Ecirc' => 202, 'Euml' => 203,
        'Igrave' => 204, 'Iacute' => 205, 'Icirc' => 206, 'Iuml' => 207,
        'ETH' => 208, 'Ntilde' => 209, 'Ograve' => 210, 'Oacute' => 211,
        'Ocirc' => 212, 'Otilde' => 213, 'Ouml' => 214, 'times' => 215,
        'Oslash' => 216, 'Ugrave' => 217, 'Uacute' => 218, 'Ucirc' => 219,
        'Uuml' => 220, 'Yacute' => 221, 'THORN' => 222, 'szlig' => 223,
        'agrave' => 224, 'aacute' => 225, 'acirc' => 226, 'atilde' => 227,
        'auml' => 228, 'aring' => 229, 'aelig' => 230, 'ccedil' => 231,
        'egrave' => 232, 'eacute' => 233, 'ecirc' => 234, 'euml' => 235,
        'igrave' => 236, 'iacute' => 237, 'icirc' => 238, 'iuml' => 239,
        'eth' => 240, 'ntilde' => 241, 'ograve' => 242, 'oacute' => 243,
        'ocirc' => 244, 'otilde' => 245, 'ouml' => 246, 'divide' => 247,
        'oslash' => 248, 'ugrave' => 249, 'uacute' => 250, 'ucirc' => 251,
        'uuml' => 252, 'yacute' => 253, 'thorn' => 254, 'yuml' => 255,
        // Symbols, Greek letters and mathematical operators
        'fnof' => 402,
        'Alpha' => 913, 'Beta' => 914, 'Gamma' => 915, 'Delta' => 916,
        'Epsilon' => 917, 'Zeta' => 918, 'Eta' => 919, 'Theta' => 920,
        'Iota' => 921, 'Kappa' => 922, 'Lambda' => 923, 'Mu' => 924,
        'Nu' => 925, 'Xi' => 926, 'Omicron' => 927, 'Pi' => 928,
        'Rho' => 929, 'Sigma' => 931, 'Tau' => 932, 'Upsilon' => 933,
        'Phi' => 934, 'Chi' => 935, 'Psi' => 936, 'Omega' => 937,
        'alpha' => 945, 'beta' => 946, 'gamma' => 947, 'delta' => 948,
        'epsilon' => 949, 'zeta' => 950, 'eta' => 951, 'theta' => 952,
        'iota' => 953, 'kappa' => 954, 'lambda' => 955, 'mu' => 956,
        'nu' => 957, 'xi' => 958, 'omicron' => 959, 'pi' => 960,
        'rho' => 961, 'sigmaf' => 962, 'sigma' => 963, 'tau' => 964,
        'upsilon' => 965, 'phi' => 966, 'chi' => 967, 'psi' => 968,
        'omega' => 969, 'thetasym' => 977, 'upsih' => 978, 'piv' => 982,
        'bull' => 8226, 'hellip' => 8230, 'prime' => 8242, 'Prime' => 8243,
        'oline' => 8254, 'frasl' => 8260, 'weierp' => 8472, 'image' => 8465,
        'real' => 8476, 'trade' => 8482, 'alefsym' => 8501,
        'larr' => 8592, 'uarr' => 8593, 'rarr' => 8594, 'darr' => 8595,
        'harr' => 8596, 'crarr' => 8629, 'lArr' => 8656, 'uArr' => 8657,
        'rArr' => 8658, 'dArr' => 8659, 'hArr' => 8660,
        'forall' => 8704, 'part' => 8706, 'exist' => 8707, 'empty' => 8709,
        'nabla' => 8711, 'isin' => 8712, 'notin' => 8713, 'ni' => 8715,
        'prod' => 8719, 'sum' => 8721, 'minus' => 8722, 'lowast' => 8727,
        'radic' => 8730, 'prop' => 8733, 'infin' => 8734, 'ang' => 8736,
        'and' => 8743, 'or' => 8744, 'cap' => 8745, 'cup' => 8746,
        'int' => 8747, 'there4' => 8756, 'sim' => 8764, 'cong' => 8773,
        'asymp' => 8776, 'ne' => 8800, 'equiv' => 8801, 'le' => 8804,
        'ge' => 8805, 'sub' => 8834, 'sup' => 8835, 'nsub' => 8836,
        'sube' => 8838, 'supe' => 8839, 'oplus' => 8853, 'otimes' => 8855,
        'perp' => 8869, 'sdot' => 8901, 'lceil' => 8968, 'rceil' => 8969,
        'lfloor' => 8970, 'rfloor' => 8971, 'lang' => 9001, 'rang' => 9002,
        'loz' => 9674, 'spades' => 9824, 'clubs' => 9827, 'hearts' => 9829,
        'diams' => 9830,
        // Markup-significant and internationalization characters
        'quot' => 34, 'amp' => 38, 'lt' => 60, 'gt' => 62,
        'OElig' => 338, 'oelig' => 339, 'Scaron' => 352, 'scaron' => 353,
        'Yuml' => 376, 'circ' => 710, 'tilde' => 732,
        'ensp' => 8194, 'emsp' => 8195, 'thinsp' => 8201, 'zwnj' => 8204,
        'zwj' => 8205, 'lrm' => 8206, 'rlm' => 8207, 'ndash' => 8211,
        'mdash' => 8212, 'lsquo' => 8216, 'rsquo' => 8217, 'sbquo' => 8218,
        'ldquo' => 8220, 'rdquo' => 8221, 'bdquo' => 8222, 'dagger' => 8224,
        'Dagger' => 8225, 'permil' => 8240, 'lsaquo' => 8249,
        'rsaquo' => 8250, 'euro' => 8364,
    );

    $name = isset($matches[3]) ? (string) $matches[3] : '';
    $decoded = false;

    if (!empty($matches[2])) {
        // Hexadecimal reference (&#x...;): accept hex digits only, because
        // hexdec() would otherwise skip junk characters and decode a wrong
        // code point.
        if ($name !== '' && strspn($name, '0123456789abcdefABCDEF') === strlen($name)) {
            $decoded = chr_utf8(hexdec($name));
        }
    } elseif (!empty($matches[1])) {
        // Decimal reference (&#...;): accept decimal digits only.
        if ($name !== '' && strspn($name, '0123456789') === strlen($name)) {
            $decoded = chr_utf8($name);
        }
    } elseif (isset($entities[$name])) {
        // Named entity; names are case-sensitive ("Alpha" vs "alpha").
        $decoded = chr_utf8($entities[$name]);
    }

    // Leave undecodable input exactly as it was instead of deleting it.
    return ($decoded === false ? $matches[0] : $decoded);
}

// Usage. The entity name is limited to alphanumerics so that a bare "&"
// cannot swallow the text up to the next ";".
echo preg_replace_callback(
    '~&(#([xX]?))?([0-9a-zA-Z]+);~',
    'html_entity_replace',
    "a&ndash;b&amp;c&ndash;d\n"
);
?>
Diskuse je zrušena z důvodu spamu.