A user account is required in order to edit this wiki, but we've had to disable public user registrations due to spam.
To request an account, ask an autoconfirmed user on Chat (such as one of these permanent autoconfirmed members).
Web Encodings: Difference between revisions
Jump to navigation
Jump to search
(New page: My scratchpad for encoding related notes. == Goals == * Document existing practices for ** Supported encodings ** Supported aliases ** Supported matching algorithm * Converge the various...) |
(→Opera) |
||
Line 23: | Line 23: | ||
!| Decoded As | !| Decoded As | ||
!| Notes | !| Notes | ||
|- | |||
|- | |||
| big5 | |||
| big5, cnbig5, csbig5 | |||
| | |||
| | |||
|- | |||
| big5-hkscs | |||
| big5hkscs | |||
| | |||
| | |||
|- | |||
| euc-jp | |||
| cseucpkdfmtjapanese, eucjp, extendedunixcodepackedformatforjapanese | |||
| | |||
| | |||
|- | |||
| euc-kr | |||
| cseuckr, csksc56011987, euckr, isoir149, korean, ksc5601, ksc56011987, ksc56011989, windows949 | |||
| | |||
| | |||
|- | |||
| euc-tw | |||
| euctw | |||
| | |||
| | |||
|- | |||
| gb18030 | |||
| gb18030 | |||
| | |||
| | |||
|- | |||
| gbk | |||
| chinese, cngb, cp936, csgb2312, csiso58gb231280, euccn, gb2312, gb231280, gbk, isoir58, ms936, windows936 | |||
| | |||
| | |||
|- | |||
| hz-gb-2312 | |||
| hzgb2312 | |||
| | |||
| | |||
|- | |||
| ibm866 | |||
| 866, cp866, csibm866, ibm866 | |||
| | |||
| | |||
|- | |||
| iso-2022-cn | |||
| iso2022cn | |||
| | |||
| | |||
|- | |||
| iso-2022-jp | |||
| csiso2022jp, iso2022jp | |||
| | |||
| | |||
|- | |||
| iso-2022-jp-1 | |||
| iso2022jp1 | |||
| | |||
| | |||
|- | |||
| iso-2022-kr | |||
| csiso2022kr, iso2022kr | |||
| | |||
| | |||
|- | |||
| iso-8859-1 | |||
| cp819, csisolatin1, ibm819, iso88591, iso885911987, isoir100, l1, latin1 | |||
| windows-1252 | |||
| | |||
|- | |||
| iso-8859-2 | |||
| csisolatin2, iso88592, iso885921987, isoir101, l2, latin2 | |||
| | |||
| | |||
|- | |||
| iso-8859-3 | |||
| csisolatin3, iso88593, iso885931988, isoir109, l3, latin3 | |||
| | |||
| | |||
|- | |||
| iso-8859-4 | |||
| csisolatin4, iso88594, iso885941988, isoir110, l4, latin4 | |||
| | |||
| | |||
|- | |||
| iso-8859-5 | |||
| csisolatincyrillic, cyrillic, iso88595, iso885951988, isoir144 | |||
| | |||
| | |||
|- | |||
| iso-8859-6 | |||
| arabic, asmo708, csiso88596e, csisolatinarabic, ecma114, iso88596, iso885961987, iso88596e, isoir127 | |||
| | |||
| | |||
|- | |||
| iso-8859-6-i | |||
| csiso88596i, iso88596i | |||
| | |||
| | |||
|- | |||
| iso-8859-7 | |||
| csisolatingreek, ecma118, elot928, greek, greek8, iso88597, iso885971987, isoir126 | |||
| | |||
| | |||
|- | |||
| iso-8859-8 | |||
| csiso88598e, csisolatinhebrew, hebrew, iso88598, iso885981988, iso88598e, isoir138, visual | |||
| | |||
| | |||
|- | |||
| iso-8859-8-i | |||
| csiso88598i, iso88598i | |||
| | |||
| | |||
|- | |||
| iso-8859-9 | |||
| csisolatin5, iso88599, iso885991989, isoir148, l5, latin5 | |||
| | |||
| | |||
|- | |||
| iso-8859-10 | |||
| csisolatin6, iso885910, iso8859101992, isoir157, l6, latin6 | |||
| | |||
| | |||
|- | |||
| iso-8859-11 | |||
| iso885911, tis620, tis6202533, windows874 | |||
| | |||
| | |||
|- | |||
| iso-8859-13 | |||
| iso885913 | |||
| | |||
| | |||
|- | |||
| iso-8859-14 | |||
| iso885914, iso8859141998, isoceltic, isoir199, l8, latin8 | |||
| | |||
| | |||
|- | |||
| iso-8859-15 | |||
| iso885915, latin9 | |||
| | |||
| | |||
|- | |||
| iso-8859-16 | |||
| iso885916, iso8859162001, isoir226, l10, latin10 | |||
| | |||
| | |||
|- | |||
| koi8-r | |||
| cskoi8r, koi8r | |||
| | |||
| | |||
|- | |||
| koi8-u | |||
| koi8u | |||
| | |||
| | |||
|- | |||
| macintosh | |||
| csmacintosh, mac, macintosh, macroman | |||
| | |||
| Likely disabled. | |||
|- | |||
| shift_jis | |||
| cp932, csshiftjis, cswindows31j, ms932, mskanji, shiftjis, sjis, windows31j | |||
| | |||
| | |||
|- | |||
| tcvn | |||
| tcvn, viettcvn | |||
| | |||
| | |||
|- | |- | ||
| us-ascii | | us-ascii | ||
Line 29: | Line 205: | ||
| | | | ||
|- | |- | ||
| | | utf-16 | ||
| | | csunicode, csunicode11, csunicodeascii, iso10646j1, iso10646ucs2, iso10646ucsbasic, utf16 | ||
| windows- | | | ||
| | |||
|- | |||
| utf-16be | |||
| utf16be | |||
| | |||
| | |||
|- | |||
| utf-16le | |||
| utf16le | |||
| | |||
| | |||
|- | |||
| utf-7 | |||
| csunicode11utf7, utf7 | |||
| | |||
| Disabled | |||
|- | |||
| utf-8 | |||
| utf8 | |||
| | |||
| | |||
|- | |||
| viscii | |||
| csviscii, viscii | |||
| | |||
| | |||
|- | |||
| windows-1250 | |||
| cp1250, microsoftcp1250, windows1250 | |||
| | |||
| | |||
|- | |||
| windows-1251 | |||
| cp1251, microsoftcp1251, windows1251 | |||
| | |||
| | | | ||
|- | |- | ||
Line 39: | Line 250: | ||
| | | | ||
|- | |- | ||
| | | windows-1253 | ||
| cp1253, microsoftcp1253, windows1253 | |||
| | |||
| | |||
|- | |||
| windows-1254 | |||
| cp1254, microsoftcp1254, windows1254 | |||
| | |||
| | |||
|- | |||
| windows-1255 | |||
| cp1255, microsoftcp1255, windows1255 | |||
| | |||
| | |||
|- | |||
| windows-1256 | |||
| cp1256, microsoftcp1256, windows1256 | |||
| | |||
| | |||
|- | |||
| windows-1257 | |||
| cp1257, microsoftcp1257, windows1257 | |||
| | |||
| | |||
|- | |||
| windows-1258 | |||
| cp1258, microsoftcp1258, windows1258 | |||
| | |||
| | |||
|- | |||
| windows-sami-2 | |||
| samiws2, windowssami2, ws2 | |||
| | |||
| | |||
|- | |||
| x-mac-ce | |||
| macce | |||
| | |||
| Likely disabled. | |||
|- | |||
| x-mac-cyrillic | |||
| maccyrillic | |||
| | |||
| Likely disabled. | |||
|- | |||
| x-mac-greek | |||
| macgreek | |||
| | |||
| Likely disabled. | |||
|- | |||
| x-mac-turkish | |||
| macturkish | |||
| | | | ||
| Likely disabled. | |||
|- | |||
| x-vps | |||
| vps | |||
| | | | ||
| | | | ||
|} | |} | ||
=== Firefox === | === Firefox === |
Revision as of 20:08, 19 August 2009
My scratchpad for encoding-related notes.
Goals
- Document existing practices for
- Supported encodings
- Supported aliases
- Supported matching algorithm
- Converge the various algorithms in use
- Get the new rules implemented
Current Implementations
Does this differ per platform? Opera might differ a bit on Mac.
Opera
Normalization before matching: UTS #22 (Unicode Technical Standard #22, charset alias matching)
Encoding | Aliases | Decoded As | Notes |
---|---|---|---|
big5 | big5, cnbig5, csbig5 | ||
big5-hkscs | big5hkscs | ||
euc-jp | cseucpkdfmtjapanese, eucjp, extendedunixcodepackedformatforjapanese | ||
euc-kr | cseuckr, csksc56011987, euckr, isoir149, korean, ksc5601, ksc56011987, ksc56011989, windows949 | ||
euc-tw | euctw | ||
gb18030 | gb18030 | ||
gbk | chinese, cngb, cp936, csgb2312, csiso58gb231280, euccn, gb2312, gb231280, gbk, isoir58, ms936, windows936 | ||
hz-gb-2312 | hzgb2312 | ||
ibm866 | 866, cp866, csibm866, ibm866 | ||
iso-2022-cn | iso2022cn | ||
iso-2022-jp | csiso2022jp, iso2022jp | ||
iso-2022-jp-1 | iso2022jp1 | ||
iso-2022-kr | csiso2022kr, iso2022kr | ||
iso-8859-1 | cp819, csisolatin1, ibm819, iso88591, iso885911987, isoir100, l1, latin1 | windows-1252 | |
iso-8859-2 | csisolatin2, iso88592, iso885921987, isoir101, l2, latin2 | ||
iso-8859-3 | csisolatin3, iso88593, iso885931988, isoir109, l3, latin3 | ||
iso-8859-4 | csisolatin4, iso88594, iso885941988, isoir110, l4, latin4 | ||
iso-8859-5 | csisolatincyrillic, cyrillic, iso88595, iso885951988, isoir144 | ||
iso-8859-6 | arabic, asmo708, csiso88596e, csisolatinarabic, ecma114, iso88596, iso885961987, iso88596e, isoir127 | ||
iso-8859-6-i | csiso88596i, iso88596i | ||
iso-8859-7 | csisolatingreek, ecma118, elot928, greek, greek8, iso88597, iso885971987, isoir126 | ||
iso-8859-8 | csiso88598e, csisolatinhebrew, hebrew, iso88598, iso885981988, iso88598e, isoir138, visual | ||
iso-8859-8-i | csiso88598i, iso88598i | ||
iso-8859-9 | csisolatin5, iso88599, iso885991989, isoir148, l5, latin5 | ||
iso-8859-10 | csisolatin6, iso885910, iso8859101992, isoir157, l6, latin6 | ||
iso-8859-11 | iso885911, tis620, tis6202533, windows874 | ||
iso-8859-13 | iso885913 | ||
iso-8859-14 | iso885914, iso8859141998, isoceltic, isoir199, l8, latin8 | ||
iso-8859-15 | iso885915, latin9 | ||
iso-8859-16 | iso885916, iso8859162001, isoir226, l10, latin10 | ||
koi8-r | cskoi8r, koi8r | ||
koi8-u | koi8u | ||
macintosh | csmacintosh, mac, macintosh, macroman | | Likely disabled. |
shift_jis | cp932, csshiftjis, cswindows31j, ms932, mskanji, shiftjis, sjis, windows31j | ||
tcvn | tcvn, viettcvn | ||
us-ascii | ansix341968, ansix341986, ascii, cp367, csascii, csinvariant, csiso646basic1983, ibm367, invariant, iso646basic1983, iso646irv1991, iso646us, isoir6, ref, us, usascii | windows-1252 | |
utf-16 | csunicode, csunicode11, csunicodeascii, iso10646j1, iso10646ucs2, iso10646ucsbasic, utf16 | ||
utf-16be | utf16be | ||
utf-16le | utf16le | ||
utf-7 | csunicode11utf7, utf7 | | Disabled |
utf-8 | utf8 | ||
viscii | csviscii, viscii | ||
windows-1250 | cp1250, microsoftcp1250, windows1250 | ||
windows-1251 | cp1251, microsoftcp1251, windows1251 | ||
windows-1252 | cp1252, microsoftcp1252, windows1252 | ||
windows-1253 | cp1253, microsoftcp1253, windows1253 | ||
windows-1254 | cp1254, microsoftcp1254, windows1254 | ||
windows-1255 | cp1255, microsoftcp1255, windows1255 | ||
windows-1256 | cp1256, microsoftcp1256, windows1256 | ||
windows-1257 | cp1257, microsoftcp1257, windows1257 | ||
windows-1258 | cp1258, microsoftcp1258, windows1258 | ||
windows-sami-2 | samiws2, windowssami2, ws2 | ||
x-mac-ce | macce | | Likely disabled. |
x-mac-cyrillic | maccyrillic | | Likely disabled. |
x-mac-greek | macgreek | | Likely disabled. |
x-mac-turkish | macturkish | | Likely disabled. |
x-vps | vps |
Firefox
FIXME
Chrome
FIXME
Internet Explorer
FIXME