ChrEnc.mjs 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  /**
   * Character encoding resources.
   *
   * @author n1474335 [n1474335@gmail.com]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   */
  /**
   * Character encoding format mappings.
   *
   * Maps a human-readable encoding label to a numeric code page identifier.
   * Labels normally end with that identifier in parentheses; the Lao entry
   * lists several identifiers in its label and maps to the first one.
   *
   * Enumeration order (e.g. via Object.keys) follows the declaration order
   * below, so do not reorder entries unless that change is intended.
   * The labels are runtime strings and must be kept byte-for-byte.
   *
   * @type {Object<string, number>}
   */
  export const IO_FORMAT = {
      // Unicode transformation formats
      "UTF-8 (65001)": 65001,
      "UTF-7 (65000)": 65000,
      "UTF-16LE (1200)": 1200,
      "UTF-16BE (1201)": 1201,
      "UTF-32LE (12000)": 12000,
      "UTF-32BE (12001)": 12001,

      // IBM EBCDIC
      "IBM EBCDIC International (500)": 500,
      "IBM EBCDIC US-Canada (37)": 37,
      "IBM EBCDIC Multilingual/ROECE (Latin 2) (870)": 870,
      "IBM EBCDIC Greek Modern (875)": 875,
      "IBM EBCDIC French (1010)": 1010,
      "IBM EBCDIC Turkish (Latin 5) (1026)": 1026,
      "IBM EBCDIC Latin 1/Open System (1047)": 1047,
      "IBM EBCDIC Lao (1132/1133/1341)": 1132,
      "IBM EBCDIC US-Canada (037 + Euro symbol) (1140)": 1140,
      "IBM EBCDIC Germany (20273 + Euro symbol) (1141)": 1141,
      "IBM EBCDIC Denmark-Norway (20277 + Euro symbol) (1142)": 1142,
      "IBM EBCDIC Finland-Sweden (20278 + Euro symbol) (1143)": 1143,
      "IBM EBCDIC Italy (20280 + Euro symbol) (1144)": 1144,
      "IBM EBCDIC Latin America-Spain (20284 + Euro symbol) (1145)": 1145,
      "IBM EBCDIC United Kingdom (20285 + Euro symbol) (1146)": 1146,
      "IBM EBCDIC France (20297 + Euro symbol) (1147)": 1147,
      "IBM EBCDIC International (500 + Euro symbol) (1148)": 1148,
      "IBM EBCDIC Icelandic (20871 + Euro symbol) (1149)": 1149,
      "IBM EBCDIC Germany (20273)": 20273,
      "IBM EBCDIC Denmark-Norway (20277)": 20277,
      "IBM EBCDIC Finland-Sweden (20278)": 20278,
      "IBM EBCDIC Italy (20280)": 20280,
      "IBM EBCDIC Latin America-Spain (20284)": 20284,
      "IBM EBCDIC United Kingdom (20285)": 20285,
      "IBM EBCDIC Japanese Katakana Extended (20290)": 20290,
      "IBM EBCDIC France (20297)": 20297,
      "IBM EBCDIC Arabic (20420)": 20420,
      "IBM EBCDIC Greek (20423)": 20423,
      "IBM EBCDIC Hebrew (20424)": 20424,
      "IBM EBCDIC Korean Extended (20833)": 20833,
      "IBM EBCDIC Thai (20838)": 20838,
      "IBM EBCDIC Icelandic (20871)": 20871,
      "IBM EBCDIC Cyrillic Russian (20880)": 20880,
      "IBM EBCDIC Turkish (20905)": 20905,
      "IBM EBCDIC Latin 1/Open System (1047 + Euro symbol) (20924)": 20924,
      "IBM EBCDIC Cyrillic Serbian-Bulgarian (21025)": 21025,

      // OEM / DOS
      "OEM United States (437)": 437,
      "OEM Greek (formerly 437G); Greek (DOS) (737)": 737,
      "OEM Baltic; Baltic (DOS) (775)": 775,
      "OEM Russian; Cyrillic + Euro symbol (808)": 808,
      "OEM Multilingual Latin 1; Western European (DOS) (850)": 850,
      "OEM Latin 2; Central European (DOS) (852)": 852,
      "OEM Cyrillic (primarily Russian) (855)": 855,
      "OEM Turkish; Turkish (DOS) (857)": 857,
      "OEM Multilingual Latin 1 + Euro symbol (858)": 858,
      "OEM Portuguese; Portuguese (DOS) (860)": 860,
      "OEM Icelandic; Icelandic (DOS) (861)": 861,
      "OEM Hebrew; Hebrew (DOS) (862)": 862,
      "OEM French Canadian; French Canadian (DOS) (863)": 863,
      "OEM Arabic; Arabic (864) (864)": 864,
      "OEM Nordic; Nordic (DOS) (865)": 865,
      "OEM Russian; Cyrillic (DOS) (866)": 866,
      "OEM Modern Greek; Greek, Modern (DOS) (869)": 869,
      "OEM Cyrillic (primarily Russian) + Euro Symbol (872)": 872,

      // Windows
      "Windows-874 Thai (874)": 874,
      "Windows-1250 Central European (1250)": 1250,
      "Windows-1251 Cyrillic (1251)": 1251,
      "Windows-1252 Latin (1252)": 1252,
      "Windows-1253 Greek (1253)": 1253,
      "Windows-1254 Turkish (1254)": 1254,
      "Windows-1255 Hebrew (1255)": 1255,
      "Windows-1256 Arabic (1256)": 1256,
      "Windows-1257 Baltic (1257)": 1257,
      "Windows-1258 Vietnam (1258)": 1258,

      // ISO 8859
      "ISO-8859-1 Latin 1 Western European (28591)": 28591,
      "ISO-8859-2 Latin 2 Central European (28592)": 28592,
      "ISO-8859-3 Latin 3 South European (28593)": 28593,
      "ISO-8859-4 Latin 4 North European (28594)": 28594,
      "ISO-8859-5 Latin/Cyrillic (28595)": 28595,
      "ISO-8859-6 Latin/Arabic (28596)": 28596,
      "ISO-8859-7 Latin/Greek (28597)": 28597,
      "ISO-8859-8 Latin/Hebrew (28598)": 28598,
      "ISO 8859-8 Hebrew (ISO-Logical) (38598)": 38598,
      "ISO-8859-9 Latin 5 Turkish (28599)": 28599,
      "ISO-8859-10 Latin 6 Nordic (28600)": 28600,
      "ISO-8859-11 Latin/Thai (28601)": 28601,
      "ISO-8859-13 Latin 7 Baltic Rim (28603)": 28603,
      "ISO-8859-14 Latin 8 Celtic (28604)": 28604,
      "ISO-8859-15 Latin 9 (28605)": 28605,
      "ISO-8859-16 Latin 10 (28606)": 28606,

      // ISO 2022 / ISO 6937
      "ISO 2022 JIS Japanese with no halfwidth Katakana (50220)": 50220,
      "ISO 2022 JIS Japanese with halfwidth Katakana (50221)": 50221,
      "ISO 2022 Japanese JIS X 0201-1989 (1 byte Kana-SO/SI) (50222)": 50222,
      "ISO 2022 Korean (50225)": 50225,
      "ISO 2022 Simplified Chinese (50227)": 50227,
      "ISO 6937 Non-Spacing Accent (20269)": 20269,

      // EUC
      "EUC Japanese (51932)": 51932,
      "EUC Simplified Chinese (51936)": 51936,
      "EUC Korean (51949)": 51949,

      // ISCII
      "ISCII Devanagari (57002)": 57002,
      "ISCII Bengali (57003)": 57003,
      "ISCII Tamil (57004)": 57004,
      "ISCII Telugu (57005)": 57005,
      "ISCII Assamese (57006)": 57006,
      "ISCII Oriya (57007)": 57007,
      "ISCII Kannada (57008)": 57008,
      "ISCII Malayalam (57009)": 57009,
      "ISCII Gujarati (57010)": 57010,
      "ISCII Punjabi (57011)": 57011,

      // East Asian, ASCII, KOI8 and other legacy encodings
      "Japanese Shift-JIS (932)": 932,
      "Simplified Chinese GBK (936)": 936,
      "Korean (949)": 949,
      "Traditional Chinese Big5 (950)": 950,
      "US-ASCII (7-bit) (20127)": 20127,
      "Simplified Chinese GB2312 (20936)": 20936,
      "KOI8-R Russian Cyrillic (20866)": 20866,
      "KOI8-U Ukrainian Cyrillic (21866)": 21866,
      "Mazovia (Polish) MS-DOS (620)": 620,
      "Arabic (ASMO 708) (708)": 708,
      "Arabic (Transparent ASMO); Arabic (DOS) (720)": 720,
      "Kamenický (Czech) MS-DOS (895)": 895,
      "Korean (Johab) (1361)": 1361,

      // Macintosh
      "MAC Roman (10000)": 10000,
      "Japanese (Mac) (10001)": 10001,
      "MAC Traditional Chinese (Big5) (10002)": 10002,
      "Korean (Mac) (10003)": 10003,
      "Arabic (Mac) (10004)": 10004,
      "Hebrew (Mac) (10005)": 10005,
      "Greek (Mac) (10006)": 10006,
      "Cyrillic (Mac) (10007)": 10007,
      "MAC Simplified Chinese (GB 2312) (10008)": 10008,
      "Romanian (Mac) (10010)": 10010,
      "Ukrainian (Mac) (10017)": 10017,
      "Thai (Mac) (10021)": 10021,
      "MAC Latin 2 (Central European) (10029)": 10029,
      "Icelandic (Mac) (10079)": 10079,
      "Turkish (Mac) (10081)": 10081,
      "Croatian (Mac) (10082)": 10082,

      // Taiwan
      "CNS Taiwan (Chinese Traditional) (20000)": 20000,
      "TCA Taiwan (20001)": 20001,
      "ETEN Taiwan (Chinese Traditional) (20002)": 20002,
      "IBM5550 Taiwan (20003)": 20003,
      "TeleText Taiwan (20004)": 20004,
      "Wang Taiwan (20005)": 20005,

      // IA5 / T.61
      "Western European IA5 (IRV International Alphabet 5) (20105)": 20105,
      "IA5 German (7-bit) (20106)": 20106,
      "IA5 Swedish (7-bit) (20107)": 20107,
      "IA5 Norwegian (7-bit) (20108)": 20108,
      "T.61 (20261)": 20261,

      // Remaining entries
      "Japanese (JIS 0208-1990 and 0212-1990) (20932)": 20932,
      "Korean Wansung (20949)": 20949,
      "Extended/Ext Alpha Lowercase (21027)": 21027,
      "Europa 3 (29001)": 29001,
      "Atari ST/TT (47451)": 47451,
      "HZ-GB2312 Simplified Chinese (52936)": 52936,
      "Simplified Chinese GB18030 (54936)": 54936,
  };
  /**
   * Unicode Normalisation Forms
   *
   * @author Matthieu [m@tthieu.xyz]
   * @copyright Crown Copyright 2016
   * @license Apache-2.0
   */
  /**
   * Names of the four Unicode normalisation forms, in this order:
   * NFD (canonical decomposition), NFC (canonical composition),
   * NFKD (compatibility decomposition), NFKC (compatibility composition).
   *
   * @type {string[]}
   */
  export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];