diff --git a/BUILD.gn b/BUILD.gn index e4ba69032b1afd2bdb7d5e02320a2d97d7763452..c41223a8780b4ffce90cdd4632c29c2851cbf305 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -110,6 +110,10 @@ ohos_shared_library("libpcre2") { "updater", ] license_file = "$PCRE2_LIB_DIR/LICENCE" - part_name = "selinux" - subsystem_name = "security" + innerapi_tags = [ + "platformsdk_indirect", + "chipsetsdk_indirect", + ] + part_name = "pcre2" + subsystem_name = "thirdparty" } diff --git a/README.OpenSource b/README.OpenSource index a0aa847d3a236aa40131e35f6267c6399d7b4058..cb1360c42d17a9be21f868927745b96027f99aeb 100644 --- a/README.OpenSource +++ b/README.OpenSource @@ -3,7 +3,7 @@ "Name": "PCRE2", "License": "PCRE2 LICENCE", "License File": "LICENSE", - "Version Number": "pcre2-10.39", + "Version Number": "pcre2-10.40", "Owner": "jiangxiaofeng8@huawei.com", "Upstream URL": "https://github.com/PhilipHazel/pcre2.git", "Description": "pcre2 is a re_working of the original PCRE1 library to provide an entirely new API." diff --git a/bundle.json b/bundle.json index 0ee0b30f5d0d54940601e410ed0ebf85e8df82ff..c935fa8d3124eb7e55175d4b245d00fddf3385aa 100644 --- a/bundle.json +++ b/bundle.json @@ -11,13 +11,13 @@ "scripts": {}, "licensePath": "pcre2/LICENSE", "component": { - "name": "thirdparty_pcre2", - "subsystem": "", + "name": "pcre2", + "subsystem": "thirdparty", "syscap": [], "features": [], - "adapted_system_type": [], - "rom": "", - "ram": "", + "adapted_system_type": [ "standard" ], + "rom": "512KB", + "ram": "512KB", "deps": { "components": [], "third_party": [] diff --git a/pcre2/.gitignore b/pcre2/.gitignore index e104501d760fb10e44e4fdc588462e3c5277a105..3e3284eeee3c0e6f266f7108f373fb9c7d5aa37e 100644 --- a/pcre2/.gitignore +++ b/pcre2/.gitignore @@ -7,6 +7,7 @@ *.o *~ +__pycache__ .deps .libs diff --git a/pcre2/132html b/pcre2/132html old mode 100755 new mode 100644 diff --git a/pcre2/AUTHORS b/pcre2/AUTHORS index bec8a1e5adce4e605b9da70410ef09f57d054017..11ef898b2501732692fa2c76ad14e01215de0ce5 100644 --- a/pcre2/AUTHORS +++ b/pcre2/AUTHORS @@ -8,7 +8,7 @@ Email domain: gmail.com Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved @@ -19,7 +19,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -30,7 +30,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Emain domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved. #### diff --git a/pcre2/CMakeLists.txt b/pcre2/CMakeLists.txt index 8010497f569a22c57caa151fb18fd4c3a0898d53..7febf337d7fb9e79231d3b6825532dc540cf752f 100644 --- a/pcre2/CMakeLists.txt +++ b/pcre2/CMakeLists.txt @@ -110,6 +110,11 @@ CMAKE_MINIMUM_REQUIRED(VERSION 3.0.0) # GET_TARGET_PROPERTY. This should no longer be required. # CMAKE_POLICY(SET CMP0026 OLD) +# With a recent cmake, you can provide a rootdir to look for non +# standard installed library dependencies, but to do so, the policy +# needs to be set to new (by uncommenting the following) +# CMAKE_POLICY(SET CMP0074 NEW) + # For FindReadline.cmake. This was changed to allow setting CMAKE_MODULE_PATH # on the command line. # SET(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) @@ -142,10 +147,16 @@ CHECK_INCLUDE_FILE(windows.h HAVE_WINDOWS_H) CHECK_SYMBOL_EXISTS(bcopy "strings.h" HAVE_BCOPY) CHECK_SYMBOL_EXISTS(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE) CHECK_SYMBOL_EXISTS(memmove "string.h" HAVE_MEMMOVE) -CHECK_SYMBOL_EXISTS(realpath "stdlib.h" HAVE_REALPATH) CHECK_SYMBOL_EXISTS(secure_getenv "stdlib.h" HAVE_SECURE_GETENV) CHECK_SYMBOL_EXISTS(strerror "string.h" HAVE_STRERROR) +CHECK_C_SOURCE_COMPILES( + "#include + #include + int main(int c, char *v[]) { char buf[PATH_MAX]; realpath(v[1], buf); return 0; }" + HAVE_REALPATH +) + set(ORIG_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS}) set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror") CHECK_C_SOURCE_COMPILES( @@ -300,9 +311,19 @@ ENDIF(PCRE2_SUPPORT_LIBZ) IF(EDITLINE_FOUND) OPTION (PCRE2_SUPPORT_LIBEDIT "Enable support for linking pcre2test with libedit." OFF) ENDIF(EDITLINE_FOUND) -IF(PCRE2_SUPPORT_LIBEDIT) - INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) -ENDIF(PCRE2_SUPPORT_LIBEDIT) +IF(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + INCLUDE_DIRECTORIES(${EDITLINE_INCLUDE_DIR}) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ELSE(EDITLINE_FOUND) + IF(PCRE2_SUPPORT_LIBEDIT) + MESSAGE(FATAL_ERROR + " libedit not found, set EDITLINE_INCLUDE_DIR to a compatible header\n" + " or set Editline_ROOT to a full libedit installed tree, as needed\n" + " Might need to enable policy CMP0074 in CMakeLists.txt" + ) + ENDIF(PCRE2_SUPPORT_LIBEDIT) +ENDIF(EDITLINE_FOUND) # readline lib IF(READLINE_FOUND) @@ -340,7 +361,12 @@ IF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) ENDIF(PCRE2_BUILD_PCRE2GREP AND NOT PCRE2_BUILD_PCRE2_8) IF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) - MESSAGE(FATAL_ERROR "Only one of libreadline or libeditline can be specified") + IF(READLINE_FOUND) + MESSAGE(FATAL_ERROR + " Only one of the readline compatible libraries can be enabled.\n" + " Disable libreadline with -DPCRE2_SUPPORT_LIBREADLINE=OFF" + ) + ENDIF(READLINE_FOUND) ENDIF(PCRE2_SUPPORT_LIBREADLINE AND PCRE2_SUPPORT_LIBEDIT) IF(PCRE2_SUPPORT_BSR_ANYCRLF) @@ -1022,25 +1048,23 @@ FILE(GLOB html ${PROJECT_SOURCE_DIR}/doc/html/*.html) FILE(GLOB man1 ${PROJECT_SOURCE_DIR}/doc/*.1) FILE(GLOB man3 ${PROJECT_SOURCE_DIR}/doc/*.3) -FOREACH(man ${man3}) - GET_FILENAME_COMPONENT(man_tmp ${man} NAME) - SET(man3_new ${man3} ${man}) -ENDFOREACH(man ${man3}) -SET(man3 ${man3_new}) - INSTALL(FILES ${man1} DESTINATION man/man1) INSTALL(FILES ${man3} DESTINATION man/man3) INSTALL(FILES ${html} DESTINATION share/doc/pcre2/html) IF(MSVC AND INSTALL_MSVC_PDB) - INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2.pdb - ${PROJECT_BINARY_DIR}/pcre2posix.pdb - DESTINATION bin - CONFIGURATIONS RelWithDebInfo) - INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2d.pdb - ${PROJECT_BINARY_DIR}/pcre2posixd.pdb - DESTINATION bin - CONFIGURATIONS Debug) + INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8.pdb + ${PROJECT_BINARY_DIR}/pcre2-16.pdb + ${PROJECT_BINARY_DIR}/pcre2-32.pdb + ${PROJECT_BINARY_DIR}/pcre2-posix.pdb + DESTINATION bin + CONFIGURATIONS RelWithDebInfo) + INSTALL(FILES ${PROJECT_BINARY_DIR}/pcre2-8d.pdb + ${PROJECT_BINARY_DIR}/pcre2-16d.pdb + ${PROJECT_BINARY_DIR}/pcre2-32d.pdb + ${PROJECT_BINARY_DIR}/pcre2-posixd.pdb + DESTINATION bin + CONFIGURATIONS Debug) ENDIF(MSVC AND INSTALL_MSVC_PDB) # Help, only for nice output diff --git a/pcre2/ChangeLog b/pcre2/ChangeLog index d27542d372fda567935d8061e16b5157d7186ef3..856250f14e344be4c654b2953bfda8684c6b120a 100644 --- a/pcre2/ChangeLog +++ b/pcre2/ChangeLog @@ -1,6 +1,111 @@ Change Log for PCRE2 -------------------- + +Version 10.40 15-April-2022 +--------------------------- + +1. Merged patch from @carenas (GitHub #35, 7db87842) to fix pcre2grep incorrect +handling of multiple passes. + +2. Merged patch from @carenas (GitHub #36, dae47509) to fix portability issue +in pcre2grep with buffered fseek(stdin). + +3. Merged patch from @carenas (GitHub #37, acc520924) to fix tests when -S is +not supported. + +4. Revert an unintended change in JIT repeat detection. + +5. Merged patch from @carenas (GitHub #52, b037bfa1) to fix build on GNU Hurd. + +6. Merged documentation and comments patches from @carenas (GitHub #47). + +7. Merged patch from @carenas (GitHub #49) to remove obsolete JFriedl test code +from pcre2grep. + +8. Merged patch from @carenas (GitHub #48) to fix CMake install issue #46. + +9. Merged patch from @carenas (GitHub #53) fixing NULL checks in matching and +substituting. + +10. Add null_subject and null_replacement modifiers to pcre2test. + +11. Add check for NULL subject to POSIX regexec() function. + +12. Add check for NULL replacement to pcre2_substitute(). + +13. For the subject arguments of pcre2_match(), pcre2_dfa_match(), and +pcre2_substitute(), and the replacement argument of the latter, if the pointer +is NULL and the length is zero, treat as an empty string. Apparently a number +of applications treat NULL/0 in this way. + +14. Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +15. Fix some minor issues raised by clang sanitize. + +16. Very minor code speed up for maximizing character property matches. + +17. A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +18. The Python scripts in the maint directory have been refactored. There are +now three scripts that generate pcre2_ucd.c, pcre2_ucp.h, and pcre2_ucptables.c +(which is #included by pcre2_tables.c). The data lists that used to be +duplicated are now held in a single common Python module. + +19. On CHERI, and thus Arm's Morello prototype, pointers are represented as +hardware capabilities, which consist of both an integer address and additional +metadata, meaning they are twice the size of the platform's size_t type, i.e. +16 bytes on a 64-bit system. The ovector member of heapframe happens to only be +8 byte aligned, and so computing frame_size ended up with a multiple of 8 but +not 16. Whilst the first frame was always suitably aligned, this then +misaligned the frame that follows, resulting in an alignment fault when storing +a pointer to Fecode at the start of match. Patch to fix this issue by Jessica +Clarke PR#72. + +20. Added -LP and -LS listing options to pcre2test. + +21. A user discovered that the library names in CMakeLists.txt for MSVC +debugger (PDB) files were incorrect - perhaps never tried for PCRE2? + +22. An item such as [Aa] is optimized into a caseless single character match. +When this was quantified (e.g. [Aa]{2}) and was also the last literal item in a +pattern, the optimizing "must be present for a match" character check was not +being flagged as caseless, causing some matches that should have succeeded to +fail. + +23. Fixed a unicode properrty matching issue in JIT. The character was not +fully read in caseless matching. + +24. Fixed an issue affecting recursions in JIT caused by duplicated data +transfers. + +25. Merged patch from @carenas (GitHub #96) which fixes some problems with +pcre2test and readline/readedit: + + * Use the right header for libedit in FreeBSD with autoconf + * Really allow libedit with cmake + * Avoid using readline headers with libedit + + Version 10.39 29-October-2021 ----------------------------- diff --git a/pcre2/CheckMan b/pcre2/CheckMan old mode 100755 new mode 100644 diff --git a/pcre2/CleanTxt b/pcre2/CleanTxt old mode 100755 new mode 100644 diff --git a/pcre2/Detrail b/pcre2/Detrail old mode 100755 new mode 100644 diff --git a/pcre2/HACKING b/pcre2/HACKING index 20faf8f47c78b26893c826c318c468d246ddb6ca..cad11b388fa6a99a82b3a26a14a25a232b7e84b6 100644 --- a/pcre2/HACKING +++ b/pcre2/HACKING @@ -546,8 +546,9 @@ Each is followed by two code units that encode the desired property as a type and a value. The types are a set of #defines of the form PT_xxx, and the values are enumerations of the form ucp_xx, defined in the pcre2_ucp.h source file. The value is relevant only for PT_GC (General Category), PT_PC (Particular -Category), PT_SC (Script), and the pseudo-property PT_CLIST, which is used to -identify a list of case-equivalent characters when there are three or more. +Category), PT_SC (Script), PT_BIDICL (Bidi Class), and the pseudo-property +PT_CLIST, which is used to identify a list of case-equivalent characters when +there are three or more. Repeats of these items use the OP_TYPESTAR etc. set of opcodes, followed by three code units: OP_PROP or OP_NOTPROP, and then the desired property type and @@ -827,4 +828,4 @@ not a real opcode, but is used to check at compile time that tables indexed by opcode are the correct length, in order to catch updating errors. Philip Hazel -12 July 2019 +December 2021 diff --git a/pcre2/LICENCE b/pcre2/LICENCE index b1ec61be440ca19dea41f10f41f134f2ce11d6dd..2f3cd5cac530abeded501539f7c446813c8ba466 100644 --- a/pcre2/LICENCE +++ b/pcre2/LICENCE @@ -26,7 +26,7 @@ Email domain: gmail.com Retired from University of Cambridge Computing Service, Cambridge, England. -Copyright (c) 1997-2021 University of Cambridge +Copyright (c) 1997-2022 University of Cambridge All rights reserved. @@ -37,7 +37,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2010-2021 Zoltan Herczeg +Copyright(c) 2010-2022 Zoltan Herczeg All rights reserved. @@ -48,7 +48,7 @@ Written by: Zoltan Herczeg Email local part: hzmester Email domain: freemail.hu -Copyright(c) 2009-2021 Zoltan Herczeg +Copyright(c) 2009-2022 Zoltan Herczeg All rights reserved. diff --git a/pcre2/Makefile.am b/pcre2/Makefile.am index 9a234652c490cac0ed4fdc27ca23045c073ba1ce..b1adb6f415b70610f63391aff1e9b12a460d770a 100644 --- a/pcre2/Makefile.am +++ b/pcre2/Makefile.am @@ -382,6 +382,10 @@ COMMON_SOURCES = \ src/pcre2_valid_utf.c \ src/pcre2_xclass.c +# The pcre2_ucptables.c file is #included by pcre2_tables.c + +EXTRA_DIST += src/pcre2_ucptables.c + if WITH_PCRE2_8 lib_LTLIBRARIES += libpcre2-8.la libpcre2_8_la_SOURCES = \ @@ -663,6 +667,7 @@ EXTRA_DIST += \ testdata/testinput23 \ testdata/testinput24 \ testdata/testinput25 \ + testdata/testinput26 \ testdata/testinputEBC \ testdata/testoutput1 \ testdata/testoutput2 \ @@ -705,6 +710,7 @@ EXTRA_DIST += \ testdata/testoutput23 \ testdata/testoutput24 \ testdata/testoutput25 \ + testdata/testoutput26 \ testdata/testoutputEBC \ testdata/valgrind-jit.supp \ testdata/wintestinput3 \ diff --git a/pcre2/NEWS b/pcre2/NEWS index 4a745252b25e31da729c8b4ae32a591bfa8cfac4..c9b8da2ee2c4915cb805a75edd931bc6051eefe7 100644 --- a/pcre2/NEWS +++ b/pcre2/NEWS @@ -2,6 +2,38 @@ News about PCRE2 releases ------------------------- +Version 10.40 15-April-2022 +--------------------------- + +This is mostly a bug-fixing and code-tidying release. However, there are some +extensions to Unicode property handling: + +* Added support for Bidi_Class and a number of binary Unicode properties, +including Bidi_Control. + +* A number of changes to script matching for \p and \P: + + (a) Script extensions for a character are now coded as a bitmap instead of + a list of script numbers, which should be faster and does not need a + loop. + + (b) Added the syntax \p{script:xxx} and \p{script_extensions:xxx} (synonyms + sc and scx). + + (c) Changed \p{scriptname} from being the same as \p{sc:scriptname} to being + the same as \p{scx:scriptname} because this change happened in Perl at + release 5.26. + + (d) The standard Unicode 4-letter abbreviations for script names are now + recognized. + + (e) In accordance with Unicode and Perl's "loose matching" rules, spaces, + hyphens, and underscores are ignored in property names, which are then + matched independent of case. + +As always, see ChangeLog for a list of all changes (also the Git log). + + Version 10.39 29-October-2021 ----------------------------- diff --git a/pcre2/PrepareRelease b/pcre2/PrepareRelease old mode 100755 new mode 100644 diff --git a/pcre2/README b/pcre2/README index 67e46b494aa7e1433973d8b80293ca589c925b9f..78969444ffc57c33d100a31e709c1bc34a3e6247 100644 --- a/pcre2/README +++ b/pcre2/README @@ -114,12 +114,18 @@ Building PCRE2 using autotools The following instructions assume the use of the widely used "configure; make; make install" (autotools) process. -To build PCRE2 on system that supports autotools, first run the "configure" -command from the PCRE2 distribution directory, with your current directory set +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set to the directory where you want the files to be created. This command is a standard GNU "autoconf" configuration script, for which generic instructions are supplied in the file INSTALL. +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + Most commonly, people build PCRE2 within its own distribution directory, and in this case, on many systems, just running "./configure" is sufficient. However, the usual methods of changing standard defaults are available. For example: @@ -188,10 +194,10 @@ library. They are also documented in the pcre2build man page. As well as supporting UTF strings, Unicode support includes support for the \P, \p, and \X sequences that recognize Unicode character properties. - However, only the basic two-letter properties such as Lu are supported. - Escape sequences such as \d and \w in patterns do not by default make use of - Unicode properties, but can be made to do so by setting the PCRE2_UCP option - or starting a pattern with (*UCP). + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any of the preceding, or any of the Unicode newline sequences, or the NUL (zero) @@ -411,7 +417,7 @@ The "configure" script builds the following files for the basic C library: . Makefile the makefile that builds the library . src/config.h build-time configuration options for the library . src/pcre2.h the public PCRE2 header file -. pcre2-config script that shows the building settings such as CFLAGS +. pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" . libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command @@ -571,9 +577,9 @@ at build time" for more details. Making new tarballs ------------------- -The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats. -The command "make distcheck" does the same, but then does a trial build of the -new distribution to ensure that it works. +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you should first run the PrepareRelease script before making a distribution. This @@ -602,13 +608,13 @@ is available. RunTest outputs a comment when it skips a test. Many (but not all) of the tests that are not skipped are run twice if JIT support is available. On the second run, JIT compilation is forced. This -testing can be suppressed by putting "nojit" on the RunTest command line. +testing can be suppressed by putting "-nojit" on the RunTest command line. The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit libraries that are enabled. If you want to run just one set of tests, call RunTest with either the -8, -16 or -32 option. -If valgrind is installed, you can run the tests under it by putting "valgrind" +If valgrind is installed, you can run the tests under it by putting "-valgrind" on the RunTest command line. To run pcre2test on just one or more specific test files, give their numbers as arguments to RunTest, for example: @@ -905,4 +911,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -Last updated: 29 October 2021 +Last updated: 15 April 2022 diff --git a/pcre2/RunGrepTest b/pcre2/RunGrepTest old mode 100755 new mode 100644 index 25f69bd883b6d5ecaa753732b962d7220ff652fe..443ed76657e3395e04d2e9c4ad7fa0cdb9f095c2 --- a/pcre2/RunGrepTest +++ b/pcre2/RunGrepTest @@ -674,10 +674,14 @@ echo "---------------------------- Test 131 -----------------------------" >>tes echo "RC=$?" >>testtrygrep echo "---------------------------- Test 132 -----------------------------" >>testtrygrep -(cd $srcdir; $valgrind $vjs $pcre2grep -m1 -A3 '^match'; echo '---'; head -1) <$srcdir/testdata/grepinput >>testtrygrep 2>&1 +(cd $srcdir; exec 3>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep echo "---------------------------- Test 133 -----------------------------" >>testtrygrep +(cd $srcdir; exec 3>testtrygrep 2>&1 +echo "RC=$?" >>testtrygrep + +echo "---------------------------- Test 134 -----------------------------" >>testtrygrep (cd $srcdir; $valgrind $vjs $pcre2grep -m1 -O '=$x{41}$x423$o{103}$o1045=' 'fox') <$srcdir/testdata/grepinputv >>testtrygrep 2>&1 echo "RC=$?" >>testtrygrep diff --git a/pcre2/RunTest b/pcre2/RunTest old mode 100755 new mode 100644 index 14d9f60f627b0730815a8ef260551e1fbafe9c0a..9b67870de4f153fc33f3007845fce76ef3cba19f --- a/pcre2/RunTest +++ b/pcre2/RunTest @@ -80,7 +80,8 @@ title22="Test 22: \C tests with UTF (not supported for DFA matching)" title23="Test 23: \C disabled test" title24="Test 24: Non-UTF pattern conversion tests" title25="Test 25: UTF pattern conversion tests" -maxtest=25 +title26="Test 26: Auto-generated unicode property tests" +maxtest=26 if [ $# -eq 1 -a "$1" = "list" ]; then echo $title0 @@ -109,6 +110,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then echo $title23 echo $title24 echo $title25 + echo $title26 exit 0 fi @@ -238,6 +240,7 @@ do22=no do23=no do24=no do25=no +do26=no while [ $# -gt 0 ] ; do case $1 in @@ -267,6 +270,7 @@ while [ $# -gt 0 ] ; do 23) do23=yes;; 24) do24=yes;; 25) do25=yes;; + 26) do26=yes;; -8) arg8=yes;; -16) arg16=yes;; -32) arg32=yes;; @@ -320,7 +324,8 @@ fi # set up a large stack. $sim ./pcre2test -S 64 /dev/null /dev/null -if [ $? -eq 0 -a "$bigstack" != "" ] ; then +support_setstack=$? +if [ $support_setstack -eq 0 -a "$bigstack" != "" ] ; then setstack="-S 64" else setstack="" @@ -416,7 +421,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ $do12 = no -a $do13 = no -a $do14 = no -a $do15 = no -a \ $do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \ $do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \ - $do24 = no -a $do25 = no \ + $do24 = no -a $do25 = no -a $do26 = no \ ]; then do0=yes do1=yes @@ -444,6 +449,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \ do23=yes do24=yes do25=yes + do26=yes fi # Handle any explicit skips at this stage, so that an argument list may consist @@ -479,7 +485,9 @@ for bmode in "$test8" "$test16" "$test32"; do echo '' >testtry checkspecial '-C' checkspecial '--help' - checkspecial '-S 1 -t 10 testSinput' + if [ $support_setstack -eq 0 ] ; then + checkspecial '-S 1 -t 10 testSinput' + fi echo " OK" fi @@ -860,6 +868,20 @@ for bmode in "$test8" "$test16" "$test32"; do fi fi + # Auto-generated unicode property tests + + if [ $do26 = yes ] ; then + echo $title26 + if [ $utf -eq 0 ] ; then + echo " Skipped because UTF-$bits support is not available" + else + for opt in "" $jitopt; do + $sim $valgrind ${opt:+$vjs} ./pcre2test -q $setstack $bmode $opt $testdata/testinput26 testtry + checkresult $? 26 "$opt" + done + fi + fi + # End of loop for 8/16/32-bit tests done diff --git a/pcre2/autogen.sh b/pcre2/autogen.sh old mode 100755 new mode 100644 diff --git a/pcre2/cmake/FindEditline.cmake b/pcre2/cmake/FindEditline.cmake index 2d0b7cc543c3b5259ea2886d1903ee39c27ea93e..1f0c9514badb006a238b59d10fc79d310c5c0142 100644 --- a/pcre2/cmake/FindEditline.cmake +++ b/pcre2/cmake/FindEditline.cmake @@ -1,17 +1,16 @@ # Modified from FindReadline.cmake (PH Feb 2012) -if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) +if(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) set(EDITLINE_FOUND TRUE) -else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) - FIND_PATH(EDITLINE_INCLUDE_DIR readline.h - /usr/include/editline - /usr/include/edit/readline - /usr/include/readline +else(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) + FIND_PATH(EDITLINE_INCLUDE_DIR readline.h PATH_SUFFIXES + editline + edit/readline ) FIND_LIBRARY(EDITLINE_LIBRARY NAMES edit) include(FindPackageHandleStandardArgs) - FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY ) + FIND_PACKAGE_HANDLE_STANDARD_ARGS(Editline DEFAULT_MSG EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) MARK_AS_ADVANCED(EDITLINE_INCLUDE_DIR EDITLINE_LIBRARY) -endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY AND NCURSES_LIBRARY) +endif(EDITLINE_INCLUDE_DIR AND EDITLINE_LIBRARY) diff --git a/pcre2/configure.ac b/pcre2/configure.ac index d3721c22a058e3eec273d56f7773fe978eb3452b..9b4d796aa329a033f052c485a82b70954725abbc 100644 --- a/pcre2/configure.ac +++ b/pcre2/configure.ac @@ -9,15 +9,15 @@ dnl The PCRE2_PRERELEASE feature is for identifying release candidates. It might dnl be defined as -RC2, for example. For real releases, it should be empty. m4_define(pcre2_major, [10]) -m4_define(pcre2_minor, [39]) +m4_define(pcre2_minor, [40]) m4_define(pcre2_prerelease, []) -m4_define(pcre2_date, [2021-10-29]) +m4_define(pcre2_date, [2022-04-14]) # Libtool shared library interface versions (current:revision:age) -m4_define(libpcre2_8_version, [10:4:10]) -m4_define(libpcre2_16_version, [10:4:10]) -m4_define(libpcre2_32_version, [10:4:10]) -m4_define(libpcre2_posix_version, [3:1:0]) +m4_define(libpcre2_8_version, [11:0:11]) +m4_define(libpcre2_16_version, [11:0:11]) +m4_define(libpcre2_32_version, [11:0:11]) +m4_define(libpcre2_posix_version, [3:2:0]) # NOTE: The CMakeLists.txt file searches for the above variables in the first # 50 lines of this file. Please update that if the variables above are moved. @@ -512,7 +512,20 @@ AC_TYPE_SIZE_T # Checks for library functions. -AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp realpath secure_getenv strerror) +AC_CHECK_FUNCS(bcopy memfd_create memmove mkostemp secure_getenv strerror) +AC_MSG_CHECKING([for realpath]) +AC_LINK_IFELSE([AC_LANG_PROGRAM([[ +#include +#include +]],[[ +char buffer[PATH_MAX]; +realpath(".", buffer); +]])], +[AC_MSG_RESULT([yes]) + AC_DEFINE([HAVE_REALPATH], 1, + [Define to 1 if you have the `realpath' function.]) +], +AC_MSG_RESULT([no])) # Check for the availability of libz (aka zlib) @@ -584,14 +597,14 @@ if test "$enable_pcre2test_libreadline" = "yes"; then fi fi - # Check for the availability of libedit. Different distributions put its # headers in different places. Try to cover the most common ones. if test "$enable_pcre2test_libedit" = "yes"; then - AC_CHECK_HEADERS([editline/readline.h], [HAVE_EDITLINE_READLINE_H=1], - [AC_CHECK_HEADERS([edit/readline/readline.h], [HAVE_READLINE_READLINE_H=1], - [AC_CHECK_HEADERS([readline/readline.h], [HAVE_READLINE_READLINE_H=1])])]) + AC_CHECK_HEADERS([editline/readline.h edit/readline/readline.h readline.h], [ + HAVE_LIBEDIT_HEADER=1 + break + ]) AC_CHECK_LIB([edit], [readline], [LIBEDIT="-ledit"]) fi @@ -927,10 +940,9 @@ if test "$enable_pcre2test_libedit" = "yes"; then echo "** Cannot use both --enable-pcre2test-libedit and --enable-pcre2test-readline" exit 1 fi - if test "$HAVE_EDITLINE_READLINE_H" != "1" -a \ - "$HAVE_READLINE_READLINE_H" != "1"; then - echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h" - echo "** nor readline/readline.h was found." + if test -z "$HAVE_LIBEDIT_HEADER"; then + echo "** Cannot --enable-pcre2test-libedit because neither editline/readline.h," + echo "** edit/readline/readline.h nor a compatible header was found." exit 1 fi if test -z "$LIBEDIT"; then diff --git a/pcre2/doc/html/README.txt b/pcre2/doc/html/README.txt index 67e46b494aa7e1433973d8b80293ca589c925b9f..78969444ffc57c33d100a31e709c1bc34a3e6247 100644 --- a/pcre2/doc/html/README.txt +++ b/pcre2/doc/html/README.txt @@ -114,12 +114,18 @@ Building PCRE2 using autotools The following instructions assume the use of the widely used "configure; make; make install" (autotools) process. -To build PCRE2 on system that supports autotools, first run the "configure" -command from the PCRE2 distribution directory, with your current directory set +If you have downloaded and unpacked a PCRE2 release tarball, run the +"configure" command from the PCRE2 directory, with your current directory set to the directory where you want the files to be created. This command is a standard GNU "autoconf" configuration script, for which generic instructions are supplied in the file INSTALL. +The files in the GitHub repository do not contain "configure". If you have +downloaded the PCRE2 source files from GitHub, before you can run "configure" +you must run the shell script called autogen.sh. This runs a number of +autotools to create a "configure" script (you must of course have the autotools +commands installed in order to do this). + Most commonly, people build PCRE2 within its own distribution directory, and in this case, on many systems, just running "./configure" is sufficient. However, the usual methods of changing standard defaults are available. For example: @@ -188,10 +194,10 @@ library. They are also documented in the pcre2build man page. As well as supporting UTF strings, Unicode support includes support for the \P, \p, and \X sequences that recognize Unicode character properties. - However, only the basic two-letter properties such as Lu are supported. - Escape sequences such as \d and \w in patterns do not by default make use of - Unicode properties, but can be made to do so by setting the PCRE2_UCP option - or starting a pattern with (*UCP). + However, only a subset of Unicode properties are supported; see the + pcre2pattern man page for details. Escape sequences such as \d and \w in + patterns do not by default make use of Unicode properties, but can be made to + do so by setting the PCRE2_UCP option or starting a pattern with (*UCP). . You can build PCRE2 to recognize either CR or LF or the sequence CRLF, or any of the preceding, or any of the Unicode newline sequences, or the NUL (zero) @@ -411,7 +417,7 @@ The "configure" script builds the following files for the basic C library: . Makefile the makefile that builds the library . src/config.h build-time configuration options for the library . src/pcre2.h the public PCRE2 header file -. pcre2-config script that shows the building settings such as CFLAGS +. pcre2-config script that shows the building settings such as CFLAGS that were set for "configure" . libpcre2-8.pc ) . libpcre2-16.pc ) data for the pkg-config command @@ -571,9 +577,9 @@ at build time" for more details. Making new tarballs ------------------- -The command "make dist" creates two PCRE2 tarballs, in tar.gz and zip formats. -The command "make distcheck" does the same, but then does a trial build of the -new distribution to ensure that it works. +The command "make dist" creates three PCRE2 tarballs, in tar.gz, tar.bz2, and +zip formats. The command "make distcheck" does the same, but then does a trial +build of the new distribution to ensure that it works. If you have modified any of the man page sources in the doc directory, you should first run the PrepareRelease script before making a distribution. This @@ -602,13 +608,13 @@ is available. RunTest outputs a comment when it skips a test. Many (but not all) of the tests that are not skipped are run twice if JIT support is available. On the second run, JIT compilation is forced. This -testing can be suppressed by putting "nojit" on the RunTest command line. +testing can be suppressed by putting "-nojit" on the RunTest command line. The entire set of tests is run once for each of the 8-bit, 16-bit and 32-bit libraries that are enabled. If you want to run just one set of tests, call RunTest with either the -8, -16 or -32 option. -If valgrind is installed, you can run the tests under it by putting "valgrind" +If valgrind is installed, you can run the tests under it by putting "-valgrind" on the RunTest command line. To run pcre2test on just one or more specific test files, give their numbers as arguments to RunTest, for example: @@ -905,4 +911,4 @@ The distribution should contain the files listed below. Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -Last updated: 29 October 2021 +Last updated: 15 April 2022 diff --git a/pcre2/doc/html/pcre2_jit_stack_create.html b/pcre2/doc/html/pcre2_jit_stack_create.html index 6200d177f80a3cc95a93b48928c65bac29cbded8..548947c6833e0fd470cc94ce763664f59331c770 100644 --- a/pcre2/doc/html/pcre2_jit_stack_create.html +++ b/pcre2/doc/html/pcre2_jit_stack_create.html @@ -34,7 +34,8 @@ allocation. The result can be passed to the JIT run-time code by calling pcre2_jit_stack_assign() to associate the stack with a compiled pattern, which can then be processed by pcre2_match() or pcre2_jit_match(). A maximum stack size of 512KiB to 1MiB should be more than enough for any -pattern. For more details, see the +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the pcre2jit page.

diff --git a/pcre2/doc/html/pcre2_set_compile_extra_options.html b/pcre2/doc/html/pcre2_set_compile_extra_options.html index b1c0a1113a4bed9604a1a31b2aee6ad6d2716f01..2f2bf61ae2bc6ef13fe7b3d23a0fd9fb83e63206 100644 --- a/pcre2/doc/html/pcre2_set_compile_extra_options.html +++ b/pcre2/doc/html/pcre2_set_compile_extra_options.html @@ -30,8 +30,8 @@ This function sets additional option bits for pcre2_compile() that are housed in a compile context. It completely replaces all the bits. The extra options are:
-  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{df800} to \x{dfff}
-                                         in UTF-8 and UTF-32 modes
+  PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     Allow \K in lookarounds
+  PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES  Allow \x{d800} to \x{dfff} in UTF-8 and UTF-32 modes
   PCRE2_EXTRA_ALT_BSUX                 Extended alternate \u, \U, and \x handling
   PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL    Treat all invalid escapes as a literal following character
   PCRE2_EXTRA_ESCAPED_CR_IS_LF         Interpret \r as \n
diff --git a/pcre2/doc/html/pcre2_substitute.html b/pcre2/doc/html/pcre2_substitute.html
index 10b2267e2d72e9858b30f35874ed6321a06181be..abf0a703044f9829dbeea051ef790dd97d3850ed 100644
--- a/pcre2/doc/html/pcre2_substitute.html
+++ b/pcre2/doc/html/pcre2_substitute.html
@@ -68,29 +68,29 @@ automatically added.
 The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for
 zero-terminated strings. The options are:
 
-  PCRE2_ANCHORED             Match only at the first position
-  PCRE2_ENDANCHORED          Pattern can match only at end of subject
-  PCRE2_NOTBOL               Subject is not the beginning of a line
-  PCRE2_NOTEOL               Subject is not the end of a line
-  PCRE2_NOTEMPTY             An empty string is not a valid match
-  PCRE2_NOTEMPTY_ATSTART     An empty string at the start of the subject is not a valid match
-  PCRE2_NO_JIT               Do not use JIT matching
-  PCRE2_NO_UTF_CHECK         Do not check the subject or replacement for UTF validity (only relevant if
-                              PCRE2_UTF was set at compile time)
-  PCRE2_SUBSTITUTE_EXTENDED  Do extended replacement processing
-  PCRE2_SUBSTITUTE_GLOBAL    Replace all occurrences in the subject
-  PCRE2_SUBSTITUTE_LITERAL   The replacement string is literal
-  PCRE2_SUBSTITUTE_MATCHED   Use pre-existing match data for 1st match
-  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH  If overflow, compute needed length
+  PCRE2_ANCHORED                     Match only at the first position
+  PCRE2_ENDANCHORED                  Match only at end of subject
+  PCRE2_NOTBOL                       Subject is not the beginning of a line
+  PCRE2_NOTEOL                       Subject is not the end of a line
+  PCRE2_NOTEMPTY                     An empty string is not a valid match
+  PCRE2_NOTEMPTY_ATSTART             An empty string at the start of the subject is not a valid match
+  PCRE2_NO_JIT                       Do not use JIT matching
+  PCRE2_NO_UTF_CHECK                 Do not check for UTF validity in the subject or replacement
+                                      (only relevant if PCRE2_UTF was set at compile time)
+  PCRE2_SUBSTITUTE_EXTENDED          Do extended replacement processing
+  PCRE2_SUBSTITUTE_GLOBAL            Replace all occurrences in the subject
+  PCRE2_SUBSTITUTE_LITERAL           The replacement string is literal
+  PCRE2_SUBSTITUTE_MATCHED           Use pre-existing match data for first match
+  PCRE2_SUBSTITUTE_OVERFLOW_LENGTH   If overflow, compute needed length
   PCRE2_SUBSTITUTE_REPLACEMENT_ONLY  Return only replacement string(s)
-  PCRE2_SUBSTITUTE_UNKNOWN_UNSET  Treat unknown group as unset
-  PCRE2_SUBSTITUTE_UNSET_EMPTY  Simple unset insert = empty string
+  PCRE2_SUBSTITUTE_UNKNOWN_UNSET     Treat unknown group as unset
+  PCRE2_SUBSTITUTE_UNSET_EMPTY       Simple unset insert = empty string
 
If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored.

-If PCRE2_SUBSTITUTE_MATCHED is set, match_data must be non-zero; its +If PCRE2_SUBSTITUTE_MATCHED is set, match_data must be non-NULL; its contents must be the result of a call to pcre2_match() using the same pattern and subject.

diff --git a/pcre2/doc/html/pcre2api.html b/pcre2/doc/html/pcre2api.html index e2237e721b9dfeaf5e030e2dbf45735c296432ab..047e242a3b4d76f9f3bb285e2d588f07f5361ccd 100644 --- a/pcre2/doc/html/pcre2api.html +++ b/pcre2/doc/html/pcre2api.html @@ -1845,7 +1845,7 @@ undefined. It may cause your program to crash or loop.

Note that this option can also be passed to pcre2_match() and -pcre_dfa_match(), to suppress UTF validity checking of the subject +pcre2_dfa_match(), to suppress UTF validity checking of the subject string.

@@ -2055,8 +2055,8 @@ point. However, this applies only to characters whose code points are less than \d.

-When PCRE2 is built with Unicode support (the default), the Unicode properties -of all characters can be tested with \p and \P, or, alternatively, the +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \p and \P, or, alternatively, the PCRE2_UCP option can be set when a pattern is compiled; this causes \w and friends to use Unicode property support instead of the built-in tables. PCRE2_UCP also causes upper/lower casing operations on characters with code @@ -2316,7 +2316,7 @@ return zero. The third argument should point to a size_t variable. PCRE2_INFO_LASTCODETYPE

Returns 1 if there is a rightmost literal code unit that must exist in any -matched string, other than at its start. The third argument should point to a +matched string, other than at its start. The third argument should point to a uint32_t variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is @@ -2640,7 +2640,9 @@ The subject string is passed to pcre2_match() as a pointer in startoffset. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not -UTF processing is enabled. +UTF processing is enabled. As a special case, if subject is NULL and +length is zero, the subject is assumed to be an empty string. If +length is non-zero, an error occurs if subject is NULL.

If startoffset is greater than the length of the subject, @@ -3394,12 +3396,17 @@ same number causes an error at compile time.

This function optionally calls pcre2_match() and then makes a copy of the subject string in outputbuffer, replacing parts that were matched with -the replacement string, whose length is supplied in rlength. This -can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an -option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the -replacement string(s). The default action is to perform just one replacement if -the pattern matches, but there is an option that requests multiple replacements -(see PCRE2_SUBSTITUTE_GLOBAL below). +the replacement string, whose length is supplied in rlength, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if replacement is NULL and rlength is zero, the +replacement is assumed to be an empty string. If rlength is non-zero, an +error occurs if replacement is NULL. +

+

+There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below).

If successful, pcre2_substitute() returns the number of substitutions @@ -3433,12 +3440,12 @@ block may or may not have been changed. As well as the usual options for pcre2_match(), a number of additional options can be set in the options argument of pcre2_substitute(). One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external -match_data block must be provided, and it must have been used for an -external call to pcre2_match(). The data in the match_data block -(return code, offset vector) is used for the first substitution instead of -calling pcre2_match() from within pcre2_substitute(). This allows -an application to check for a match before choosing to substitute, without -having to repeat the match. +match_data block must be provided, and it must have already been used for +an external call to pcre2_match() with the same pattern and subject +arguments. The data in the match_data block (return code, offset vector) +is then used for the first substitution instead of calling pcre2_match() +from within pcre2_substitute(). This allows an application to check for a +match before choosing to substitute, without having to repeat the match.

The contents of the externally supplied match data block are not changed when @@ -3583,7 +3590,7 @@ and force lower case. The escape sequences change the current state: \U and terminating a \Q quoted sequence) reverts to no case forcing. The sequences \u and \l force the next character (if it is a letter) to upper or lower case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from +forcing. Case forcing applies to all inserted characters, including those from capture groups and letters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater @@ -3655,7 +3662,9 @@ default.

PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the -match_data argument is NULL. +match_data argument is NULL or if the subject or replacement +arguments are NULL. For backward compatibility reasons an exception is made for +the replacement argument if the rlength argument is also 0.

PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the @@ -3810,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches,

The function pcre2_dfa_match() is called to match a subject string against a compiled pattern, using a matching algorithm that scans the subject -string just once (not counting lookaround assertions), and does not backtrack. -This has different characteristics to the normal algorithm, and is not -compatible with Perl. Some of the features of PCRE2 patterns are not supported. -Nevertheless, there are times when this kind of matching can be useful. For a -discussion of the two matching algorithms, and a list of features that -pcre2_dfa_match() does not support, see the +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that pcre2_dfa_match() does +not support, see the pcre2matching documentation.

@@ -3850,7 +3860,7 @@ Here is an example of a simple call to pcre2_dfa_match():


-Option bits for pcre_dfa_match() +Option bits for pcre2_dfa_match()

The unused bits of the options argument for pcre2_dfa_match() must @@ -4008,7 +4018,7 @@ Cambridge, England.


REVISION

-Last updated: 30 August 2021 +Last updated: 14 December 2021
Copyright © 1997-2021 University of Cambridge.
diff --git a/pcre2/doc/html/pcre2build.html b/pcre2/doc/html/pcre2build.html index a1c2e9595dbccc2aeb988084e944084f96e4112d..0d12155b093f764dc250520401b4b8805b73a87c 100644 --- a/pcre2/doc/html/pcre2build.html +++ b/pcre2/doc/html/pcre2build.html @@ -142,8 +142,9 @@ locked this out by setting PCRE2_NEVER_UTF. UTF support allows the libraries to process character code points up to 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \P, \p, -and \X. Only the general category properties such as Lu and Nd are -supported. Details are given in the +and \X. Only the general category properties such as Lu and Nd, +script names, and some bi-directional properties are supported. Details are +given in the pcre2pattern documentation.

@@ -307,7 +308,7 @@ You can also explicitly limit the depth of nested backtracking in the for --with-match-limit. You can set a lower default limit by adding, for example,
-  --with-match-limit_depth=10000
+  --with-match-limit-depth=10000
 
to the configure command. This value can be overridden at run time. This depth limit indirectly limits the amount of heap memory that is used, but @@ -615,9 +616,9 @@ Cambridge, England.


REVISION

-Last updated: 20 March 2020 +Last updated: 08 December 2021
-Copyright © 1997-2020 University of Cambridge. +Copyright © 1997-2021 University of Cambridge.

Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2compat.html b/pcre2/doc/html/pcre2compat.html index eb826947b87ba0849c14022da799f24a24179943..5f390c1d2dd306475335fbabe3b845be0914bb1c 100644 --- a/pcre2/doc/html/pcre2compat.html +++ b/pcre2/doc/html/pcre2compat.html @@ -18,33 +18,41 @@ DIFFERENCES BETWEEN PCRE2 AND PERL

This document describes some of the differences in the ways that PCRE2 and Perl handle regular expressions. The differences described here are with respect to -Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the +Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the information may at times be out of date.

-1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +

+

+2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does have are given in the pcre2unicode page.

-2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just asserts that the next character is not "a" three times (in principle; PCRE2 optimizes this to run the assertion just once). Perl allows some repeat quantifiers on other assertions, -for example, \b* (but not \b{3}, though oddly it does allow ^{3}), but these -do not seem to have any use. PCRE2 does not allow any kind of quantifier on -non-lookaround assertions. +for example, \b* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions.

-3. Capture groups that occur inside negative lookaround assertions are counted, +4. Capture groups that occur inside negative lookaround assertions are counted, but their entries in the offsets vector are set only when a negative assertion is a condition that has a matching branch (that is, the condition is false). Perl may set such capture groups in other circumstances.

-4. The following Perl escape sequences are not supported: \F, \l, \L, \u, +5. The following Perl escape sequences are not supported: \F, \l, \L, \u, \U, and \N when followed by a character name. \N on its own, matching a non-newline character, and \N{U+dd..}, matching a Unicode code point, are supported. The escapes that modify the case of following letters are @@ -55,26 +63,26 @@ PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript interprets them.

-5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is +6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \p and \P are limited to the general category properties such as Lu and -Nd, script names such as Greek or Han, and the derived properties Any and L&. -Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use -is limited. See the +Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the +derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs +(surrogate) property, but in PCRE2 its use is limited. See the pcre2pattern documentation for details. The long synonyms for property names that Perl supports (such as \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is".

-6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters +7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters in between are treated as literals. However, this is slightly different from Perl in that $ and @ are also handled as literals inside the quotes. In Perl, -they cause variable interpolation (but of course PCRE2 does not have -variables). Also, Perl does "double-quotish backslash interpolation" on any -backslashes between \Q and \E which, its documentation says, "may lead to -confusing results". PCRE2 treats a backslash between \Q and \E just like any -other character. Note the following examples: +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \Q +and \E which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \Q and \E just like any other character. Note the +following examples:

     Pattern            PCRE2 matches     Perl matches
 
@@ -88,19 +96,19 @@ The \Q...\E sequence is recognized both inside and outside character classes
 by both PCRE2 and Perl.
 

-7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the pcre2callout documentation for details.

-8. Subroutine calls (whether recursive or not) were treated as atomic groups up +9. Subroutine calls (whether recursive or not) were treated as atomic groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl.

-9. In PCRE2, if any of the backtracking control verbs are used in a group that +10. In PCRE2, if any of the backtracking control verbs are used in a group that is called as a subroutine (whether or not recursively), their effect is confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. In particular, if (*THEN) is present in a group @@ -109,20 +117,20 @@ the group does not contain any | characters. Note that such groups are processed as anchored at the point where they are tested.

-10. If a pattern contains more than one backtracking control verb, the first +11. If a pattern contains more than one backtracking control verb, the first one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs.

-11. There are some differences that are concerned with the settings of captured +12. There are some differences that are concerned with the settings of captured strings when part of a pattern is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to "b".

-12. PCRE2's handling of duplicate capture group numbers and names is not as +13. PCRE2's handling of duplicate capture group numbers and names is not as general as Perl's. This is a consequence of the fact the PCRE2 works internally just with numbers, using an external table to translate between numbers and names. In particular, a pattern such as (?|(?<a>A)|(?<b>B)), where the two @@ -132,42 +140,43 @@ to distinguish which group matched, because both names map to capture group number 1. To avoid this confusing situation, an error is given at compile time.

-13. Perl used to recognize comments in some places that PCRE2 does not, for +14. Perl used to recognize comments in some places that PCRE2 does not, for example, between the ( and ? at the start of a group. If the /x modifier is set, Perl allowed white space between ( and ? though the latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently.

-14. Perl, when in warning mode, gives warnings for character classes such as +15. Perl, when in warning mode, gives warnings for character classes such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes.

-15. In PCRE2, the upper/lower case character properties Lu and Ll are not +16. In PCRE2, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \p{Lu} always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.32), \p{Lu} and \p{Ll} match all +in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all letters, regardless of case, when case independence is specified.

-16. From release 5.32.0, Perl locks out the use of \K in lookaround +17. From release 5.32.0, Perl locks out the use of \K in lookaround assertions. From release 10.38 PCRE2 does the same by default. However, there is an option for re-enabling the previous behaviour. When this option is set, \K is acted on when it occurs in positive assertions, but is ignored in negative assertions.

-17. PCRE2 provides some extensions to the Perl regular expression facilities. +18. PCRE2 provides some extensions to the Perl regular expression facilities. Perl 5.10 included new features that were not in earlier versions of Perl, some of which (such as named parentheses) were in PCRE2 for some time before. This -list is with respect to Perl 5.32: +list is with respect to Perl 5.34:

(a) Although lookbehind assertions in PCRE2 must match fixed length strings, each alternative toplevel branch of a lookbehind assertion can match a -different length of string. Perl requires them all to have the same length. +different length of string. Perl used to require them all to have the same +length, but the latest version has some variable length support.

(b) From PCRE2 10.23, backreferences to groups of fixed length are supported @@ -221,12 +230,12 @@ extension to the lookaround facilities. The default, Perl-compatible lookarounds are atomic.

-18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa +19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode rules. This separation cannot be represented with PCRE2_UCP.

-19. Perl has different limits than PCRE2. See the +20. Perl has different limits than PCRE2. See the pcre2limit documentation for details. Perl went with 5.10 from recursion to iteration keeping the intermediate matches on the heap, which is ~10% slower but does not @@ -248,7 +257,7 @@ Cambridge, England. REVISION

-Last updated: 30 August 2021 +Last updated: 08 December 2021
Copyright © 1997-2021 University of Cambridge.
diff --git a/pcre2/doc/html/pcre2jit.html b/pcre2/doc/html/pcre2jit.html index e73a22984c126817aca7c6d6347687c91bc1fc8b..d89fa239995e526d94e6e383d1912e26978874c6 100644 --- a/pcre2/doc/html/pcre2jit.html +++ b/pcre2/doc/html/pcre2jit.html @@ -269,11 +269,11 @@ starts another match, that match must use a different JIT stack to the one used for currently suspended match(es).

-In a multithread application, if you do not -specify a JIT stack, or if you assign or pass back NULL from a callback, that -is thread-safe, because each thread has its own machine stack. However, if you -assign or pass back a non-NULL JIT stack, this must be a different stack for -each thread so that the application is thread-safe. +In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe.

Strictly speaking, even more is allowed. You can assign the same non-NULL stack @@ -382,8 +382,8 @@ out this complicated API. void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext);

-The JIT executable allocator does not free all memory when it is possible. -It expects new allocations, and keeps some free memory around to improve +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve allocation speed. However, in low memory conditions, it might be better to free all possible memory. You can cause this to happen by calling pcre2_jit_free_unused_memory(). Its argument is a general context, for custom @@ -442,10 +442,10 @@ that was not compiled.

When you call pcre2_match(), as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For example, if -the subject pointer is NULL, an immediate error is given. Also, unless -PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the -interests of speed, these checks do not happen on the JIT fast path, and if -invalid data is passed, the result is undefined. +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path, and if invalid data is passed, the result is undefined.

Bypassing the sanity checks and the pcre2_match() wrapping can give @@ -466,9 +466,9 @@ Cambridge, England.


REVISION

-Last updated: 23 May 2019 +Last updated: 30 November 2021
-Copyright © 1997-2019 University of Cambridge. +Copyright © 1997-2021 University of Cambridge.

Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2pattern.html b/pcre2/doc/html/pcre2pattern.html index 9c2d66cf8267c5e212b2cbb88444f68ff57bbc2c..2c243010187143290d614546589e9d3c7933a075 100644 --- a/pcre2/doc/html/pcre2pattern.html +++ b/pcre2/doc/html/pcre2pattern.html @@ -534,7 +534,7 @@ for themselves. For example, outside a character class: \0113 is a tab followed by the character "3" \113 might be a backreference, otherwise the character with octal code 113 \377 might be a backreference, otherwise the value 255 (decimal) - \81 is always a backreference .sp + \81 is always a backreference

Note that octal values of 100 or greater that are specified using this syntax must not be introduced by a leading zero, because no more than three octal @@ -776,199 +776,62 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these sequences are of course limited to testing characters whose code points are less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all -treated as being in the Unknown script and with an unassigned type. The extra -escape sequences are: +treated as being in the Unknown script and with an unassigned type. +

+

+Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \d and \w do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +

+

+The extra escape sequences that provide property support are:

   \p{xx}   a character with the xx property
   \P{xx}   a character without the xx property
   \X       a Unicode extended grapheme cluster
 
-The property names represented by xx above are case-sensitive. There is -support for Unicode script names, Unicode general category properties, "Any", -which matches any character (including newline), and some special PCRE2 -properties (described in the -next section). -Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2. -Note that \P{Any} does not match any characters, so always causes a match -failure. +The property names represented by xx above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described +below). +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \P{Any} does not match any characters, so always causes a +match failure. +

+
+Script properties for \p and \P +
+

+There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \p{sc:Adlam} matches characters whose basic script is Adlam, whereas +\p{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and a equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\p{Adlam}, it is treated as \p{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40.

-Sets of Unicode characters are defined as belonging to certain scripts. A -character from one of these sets can be matched using a script name. For -example: -

-  \p{Greek}
-  \P{Han}
-
Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not part of an identified script are lumped together as "Common". The current list -of scripts is: -

-

-Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician, -Psalter_Pahlavi, -Rejang, -Runic, -Samaritan, -Saurashtra, -Sharada, -Shavian, -Siddham, -SignWriting, -Sinhala, -Sogdian, -Sora_Sompeng, -Soyombo, -Sundanese, -Syloti_Nagri, -Syriac, -Tagalog, -Tagbanwa, -Tai_Le, -Tai_Tham, -Tai_Viet, -Takri, -Tamil, -Tangsa, -Tangut, -Telugu, -Thaana, -Thai, -Tibetan, -Tifinagh, -Tirhuta, -Toto, -Ugaritic, -Unknown, -Vai, -Vithkuqi, -Wancho, -Warang_Citi, -Yezidi, -Yi, -Zanabazar_Square. +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +

+  pcre2test -LS
+
+

+
+The general category property for \p and \P +

Each character has exactly one Unicode general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be @@ -1030,9 +893,9 @@ The following general category property codes are supported: Zp Paragraph separator Zs Space separator -The special property L& is also supported: it matches a character that has -the Lu, Ll, or Lt property, in other words, a letter that is not classified as -a modifier or "other". +The special property LC, which has the synonym L&, is also supported: it +matches a character that has the Lu, Ll, or Lt property, in other words, a +letter that is not classified as a modifier or "other".

The Cs (Surrogate) property applies only to characters whose code points are in @@ -1059,12 +922,54 @@ Specifying caseless matching does not affect these escape sequences. For example, \p{Lu} always matches only upper case letters. This is different from the behaviour of current versions of Perl.

+
+Binary (yes/no) properties for \p and \P +

-Matching characters by Unicode property is not fast, because PCRE2 has to do a -multistage table lookup in order to find a character's property. That is why -the traditional escape sequences such as \d and \w do not use Unicode -properties in PCRE2 by default, though you can make them do so by setting the -PCRE2_UCP option or by starting the pattern with (*UCP). +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

+  pcre2test -LP
+
+
+

+
+The Bidi_Class property for \p and \P +
+

+

+  \p{Bidi_Class:<class>}   matches a character with the given class
+  \p{BC:<class>}           matches a character with the given class
+
+The recognized classes are: +
+  AL          Arabic letter
+  AN          Arabic number
+  B           paragraph separator
+  BN          boundary neutral
+  CS          common separator
+  EN          European number
+  ES          European separator
+  ET          European terminator
+  FSI         first strong isolate
+  L           left-to-right
+  LRE         left-to-right embedding
+  LRI         left-to-right isolate
+  LRO         left-to-right override
+  NSM         non-spacing mark
+  ON          other neutral
+  PDF         pop directional format
+  PDI         pop directional isolate
+  R           right-to-left
+  RLE         right-to-left embedding
+  RLI         right-to-left isolate
+  RLO         right-to-left override
+  S           segment separator
+  WS          which space
+
+An equals sign may be used instead of a colon. The class names are +case-insensitive; only the short names listed above are recognized.


Extended grapheme clusters @@ -1341,15 +1246,17 @@ end of the subject in both modes, and if all branches of a pattern start with

Outside a character class, a dot in the pattern matches any one character in the subject string except (by default) a character that signifies the end of a -line. +line. One or more characters may be specified as line terminators (see +"Newline conventions" +above).

-When a line ending is defined as a single character, dot never matches that -character; when the two-character sequence CRLF is used, dot does not match CR -if it is immediately followed by LF, but otherwise it matches all characters -(including isolated CRs and LFs). When any Unicode line endings are being -recognized, dot does not match CR or LF or any of the other line ending -characters. +Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurences +of CR of LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters.

The behaviour of dot with regard to newlines can be changed. If the @@ -2180,10 +2087,10 @@ be easier to remember:

   (*atomic:\d+)foo
 
-This kind of parenthesized group "locks up" the part of the pattern it -contains once it has matched, and a failure further into the pattern is -prevented from backtracking into it. Backtracking past it to previous items, -however, works as normal. +This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal.

An alternative description is that a group of this type matches exactly the @@ -3859,9 +3766,9 @@ Cambridge, England.


REVISION

-Last updated: 30 August 2021 +Last updated: 12 January 2022
-Copyright © 1997-2021 University of Cambridge. +Copyright © 1997-2022 University of Cambridge.

Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2serialize.html b/pcre2/doc/html/pcre2serialize.html index 18a8d7fa8d9248833e39c249412a190e0ec77248..df4098e1d38ad26bb8d2e3991f30d83acb029cfa 100644 --- a/pcre2/doc/html/pcre2serialize.html +++ b/pcre2/doc/html/pcre2serialize.html @@ -23,12 +23,12 @@ please consult the man page, in case the conversion went wrong.
SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS

int32_t pcre2_serialize_decode(pcre2_code **codes, - int32_t number_of_codes, const uint32_t *bytes, + int32_t number_of_codes, const uint8_t *bytes, pcre2_general_context *gcontext);

-int32_t pcre2_serialize_encode(pcre2_code **codes, - int32_t number_of_codes, uint32_t **serialized_bytes, +int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext);

@@ -154,7 +154,6 @@ mangagement functions for the decoded patterns. If this argument is NULL, malloc() and free() are used. After deserialization, the byte stream is no longer needed and can be discarded.

-  int32_t number_of_codes;
   pcre2_code *list_of_codes[2];
   uint8_t *bytes = <serialized data>;
   int32_t number_of_codes =
diff --git a/pcre2/doc/html/pcre2syntax.html b/pcre2/doc/html/pcre2syntax.html
index 735eb69fb1e4d35c977081df3c53c69f1e0c176e..8364c521534093eb951a901685f52d45126b16fb 100644
--- a/pcre2/doc/html/pcre2syntax.html
+++ b/pcre2/doc/html/pcre2syntax.html
@@ -19,29 +19,31 @@ please consult the man page, in case the conversion went wrong.
 
  • CHARACTER TYPES
  • GENERAL CATEGORY PROPERTIES FOR \p and \P
  • PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P -
  • SCRIPT NAMES FOR \p AND \P -
  • CHARACTER CLASSES -
  • QUANTIFIERS -
  • ANCHORS AND SIMPLE ASSERTIONS -
  • REPORTED MATCH POINT SETTING -
  • ALTERNATION -
  • CAPTURING -
  • ATOMIC GROUPS -
  • COMMENT -
  • OPTION SETTING -
  • NEWLINE CONVENTION -
  • WHAT \R MATCHES -
  • LOOKAHEAD AND LOOKBEHIND ASSERTIONS -
  • NON-ATOMIC LOOKAROUND ASSERTIONS -
  • SCRIPT RUNS -
  • BACKREFERENCES -
  • SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) -
  • CONDITIONAL PATTERNS -
  • BACKTRACKING CONTROL -
  • CALLOUTS -
  • SEE ALSO -
  • AUTHOR -
  • REVISION +
  • BINARY PROPERTIES FOR \p AND \P +
  • SCRIPT MATCHING WITH \p AND \P +
  • THE BIDI_CLASS PROPERTY FOR \p AND \P +
  • CHARACTER CLASSES +
  • QUANTIFIERS +
  • ANCHORS AND SIMPLE ASSERTIONS +
  • REPORTED MATCH POINT SETTING +
  • ALTERNATION +
  • CAPTURING +
  • ATOMIC GROUPS +
  • COMMENT +
  • OPTION SETTING +
  • NEWLINE CONVENTION +
  • WHAT \R MATCHES +
  • LOOKAHEAD AND LOOKBEHIND ASSERTIONS +
  • NON-ATOMIC LOOKAROUND ASSERTIONS +
  • SCRIPT RUNS +
  • BACKREFERENCES +
  • SUBROUTINE REFERENCES (POSSIBLY RECURSIVE) +
  • CONDITIONAL PATTERNS +
  • BACKTRACKING CONTROL +
  • CALLOUTS +
  • SEE ALSO +
  • AUTHOR +
  • REVISION
    PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY

    @@ -136,6 +138,11 @@ happening, \s and \w may also match characters with code points in the range sequences is changed to use Unicode properties and they match many more characters.

    +

    +Property descriptions in \p and \P are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. +


    GENERAL CATEGORY PROPERTIES FOR \p and \P

    @@ -152,6 +159,7 @@ characters.
       Lo         Other letter
       Lt         Title case letter
       Lu         Upper case letter
    +  Lc         Ll, Lu, or Lt
       L&         Ll, Lu, or Lt
     
       M          Mark
    @@ -198,171 +206,58 @@ characters.
     Perl and POSIX space are now the same. Perl added VT to its space character set
     at release 5.18.
     

    -
    SCRIPT NAMES FOR \p AND \P
    -

    -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician, -Psalter_Pahlavi, -Rejang, -Runic, -Samaritan, -Saurashtra, -Sharada, -Shavian, -Siddham, -SignWriting, -Sinhala, -Sogdian, -Sora_Sompeng, -Soyombo, -Sundanese, -Syloti_Nagri, -Syriac, -Tagalog, -Tagbanwa, -Tai_Le, -Tai_Tham, -Tai_Viet, -Takri, -Tamil, -Tangsa, -Tangut, -Telugu, -Thaana, -Thai, -Tibetan, -Tifinagh, -Tirhuta, -Toto, -Ugaritic, -Vai, -Vithkuqi, -Wancho, -Warang_Citi, -Yezidi, -Yi, -Zanabazar_Square. -

    -
    CHARACTER CLASSES
    +
    BINARY PROPERTIES FOR \p AND \P
    +

    +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\p and \P, along with their abbreviations, by running this command: +

    +  pcre2test -LP
    +
    +

    +
    SCRIPT MATCHING WITH \p AND \P
    +

    +Many script names and their 4-letter abbreviations are recognized in +\p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P of +course). You can obtain a list of these scripts by running this command: +

    +  pcre2test -LS
    +
    +

    +
    THE BIDI_CLASS PROPERTY FOR \p AND \P
    +

    +

    +  \p{Bidi_Class:<class>}   matches a character with the given class
    +  \p{BC:<class>}           matches a character with the given class
    +
    +The recognized classes are: +
    +  AL          Arabic letter
    +  AN          Arabic number
    +  B           paragraph separator
    +  BN          boundary neutral
    +  CS          common separator
    +  EN          European number
    +  ES          European separator
    +  ET          European terminator
    +  FSI         first strong isolate
    +  L           left-to-right
    +  LRE         left-to-right embedding
    +  LRI         left-to-right isolate
    +  LRO         left-to-right override
    +  NSM         non-spacing mark
    +  ON          other neutral
    +  PDF         pop directional format
    +  PDI         pop directional isolate
    +  R           right-to-left
    +  RLE         right-to-left embedding
    +  RLI         right-to-left isolate
    +  RLO         right-to-left override
    +  S           segment separator
    +  WS          which space
    +
    +

    +
    CHARACTER CLASSES

       [...]       positive character class
    @@ -390,7 +285,7 @@ In PCRE2, POSIX character set names recognize only ASCII characters by default,
     but some of them use Unicode properties if PCRE2_UCP is set. You can use
     \Q...\E inside a character class.
     

    -
    QUANTIFIERS
    +
    QUANTIFIERS

       ?           0 or 1, greedy
    @@ -411,7 +306,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
       {n,}?       n or more, lazy
     

    -
    ANCHORS AND SIMPLE ASSERTIONS
    +
    ANCHORS AND SIMPLE ASSERTIONS

       \b          word boundary
    @@ -429,7 +324,7 @@ but some of them use Unicode properties if PCRE2_UCP is set. You can use
       \G          first matching position in subject
     

    -
    REPORTED MATCH POINT SETTING
    +
    REPORTED MATCH POINT SETTING

       \K          set reported start of match
    @@ -439,13 +334,13 @@ for compatibility with Perl. However, if the PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK
     option is set, the previous behaviour is re-enabled. When this option is set,
     \K is honoured in positive assertions, but ignored in negative ones.
     

    -
    ALTERNATION
    +
    ALTERNATION

       expr|expr|expr...
     

    -
    CAPTURING
    +
    CAPTURING

       (...)           capture group
    @@ -460,20 +355,20 @@ In non-UTF modes, names may contain underscores and ASCII letters and digits;
     in UTF modes, any Unicode letters and Unicode decimal digits are permitted. In
     both cases, a name must not start with a digit.
     

    -
    ATOMIC GROUPS
    +
    ATOMIC GROUPS

       (?>...)         atomic non-capture group
       (*atomic:...)   atomic non-capture group
     

    -
    COMMENT
    +
    COMMENT

       (?#....)        comment (not nestable)
     

    -
    OPTION SETTING
    +
    OPTION SETTING

    Changes of these options within a group are automatically cancelled at the end of the group. @@ -518,7 +413,7 @@ not increase them. LIMIT_RECURSION is an obsolete synonym for LIMIT_DEPTH. The application can lock out the use of (*UTF) and (*UCP) by setting the PCRE2_NEVER_UTF or PCRE2_NEVER_UCP options, respectively, at compile time.

    -
    NEWLINE CONVENTION
    +
    NEWLINE CONVENTION

    These are recognized only at the very start of the pattern or after option settings with a similar syntax. @@ -531,7 +426,7 @@ settings with a similar syntax. (*NUL) the NUL character (binary zero)

    -
    WHAT \R MATCHES
    +
    WHAT \R MATCHES

    These are recognized only at the very start of the pattern or after option setting with a similar syntax. @@ -540,7 +435,7 @@ setting with a similar syntax. (*BSR_UNICODE) any Unicode newline sequence

    -
    LOOKAHEAD AND LOOKBEHIND ASSERTIONS
    +
    LOOKAHEAD AND LOOKBEHIND ASSERTIONS

       (?=...)                     )
    @@ -561,7 +456,7 @@ setting with a similar syntax.
     
    Each top-level branch of a lookbehind must be of a fixed length.

    -
    NON-ATOMIC LOOKAROUND ASSERTIONS
    +
    NON-ATOMIC LOOKAROUND ASSERTIONS

    These assertions are specific to PCRE2 and are not Perl-compatible.

    @@ -574,7 +469,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (*non_atomic_positive_lookbehind:...)  )
     

    -
    SCRIPT RUNS
    +
    SCRIPT RUNS

       (*script_run:...)           ) script run, can be backtracked into
    @@ -584,7 +479,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (*asr:...)                  )
     

    -
    BACKREFERENCES
    +
    BACKREFERENCES

       \n              reference by number (can be ambiguous)
    @@ -601,7 +496,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       (?P=name)       reference by name (Python)
     

    -
    SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)
    +
    SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)

       (?R)            recurse whole pattern
    @@ -620,7 +515,7 @@ These assertions are specific to PCRE2 and are not Perl-compatible.
       \g'-n'          call subroutine by relative number (PCRE2 extension)
     

    -
    CONDITIONAL PATTERNS
    +
    CONDITIONAL PATTERNS

       (?(condition)yes-pattern)
    @@ -643,7 +538,7 @@ Note the ambiguity of (?(R) and (?(Rn) which might be named reference
     conditions or recursion tests. Such a condition is interpreted as a reference
     condition if the relevant named group exists.
     

    -
    BACKTRACKING CONTROL
    +
    BACKTRACKING CONTROL

    All backtracking control verbs may be in the form (*VERB:NAME). For (*MARK) the name is mandatory, for the others it is optional. (*SKIP) changes its behaviour @@ -670,7 +565,7 @@ pattern is not anchored. The effect of one of these verbs in a group called as a subroutine is confined to the subroutine call.

    -
    CALLOUTS
    +
    CALLOUTS

       (?C)            callout (assumed number 0)
    @@ -681,12 +576,12 @@ The allowed string delimiters are ` ' " ^ % # $ (which are the same for the
     start and the end), and the starting delimiter { matched with the ending
     delimiter }. To encode the ending delimiter within the string, double it.
     

    -
    SEE ALSO
    +
    SEE ALSO

    pcre2pattern(3), pcre2api(3), pcre2callout(3), pcre2matching(3), pcre2(3).

    -
    AUTHOR
    +
    AUTHOR

    Philip Hazel
    @@ -695,11 +590,11 @@ Retired from University Computing Service Cambridge, England.

    -
    REVISION
    +
    REVISION

    -Last updated: 30 August 2021 +Last updated: 12 January 2022
    -Copyright © 1997-2021 University of Cambridge. +Copyright © 1997-2022 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2test.html b/pcre2/doc/html/pcre2test.html index 3ee51cd5dd6674e7fef83ae842517bdc74081a97..373e5dff09e2f725e49d56a557714a1959a59805 100644 --- a/pcre2/doc/html/pcre2test.html +++ b/pcre2/doc/html/pcre2test.html @@ -78,7 +78,7 @@ to 8-bit code units for output.

    In the rest of this document, the names of library functions and structures -are given in generic form, for example, pcre_compile(). The actual +are given in generic form, for example, pcre2_compile(). The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate.


    INPUT ENCODING
    @@ -253,7 +253,19 @@ available, and the use of JIT for matching is verified. -LM List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit code. All other options are ignored. -If both -C and -LM are present, whichever is first is recognized. +If both -C and any -Lx options are present, whichever is first is recognized. +

    +

    +-LP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +

    +

    +-LS +List scripts: write a list of recogized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized.

    -pattern modifier-list @@ -1239,6 +1251,8 @@ pattern, but can be overridden by modifiers on the subject. match_limit=<n> set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset=<n> set starting offset offset_limit=<n> set offset limit ovector=<n> set size of output vector @@ -1668,7 +1682,7 @@ When testing pcre2_substitute(), this modifier also has the effect of passing the replacement string as zero-terminated.


    -Passing a NULL context +Passing a NULL context, subject, or replacement

    Normally, pcre2test passes a context block to pcre2_match(), @@ -1678,6 +1692,11 @@ testing that the matching and substitution functions behave correctly in this case (they use default values). This modifier cannot be used with the find_limits or substitute_callout modifiers.

    +

    +Similarly, for testing purposes, if the null_subject or +null_replacement modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. +


    THE ALTERNATIVE MATCHING FUNCTION

    By default, pcre2test uses the standard PCRE2 matching function, @@ -2122,9 +2141,9 @@ Cambridge, England.


    REVISION

    -Last updated: 30 August 2021 +Last updated: 12 January 2022
    -Copyright © 1997-2021 University of Cambridge. +Copyright © 1997-2022 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/html/pcre2unicode.html b/pcre2/doc/html/pcre2unicode.html index 76ca6ea281b78bf091ae7dc3dff6191d9ac501ce..a0d4270f447698385542c372b0182c18bbb0482b 100644 --- a/pcre2/doc/html/pcre2unicode.html +++ b/pcre2/doc/html/pcre2unicode.html @@ -50,17 +50,18 @@ UNICODE PROPERTY SUPPORT

    When PCRE2 is built with Unicode support, the escape sequences \p{..}, \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF setting. -The Unicode properties that can be tested are limited to the general category -properties such as Lu for an upper case letter or Nd for a decimal number, the -Unicode script names such as Arabic or Han, and the derived properties Any and -L&. Full lists are given in the +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the Unicode script +names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived +properties Any and LC (synonym L&). Full lists are given in the pcre2pattern and pcre2syntax -documentation. Only the short names for properties are supported. For example, -\p{L} matches a letter. Its Perl synonym, \p{Letter}, is not supported. -Furthermore, in Perl, many properties may optionally be prefixed by "Is", for -compatibility with Perl 5.6. PCRE2 does not support this. +documentation. In general, only the short names for properties are supported. +For example, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this.


    WIDE CHARACTERS AND UTF MODES @@ -477,7 +478,7 @@ AUTHOR

    Philip Hazel
    -University Computing Service +Retired from University Computing Service
    Cambridge, England.
    @@ -486,9 +487,9 @@ Cambridge, England. REVISION

    -Last updated: 23 February 2020 +Last updated: 22 December 2021
    -Copyright © 1997-2020 University of Cambridge. +Copyright © 1997-2021 University of Cambridge.

    Return to the PCRE2 index page. diff --git a/pcre2/doc/pcre2.txt b/pcre2/doc/pcre2.txt index dde66a136d9679f0fb1d402fdc75400cb82d8bcc..641a1f9d2c6c7d9f83eb42f0fd668f74f15f8ac4 100644 --- a/pcre2/doc/pcre2.txt +++ b/pcre2/doc/pcre2.txt @@ -1815,7 +1815,7 @@ COMPILING A PATTERN to crash or loop. Note that this option can also be passed to pcre2_match() and - pcre_dfa_match(), to suppress UTF validity checking of the subject + pcre2_dfa_match(), to suppress UTF validity checking of the subject string. Note also that setting PCRE2_NO_UTF_CHECK at compile time does not dis- @@ -2012,13 +2012,13 @@ LOCALE SUPPORT code points are less than 256. By default, higher-valued code points never match escapes such as \w or \d. - When PCRE2 is built with Unicode support (the default), the Unicode - properties of all characters can be tested with \p and \P, or, alterna- - tively, the PCRE2_UCP option can be set when a pattern is compiled; - this causes \w and friends to use Unicode property support instead of - the built-in tables. PCRE2_UCP also causes upper/lower casing opera- - tions on characters with code points greater than 127 to use Unicode - properties. These effects apply even when PCRE2_UTF is not set. + When PCRE2 is built with Unicode support (the default), certain Unicode + character properties can be tested with \p and \P, or, alternatively, + the PCRE2_UCP option can be set when a pattern is compiled; this causes + \w and friends to use Unicode property support instead of the built-in + tables. PCRE2_UCP also causes upper/lower casing operations on charac- + ters with code points greater than 127 to use Unicode properties. These + effects apply even when PCRE2_UTF is not set. The use of locales with Unicode is discouraged. If you are handling characters with code points greater than 127, you should either use @@ -2579,7 +2579,9 @@ MATCHING A PATTERN: THE TRADITIONAL FUNCTION and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not UTF pro- - cessing is enabled. + cessing is enabled. As a special case, if subject is NULL and length is + zero, the subject is assumed to be an empty string. If length is non- + zero, an error occurs if subject is NULL. If startoffset is greater than the length of the subject, pcre2_match() returns PCRE2_ERROR_BADOFFSET. When the starting offset is zero, the @@ -3280,8 +3282,12 @@ CREATING A NEW STRING WITH SUBSTITUTIONS This function optionally calls pcre2_match() and then makes a copy of the subject string in outputbuffer, replacing parts that were matched - with the replacement string, whose length is supplied in rlength. This - can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. + with the replacement string, whose length is supplied in rlength, which + can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As + a special case, if replacement is NULL and rlength is zero, the re- + placement is assumed to be an empty string. If rlength is non-zero, an + error occurs if replacement is NULL. + There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to re- turn just the replacement string(s). The default action is to perform just one replacement if the pattern matches, but there is an option @@ -3315,89 +3321,90 @@ CREATING A NEW STRING WITH SUBSTITUTIONS As well as the usual options for pcre2_match(), a number of additional options can be set in the options argument of pcre2_substitute(). One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external - match_data block must be provided, and it must have been used for an - external call to pcre2_match(). The data in the match_data block (re- - turn code, offset vector) is used for the first substitution instead of - calling pcre2_match() from within pcre2_substitute(). This allows an - application to check for a match before choosing to substitute, without - having to repeat the match. - - The contents of the externally supplied match data block are not - changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI- - TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- - stitution to check for further matches, but this is done using an in- - ternally obtained match data block, thus always leaving the external + match_data block must be provided, and it must have already been used + for an external call to pcre2_match() with the same pattern and subject + arguments. The data in the match_data block (return code, offset vec- + tor) is then used for the first substitution instead of calling + pcre2_match() from within pcre2_substitute(). This allows an applica- + tion to check for a match before choosing to substitute, without having + to repeat the match. + + The contents of the externally supplied match data block are not + changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTI- + TUTE_GLOBAL is also set, pcre2_match() is called after the first sub- + stitution to check for further matches, but this is done using an in- + ternally obtained match data block, thus always leaving the external block unchanged. - The code argument is not used for matching before the first substitu- - tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, - even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- + The code argument is not used for matching before the first substitu- + tion when PCRE2_SUBSTITUTE_MATCHED is set, but it must be provided, + even when PCRE2_SUBSTITUTE_GLOBAL is not set, because it contains in- formation such as the UTF setting and the number of capturing parenthe- ses in the pattern. - The default action of pcre2_substitute() is to return a copy of the + The default action of pcre2_substitute() is to return a copy of the subject string with matched substrings replaced. However, if PCRE2_SUB- - STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are + STITUTE_REPLACEMENT_ONLY is set, only the replacement substrings are returned. In the global case, multiple replacements are concatenated in - the output buffer. Substitution callouts (see below) can be used to + the output buffer. Substitution callouts (see below) can be used to separate them if necessary. - The outlengthptr argument of pcre2_substitute() must point to a vari- - able that contains the length, in code units, of the output buffer. If - the function is successful, the value is updated to contain the length - in code units of the new string, excluding the trailing zero that is + The outlengthptr argument of pcre2_substitute() must point to a vari- + able that contains the length, in code units, of the output buffer. If + the function is successful, the value is updated to contain the length + in code units of the new string, excluding the trailing zero that is automatically added. - If the function is not successful, the value set via outlengthptr de- - pends on the type of error. For syntax errors in the replacement + If the function is not successful, the value set via outlengthptr de- + pends on the type of error. For syntax errors in the replacement string, the value is the offset in the replacement string where the er- - ror was detected. For other errors, the value is PCRE2_UNSET by de- + ror was detected. For other errors, the value is PCRE2_UNSET by de- fault. This includes the case of the output buffer being too small, un- less PCRE2_SUBSTITUTE_OVERFLOW_LENGTH is set. - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH changes what happens when the output buffer is too small. The default action is to return PCRE2_ERROR_NOMEM- - ORY immediately. If this option is set, however, pcre2_substitute() + ORY immediately. If this option is set, however, pcre2_substitute() continues to go through the motions of matching and substituting (with- - out, of course, writing anything) in order to compute the size of buf- - fer that is needed. This value is passed back via the outlengthptr - variable, with the result of the function still being PCRE2_ER- + out, of course, writing anything) in order to compute the size of buf- + fer that is needed. This value is passed back via the outlengthptr + variable, with the result of the function still being PCRE2_ER- ROR_NOMEMORY. - Passing a buffer size of zero is a permitted way of finding out how - much memory is needed for given substitution. However, this does mean + Passing a buffer size of zero is a permitted way of finding out how + much memory is needed for given substitution. However, this does mean that the entire operation is carried out twice. Depending on the appli- - cation, it may be more efficient to allocate a large buffer and free - the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- + cation, it may be more efficient to allocate a large buffer and free + the excess afterwards, instead of using PCRE2_SUBSTITUTE_OVER- FLOW_LENGTH. - The replacement string, which is interpreted as a UTF string in UTF - mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An + The replacement string, which is interpreted as a UTF string in UTF + mode, is checked for UTF validity unless PCRE2_NO_UTF_CHECK is set. An invalid UTF replacement string causes an immediate return with the rel- evant UTF error code. - If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- + If PCRE2_SUBSTITUTE_LITERAL is set, the replacement string is not in- terpreted in any way. By default, however, a dollar character is an es- - cape character that can specify the insertion of characters from cap- - ture groups and names from (*MARK) or other control verbs in the pat- + cape character that can specify the insertion of characters from cap- + ture groups and names from (*MARK) or other control verbs in the pat- tern. The following forms are always recognized: $$ insert a dollar character $ or ${} insert the contents of group $*MARK or ${*MARK} insert a control verb name - Either a group number or a group name can be given for . Curly - brackets are required only if the following character would be inter- + Either a group number or a group name can be given for . Curly + brackets are required only if the following character would be inter- preted as part of the number or name. The number may be zero to include - the entire matched string. For example, if the pattern a(b)c is - matched with "=abc=" and the replacement string "+$1$0$1+", the result + the entire matched string. For example, if the pattern a(b)c is + matched with "=abc=" and the replacement string "+$1$0$1+", the result is "=+babcb+=". - $*MARK inserts the name from the last encountered backtracking control - verb on the matching path that has a name. (*MARK) must always include - a name, but the other verbs need not. For example, in the case of + $*MARK inserts the name from the last encountered backtracking control + verb on the matching path that has a name. (*MARK) must always include + a name, but the other verbs need not. For example, in the case of (*MARK:A)(*PRUNE) the name inserted is "A", but for (*MARK:A)(*PRUNE:B) - the relevant name is "B". This facility can be used to perform simple + the relevant name is "B". This facility can be used to perform simple simultaneous substitutions, as this pcre2test example shows: /(*MARK:pear)apple|(*MARK:orange)lemon/g,replace=${*MARK} @@ -3405,15 +3412,15 @@ CREATING A NEW STRING WITH SUBSTITUTIONS 2: pear orange PCRE2_SUBSTITUTE_GLOBAL causes the function to iterate over the subject - string, replacing every matching substring. If this option is not set, - only the first matching substring is replaced. The search for matches - takes place in the original subject string (that is, previous replace- - ments do not affect it). Iteration is implemented by advancing the - startoffset value for each search, which is always passed the entire + string, replacing every matching substring. If this option is not set, + only the first matching substring is replaced. The search for matches + takes place in the original subject string (that is, previous replace- + ments do not affect it). Iteration is implemented by advancing the + startoffset value for each search, which is always passed the entire subject string. If an offset limit is set in the match context, search- ing stops when that limit is reached. - You can restrict the effect of a global substitution to a portion of + You can restrict the effect of a global substitution to a portion of the subject string by setting either or both of startoffset and an off- set limit. Here is a pcre2test example: @@ -3421,73 +3428,73 @@ CREATING A NEW STRING WITH SUBSTITUTIONS ABC ABC ABC ABC\=offset=3,offset_limit=12 2: ABC A!C A!C ABC - When continuing with global substitutions after matching a substring + When continuing with global substitutions after matching a substring with zero length, an attempt to find a non-empty match at the same off- set is performed. If this is not successful, the offset is advanced by one character except when CRLF is a valid newline sequence and the next - two characters are CR, LF. In this case, the offset is advanced by two + two characters are CR, LF. In this case, the offset is advanced by two characters. PCRE2_SUBSTITUTE_UNKNOWN_UNSET causes references to capture groups that do not appear in the pattern to be treated as unset groups. This option - should be used with care, because it means that a typo in a group name + should be used with care, because it means that a typo in a group name or number no longer causes the PCRE2_ERROR_NOSUBSTRING error. PCRE2_SUBSTITUTE_UNSET_EMPTY causes unset capture groups (including un- - known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated - as empty strings when inserted as described above. If this option is + known groups when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) to be treated + as empty strings when inserted as described above. If this option is not set, an attempt to insert an unset group causes the PCRE2_ERROR_UN- - SET error. This option does not influence the extended substitution + SET error. This option does not influence the extended substitution syntax described below. - PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the - replacement string. Without this option, only the dollar character is - special, and only the group insertion forms listed above are valid. + PCRE2_SUBSTITUTE_EXTENDED causes extra processing to be applied to the + replacement string. Without this option, only the dollar character is + special, and only the group insertion forms listed above are valid. When PCRE2_SUBSTITUTE_EXTENDED is set, two things change: - Firstly, backslash in a replacement string is interpreted as an escape + Firstly, backslash in a replacement string is interpreted as an escape character. The usual forms such as \n or \x{ddd} can be used to specify - particular character codes, and backslash followed by any non-alphanu- - meric character quotes that character. Extended quoting can be coded + particular character codes, and backslash followed by any non-alphanu- + meric character quotes that character. Extended quoting can be coded using \Q...\E, exactly as in pattern strings. - There are also four escape sequences for forcing the case of inserted - letters. The insertion mechanism has three states: no case forcing, + There are also four escape sequences for forcing the case of inserted + letters. The insertion mechanism has three states: no case forcing, force upper case, and force lower case. The escape sequences change the current state: \U and \L change to upper or lower case forcing, respec- - tively, and \E (when not terminating a \Q quoted sequence) reverts to - no case forcing. The sequences \u and \l force the next character (if - it is a letter) to upper or lower case, respectively, and then the + tively, and \E (when not terminating a \Q quoted sequence) reverts to + no case forcing. The sequences \u and \l force the next character (if + it is a letter) to upper or lower case, respectively, and then the state automatically reverts to no case forcing. Case forcing applies to - all inserted characters, including those from capture groups and let- - ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP - was set when the pattern was compiled, Unicode properties are used for + all inserted characters, including those from capture groups and let- + ters within \Q...\E quoted sequences. If either PCRE2_UTF or PCRE2_UCP + was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater than 127. Note that case forcing sequences such as \U...\E do not nest. For exam- - ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final - \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX- + ple, the result of processing "\Uaa\LBB\Ecc\E" is "AAbbcc"; the final + \E has no effect. Note also that the PCRE2_ALT_BSUX and PCRE2_EX- TRA_ALT_BSUX options do not apply to replacement strings. - The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more - flexibility to capture group substitution. The syntax is similar to + The second effect of setting PCRE2_SUBSTITUTE_EXTENDED is to add more + flexibility to capture group substitution. The syntax is similar to that used by Bash: ${:-} ${:+:} - As before, may be a group number or a name. The first form speci- - fies a default value. If group is set, its value is inserted; if - not, is expanded and the result inserted. The second form - specifies strings that are expanded and inserted when group is set - or unset, respectively. The first form is just a convenient shorthand + As before, may be a group number or a name. The first form speci- + fies a default value. If group is set, its value is inserted; if + not, is expanded and the result inserted. The second form + specifies strings that are expanded and inserted when group is set + or unset, respectively. The first form is just a convenient shorthand for ${:+${}:} - Backslash can be used to escape colons and closing curly brackets in - the replacement strings. A change of the case forcing state within a - replacement string remains in force afterwards, as shown in this + Backslash can be used to escape colons and closing curly brackets in + the replacement strings. A change of the case forcing state within a + replacement string remains in force afterwards, as shown in this pcre2test example: /(some)?(body)/substitute_extended,replace=${1:+\U:\L}HeLLo @@ -3496,8 +3503,8 @@ CREATING A NEW STRING WITH SUBSTITUTIONS somebody 1: HELLO - The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended - substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- + The PCRE2_SUBSTITUTE_UNSET_EMPTY option does not affect these extended + substitutions. However, PCRE2_SUBSTITUTE_UNKNOWN_UNSET does cause un- known groups in the extended syntax forms to be treated as unset. If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, @@ -3506,37 +3513,39 @@ CREATING A NEW STRING WITH SUBSTITUTIONS Substitution errors - In the event of an error, pcre2_substitute() returns a negative error - code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors + In the event of an error, pcre2_substitute() returns a negative error + code. Except for PCRE2_ERROR_NOMATCH (which is never returned), errors from pcre2_match() are passed straight back. PCRE2_ERROR_NOSUBSTRING is returned for a non-existent substring inser- tion, unless PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set. PCRE2_ERROR_UNSET is returned for an unset substring insertion (includ- - ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) - when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- + ing an unknown substring when PCRE2_SUBSTITUTE_UNKNOWN_UNSET is set) + when the simple (non-extended) syntax is used and PCRE2_SUBSTITUTE_UN- SET_EMPTY is not set. - PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big + PCRE2_ERROR_NOMEMORY is returned if the output buffer is not big enough. If the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option is set, the size - of buffer that is needed is returned via outlengthptr. Note that this + of buffer that is needed is returned via outlengthptr. Note that this does not happen by default. PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the - match_data argument is NULL. + match_data argument is NULL or if the subject or replacement arguments + are NULL. For backward compatibility reasons an exception is made for + the replacement argument if the rlength argument is also 0. - PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in - the replacement string, with more particular errors being PCRE2_ER- + PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in + the replacement string, with more particular errors being PCRE2_ER- ROR_BADREPESCAPE (invalid escape sequence), PCRE2_ERROR_REPMISSINGBRACE - (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax - error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN + (closing curly bracket not found), PCRE2_ERROR_BADSUBSTITUTION (syntax + error in extended group substitution), and PCRE2_ERROR_BADSUBSPATTERN (the pattern match ended before it started or the match started earlier - than the current position in the subject, which can happen if \K is + than the current position in the subject, which can happen if \K is used in an assertion). As for all PCRE2 errors, a text message that describes the error can be - obtained by calling the pcre2_get_error_message() function (see "Ob- + obtained by calling the pcre2_get_error_message() function (see "Ob- taining a textual error message" above). Substitution callouts @@ -3545,15 +3554,15 @@ CREATING A NEW STRING WITH SUBSTITUTIONS int (*callout_function)(pcre2_substitute_callout_block *, void *), void *callout_data); - The pcre2_set_substitution_callout() function can be used to specify a - callout function for pcre2_substitute(). This information is passed in + The pcre2_set_substitution_callout() function can be used to specify a + callout function for pcre2_substitute(). This information is passed in a match context. The callout function is called after each substitution has been processed, but it can cause the replacement not to happen. The - callout function is not called for simulated substitutions that happen + callout function is not called for simulated substitutions that happen as a result of the PCRE2_SUBSTITUTE_OVERFLOW_LENGTH option. The first argument of the callout function is a pointer to a substitute - callout block structure, which contains the following fields, not nec- + callout block structure, which contains the following fields, not nec- essarily in this order: uint32_t version; @@ -3564,34 +3573,34 @@ CREATING A NEW STRING WITH SUBSTITUTIONS uint32_t oveccount; PCRE2_SIZE output_offsets[2]; - The version field contains the version number of the block format. The - current version is 0. The version number will increase in future if - more fields are added, but the intention is never to remove any of the + The version field contains the version number of the block format. The + current version is 0. The version number will increase in future if + more fields are added, but the intention is never to remove any of the existing fields. The subscount field is the number of the current match. It is 1 for the first callout, 2 for the second, and so on. The input and output point- ers are copies of the values passed to pcre2_substitute(). - The ovector field points to the ovector, which contains the result of + The ovector field points to the ovector, which contains the result of the most recent match. The oveccount field contains the number of pairs that are set in the ovector, and is always greater than zero. - The output_offsets vector contains the offsets of the replacement in - the output string. This has already been processed for dollar and (if + The output_offsets vector contains the offsets of the replacement in + the output string. This has already been processed for dollar and (if requested) backslash substitutions as described above. - The second argument of the callout function is the value passed as - callout_data when the function was registered. The value returned by + The second argument of the callout function is the value passed as + callout_data when the function was registered. The value returned by the callout function is interpreted as follows: - If the value is zero, the replacement is accepted, and, if PCRE2_SUB- - STITUTE_GLOBAL is set, processing continues with a search for the next - match. If the value is not zero, the current replacement is not ac- - cepted. If the value is greater than zero, processing continues when - PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero - or PCRE2_SUBSTITUTE_GLOBAL is not set), the the rest of the input is - copied to the output and the call to pcre2_substitute() exits, return- + If the value is zero, the replacement is accepted, and, if PCRE2_SUB- + STITUTE_GLOBAL is set, processing continues with a search for the next + match. If the value is not zero, the current replacement is not ac- + cepted. If the value is greater than zero, processing continues when + PCRE2_SUBSTITUTE_GLOBAL is set. Otherwise (the value is less than zero + or PCRE2_SUBSTITUTE_GLOBAL is not set), the the rest of the input is + copied to the output and the call to pcre2_substitute() exits, return- ing the number of matches so far. @@ -3600,56 +3609,56 @@ DUPLICATE CAPTURE GROUP NAMES int pcre2_substring_nametable_scan(const pcre2_code *code, PCRE2_SPTR name, PCRE2_SPTR *first, PCRE2_SPTR *last); - When a pattern is compiled with the PCRE2_DUPNAMES option, names for - capture groups are not required to be unique. Duplicate names are al- - ways allowed for groups with the same number, created by using the (?| + When a pattern is compiled with the PCRE2_DUPNAMES option, names for + capture groups are not required to be unique. Duplicate names are al- + ways allowed for groups with the same number, created by using the (?| feature. Indeed, if such groups are named, they are required to use the same names. - Normally, patterns that use duplicate names are such that in any one - match, only one of each set of identically-named groups participates. + Normally, patterns that use duplicate names are such that in any one + match, only one of each set of identically-named groups participates. An example is shown in the pcre2pattern documentation. - When duplicates are present, pcre2_substring_copy_byname() and - pcre2_substring_get_byname() return the first substring corresponding - to the given name that is set. Only if none are set is PCRE2_ERROR_UN- - SET is returned. The pcre2_substring_number_from_name() function re- - turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate + When duplicates are present, pcre2_substring_copy_byname() and + pcre2_substring_get_byname() return the first substring corresponding + to the given name that is set. Only if none are set is PCRE2_ERROR_UN- + SET is returned. The pcre2_substring_number_from_name() function re- + turns the error PCRE2_ERROR_NOUNIQUESUBSTRING when there are duplicate names. - If you want to get full details of all captured substrings for a given - name, you must use the pcre2_substring_nametable_scan() function. The - first argument is the compiled pattern, and the second is the name. If - the third and fourth arguments are NULL, the function returns a group + If you want to get full details of all captured substrings for a given + name, you must use the pcre2_substring_nametable_scan() function. The + first argument is the compiled pattern, and the second is the name. If + the third and fourth arguments are NULL, the function returns a group number for a unique name, or PCRE2_ERROR_NOUNIQUESUBSTRING otherwise. When the third and fourth arguments are not NULL, they must be pointers - to variables that are updated by the function. After it has run, they + to variables that are updated by the function. After it has run, they point to the first and last entries in the name-to-number table for the - given name, and the function returns the length of each entry in code - units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are + given name, and the function returns the length of each entry in code + units. In both cases, PCRE2_ERROR_NOSUBSTRING is returned if there are no entries for the given name. The format of the name table is described above in the section entitled - Information about a pattern. Given all the relevant entries for the - name, you can extract each of their numbers, and hence the captured + Information about a pattern. Given all the relevant entries for the + name, you can extract each of their numbers, and hence the captured data. FINDING ALL POSSIBLE MATCHES AT ONE POSITION - The traditional matching function uses a similar algorithm to Perl, - which stops when it finds the first match at a given point in the sub- + The traditional matching function uses a similar algorithm to Perl, + which stops when it finds the first match at a given point in the sub- ject. If you want to find all possible matches, or the longest possible - match at a given position, consider using the alternative matching - function (see below) instead. If you cannot use the alternative func- + match at a given position, consider using the alternative matching + function (see below) instead. If you cannot use the alternative func- tion, you can kludge it up by making use of the callout facility, which is described in the pcre2callout documentation. What you have to do is to insert a callout right at the end of the pat- - tern. When your callout function is called, extract and save the cur- - rent matched substring. Then return 1, which forces pcre2_match() to - backtrack and try other alternatives. Ultimately, when it runs out of + tern. When your callout function is called, extract and save the cur- + rent matched substring. Then return 1, which forces pcre2_match() to + backtrack and try other alternatives. Ultimately, when it runs out of matches, pcre2_match() will yield PCRE2_ERROR_NOMATCH. @@ -3661,15 +3670,16 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION pcre2_match_context *mcontext, int *workspace, PCRE2_SIZE wscount); - The function pcre2_dfa_match() is called to match a subject string - against a compiled pattern, using a matching algorithm that scans the + The function pcre2_dfa_match() is called to match a subject string + against a compiled pattern, using a matching algorithm that scans the subject string just once (not counting lookaround assertions), and does - not backtrack. This has different characteristics to the normal algo- - rithm, and is not compatible with Perl. Some of the features of PCRE2 - patterns are not supported. Nevertheless, there are times when this - kind of matching can be useful. For a discussion of the two matching - algorithms, and a list of features that pcre2_dfa_match() does not sup- - port, see the pcre2matching documentation. + not backtrack (except when processing lookaround assertions). This has + different characteristics to the normal algorithm, and is not compati- + ble with Perl. Some of the features of PCRE2 patterns are not sup- + ported. Nevertheless, there are times when this kind of matching can be + useful. For a discussion of the two matching algorithms, and a list of + features that pcre2_dfa_match() does not support, see the pcre2matching + documentation. The arguments for the pcre2_dfa_match() function are the same as for pcre2_match(), plus two extras. The ovector within the match data block @@ -3698,7 +3708,7 @@ MATCHING A PATTERN: THE ALTERNATIVE FUNCTION wspace, /* working space vector */ 20); /* number of elements (NOT size in bytes) */ - Option bits for pcre_dfa_match() + Option bits for pcre2_dfa_match() The unused bits of the options argument for pcre2_dfa_match() must be zero. The only bits that may be set are PCRE2_ANCHORED, @@ -3848,7 +3858,7 @@ AUTHOR REVISION - Last updated: 30 August 2021 + Last updated: 14 December 2021 Copyright (c) 1997-2021 University of Cambridge. ------------------------------------------------------------------------------ @@ -3961,8 +3971,8 @@ UNICODE AND UTF SUPPORT 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \P, \p, and \X. Only the general category properties such as Lu - and Nd are supported. Details are given in the pcre2pattern documenta- - tion. + and Nd, script names, and some bi-directional properties are supported. + Details are given in the pcre2pattern documentation. Pattern escapes such as \d and \w do not by default make use of Unicode properties. The application can request that they do by setting the @@ -4128,7 +4138,7 @@ LIMITING PCRE2 RESOURCE USAGE for --with-match-limit. You can set a lower default limit by adding, for example, - --with-match-limit_depth=10000 + --with-match-limit-depth=10000 to the configure command. This value can be overridden at run time. This depth limit indirectly limits the amount of heap memory that is @@ -4444,8 +4454,8 @@ AUTHOR REVISION - Last updated: 20 March 2020 - Copyright (c) 1997-2020 University of Cambridge. + Last updated: 08 December 2021 + Copyright (c) 1997-2021 University of Cambridge. ------------------------------------------------------------------------------ @@ -4890,57 +4900,64 @@ DIFFERENCES BETWEEN PCRE2 AND PERL This document describes some of the differences in the ways that PCRE2 and Perl handle regular expressions. The differences described here are - with respect to Perl version 5.32.0, but as both Perl and PCRE2 are + with respect to Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the information may at times be out of date. - 1. PCRE2 has only a subset of Perl's Unicode support. Details of what + 1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, + the behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' + matches the next character unless it is the start of a newline se- + quence. This means that, if the newline setting is CR, CRLF, or NUL, + '.' will match the code point LF (0x0A) in ASCII/Unicode environments, + and NL (either 0x15 or 0x25) when using EBCDIC. In Perl, '.' appears + never to match LF, even when 0x0A is not a newline indicator. + + 2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does have are given in the pcre2unicode page. - 2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized asser- + 3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized asser- tions, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just as- serts that the next character is not "a" three times (in principle; PCRE2 optimizes this to run the assertion just once). Perl allows some - repeat quantifiers on other assertions, for example, \b* (but not - \b{3}, though oddly it does allow ^{3}), but these do not seem to have - any use. PCRE2 does not allow any kind of quantifier on non-lookaround - assertions. - - 3. Capture groups that occur inside negative lookaround assertions are - counted, but their entries in the offsets vector are set only when a - negative assertion is a condition that has a matching branch (that is, - the condition is false). Perl may set such capture groups in other + repeat quantifiers on other assertions, for example, \b* , but these do + not seem to have any use. PCRE2 does not allow any kind of quantifier + on non-lookaround assertions. + + 4. Capture groups that occur inside negative lookaround assertions are + counted, but their entries in the offsets vector are set only when a + negative assertion is a condition that has a matching branch (that is, + the condition is false). Perl may set such capture groups in other circumstances. - 4. The following Perl escape sequences are not supported: \F, \l, \L, + 5. The following Perl escape sequences are not supported: \F, \l, \L, \u, \U, and \N when followed by a character name. \N on its own, match- - ing a non-newline character, and \N{U+dd..}, matching a Unicode code - point, are supported. The escapes that modify the case of following - letters are implemented by Perl's general string-handling and are not + ing a non-newline character, and \N{U+dd..}, matching a Unicode code + point, are supported. The escapes that modify the case of following + letters are implemented by Perl's general string-handling and are not part of its pattern matching engine. If any of these are encountered by - PCRE2, an error is generated by default. However, if either of the - PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are + PCRE2, an error is generated by default. However, if either of the + PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \U and \u are interpreted as ECMAScript interprets them. - 5. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 + 6. The Perl escape sequences \p, \P, and \X are supported only if PCRE2 is built with Unicode support (the default). The properties that can be - tested with \p and \P are limited to the general category properties - such as Lu and Nd, script names such as Greek or Han, and the derived - properties Any and L&. Both PCRE2 and Perl support the Cs (surrogate) - property, but in PCRE2 its use is limited. See the pcre2pattern docu- - mentation for details. The long synonyms for property names that Perl - supports (such as \p{Letter}) are not supported by PCRE2, nor is it - permitted to prefix any of these properties with "Is". - - 6. PCRE2 supports the \Q...\E escape for quoting substrings. Characters + tested with \p and \P are limited to the general category properties + such as Lu and Nd, script names such as Greek or Han, Bidi_Class, + Bidi_Control, and the derived properties Any and LC (synonym L&). Both + PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its + use is limited. See the pcre2pattern documentation for details. The + long synonyms for property names that Perl supports (such as \p{Let- + ter}) are not supported by PCRE2, nor is it permitted to prefix any of + these properties with "Is". + + 7. PCRE2 supports the \Q...\E escape for quoting substrings. Characters in between are treated as literals. However, this is slightly different from Perl in that $ and @ are also handled as literals inside the - quotes. In Perl, they cause variable interpolation (but of course PCRE2 - does not have variables). Also, Perl does "double-quotish backslash in- - terpolation" on any backslashes between \Q and \E which, its documenta- - tion says, "may lead to confusing results". PCRE2 treats a backslash - between \Q and \E just like any other character. Note the following ex- - amples: + quotes. In Perl, they cause variable interpolation (PCRE2 does not have + variables). Also, Perl does "double-quotish backslash interpolation" on + any backslashes between \Q and \E which, its documentation says, "may + lead to confusing results". PCRE2 treats a backslash between \Q and \E + just like any other character. Note the following examples: Pattern PCRE2 matches Perl matches @@ -4951,81 +4968,82 @@ DIFFERENCES BETWEEN PCRE2 AND PERL \QA\B\E A\B A\B \Q\\E \ \\E - The \Q...\E sequence is recognized both inside and outside character + The \Q...\E sequence is recognized both inside and outside character classes by both PCRE2 and Perl. - 7. Fairly obviously, PCRE2 does not support the (?{code}) and + 8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the pcre2callout documentation for details. - 8. Subroutine calls (whether recursive or not) were treated as atomic - groups up to PCRE2 release 10.23, but from release 10.30 this changed, + 9. Subroutine calls (whether recursive or not) were treated as atomic + groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl. - 9. In PCRE2, if any of the backtracking control verbs are used in a - group that is called as a subroutine (whether or not recursively), - their effect is confined to that group; it does not extend to the sur- - rounding pattern. This is not always the case in Perl. In particular, - if (*THEN) is present in a group that is called as a subroutine, its + 10. In PCRE2, if any of the backtracking control verbs are used in a + group that is called as a subroutine (whether or not recursively), + their effect is confined to that group; it does not extend to the sur- + rounding pattern. This is not always the case in Perl. In particular, + if (*THEN) is present in a group that is called as a subroutine, its action is limited to that group, even if the group does not contain any - | characters. Note that such groups are processed as anchored at the + | characters. Note that such groups are processed as anchored at the point where they are tested. - 10. If a pattern contains more than one backtracking control verb, the - first one that is backtracked onto acts. For example, in the pattern - A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure + 11. If a pattern contains more than one backtracking control verb, the + first one that is backtracked onto acts. For example, in the pattern + A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs. - 11. There are some differences that are concerned with the settings of - captured strings when part of a pattern is repeated. For example, - matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- + 12. There are some differences that are concerned with the settings of + captured strings when part of a pattern is repeated. For example, + matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 un- set, but in PCRE2 it is set to "b". - 12. PCRE2's handling of duplicate capture group numbers and names is - not as general as Perl's. This is a consequence of the fact the PCRE2 - works internally just with numbers, using an external table to trans- - late between numbers and names. In particular, a pattern such as - (?|(?A)|(?B)), where the two capture groups have the same number - but different names, is not supported, and causes an error at compile + 13. PCRE2's handling of duplicate capture group numbers and names is + not as general as Perl's. This is a consequence of the fact the PCRE2 + works internally just with numbers, using an external table to trans- + late between numbers and names. In particular, a pattern such as + (?|(?A)|(?B)), where the two capture groups have the same number + but different names, is not supported, and causes an error at compile time. If it were allowed, it would not be possible to distinguish which - group matched, because both names map to capture group number 1. To + group matched, because both names map to capture group number 1. To avoid this confusing situation, an error is given at compile time. - 13. Perl used to recognize comments in some places that PCRE2 does not, - for example, between the ( and ? at the start of a group. If the /x - modifier is set, Perl allowed white space between ( and ? though the - latest Perls give an error (for a while it was just deprecated). There + 14. Perl used to recognize comments in some places that PCRE2 does not, + for example, between the ( and ? at the start of a group. If the /x + modifier is set, Perl allowed white space between ( and ? though the + latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently. - 14. Perl, when in warning mode, gives warnings for character classes - such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- + 15. Perl, when in warning mode, gives warnings for character classes + such as [A-\d] or [a-[:digit:]]. It then treats the hyphens as liter- als. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes. - 15. In PCRE2, the upper/lower case character properties Lu and Ll are - not affected when case-independent matching is specified. For example, + 16. In PCRE2, the upper/lower case character properties Lu and Ll are + not affected when case-independent matching is specified. For example, \p{Lu} always matches an upper case letter. I think Perl has changed in - this respect; in the release at the time of writing (5.32), \p{Lu} and + this respect; in the release at the time of writing (5.34), \p{Lu} and \p{Ll} match all letters, regardless of case, when case independence is specified. - 16. From release 5.32.0, Perl locks out the use of \K in lookaround as- - sertions. From release 10.38 PCRE2 does the same by default. However, - there is an option for re-enabling the previous behaviour. When this - option is set, \K is acted on when it occurs in positive assertions, + 17. From release 5.32.0, Perl locks out the use of \K in lookaround as- + sertions. From release 10.38 PCRE2 does the same by default. However, + there is an option for re-enabling the previous behaviour. When this + option is set, \K is acted on when it occurs in positive assertions, but is ignored in negative assertions. - 17. PCRE2 provides some extensions to the Perl regular expression fa- - cilities. Perl 5.10 included new features that were not in earlier - versions of Perl, some of which (such as named parentheses) were in - PCRE2 for some time before. This list is with respect to Perl 5.32: + 18. PCRE2 provides some extensions to the Perl regular expression fa- + cilities. Perl 5.10 included new features that were not in earlier + versions of Perl, some of which (such as named parentheses) were in + PCRE2 for some time before. This list is with respect to Perl 5.34: - (a) Although lookbehind assertions in PCRE2 must match fixed length + (a) Although lookbehind assertions in PCRE2 must match fixed length strings, each alternative toplevel branch of a lookbehind assertion can - match a different length of string. Perl requires them all to have the - same length. + match a different length of string. Perl used to require them all to + have the same length, but the latest version has some variable length + support. (b) From PCRE2 10.23, backreferences to groups of fixed length are sup- ported in lookbehinds, provided that there is no possibility of refer- @@ -5067,12 +5085,12 @@ DIFFERENCES BETWEEN PCRE2 AND PERL an extension to the lookaround facilities. The default, Perl-compatible lookarounds are atomic. - 18. The Perl /a modifier restricts /d numbers to pure ascii, and the + 19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa modifier restricts /i case-insensitive matching to pure ascii, ig- noring Unicode rules. This separation cannot be represented with PCRE2_UCP. - 19. Perl has different limits than PCRE2. See the pcre2limit documenta- + 20. Perl has different limits than PCRE2. See the pcre2limit documenta- tion for details. Perl went with 5.10 from recursion to iteration keep- ing the intermediate matches on the heap, which is ~10% slower but does not fall into any stack-overflow limit. PCRE2 made a similar change at @@ -5089,7 +5107,7 @@ AUTHOR REVISION - Last updated: 30 August 2021 + Last updated: 08 December 2021 Copyright (c) 1997-2021 University of Cambridge. ------------------------------------------------------------------------------ @@ -5434,7 +5452,7 @@ FREEING JIT SPECULATIVE MEMORY void pcre2_jit_free_unused_memory(pcre2_general_context *gcontext); The JIT executable allocator does not free all memory when it is possi- - ble. It expects new allocations, and keeps some free memory around to + ble. It expects new allocations, and keeps some free memory around to improve allocation speed. However, in low memory conditions, it might be better to free all possible memory. You can cause this to happen by calling pcre2_jit_free_unused_memory(). Its argument is a general con- @@ -5492,12 +5510,13 @@ JIT FAST PATH API When you call pcre2_match(), as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For exam- - ple, if the subject pointer is NULL, an immediate error is given. Also, - unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for - validity. In the interests of speed, these checks do not happen on the - JIT fast path, and if invalid data is passed, the result is undefined. + ple, if the subject pointer is NULL but the length is non-zero, an im- + mediate error is given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF + subject string is tested for validity. In the interests of speed, these + checks do not happen on the JIT fast path, and if invalid data is + passed, the result is undefined. - Bypassing the sanity checks and the pcre2_match() wrapping can give + Bypassing the sanity checks and the pcre2_match() wrapping can give speedups of more than 10%. @@ -5515,8 +5534,8 @@ AUTHOR REVISION - Last updated: 23 May 2019 - Copyright (c) 1997-2019 University of Cambridge. + Last updated: 30 November 2021 + Copyright (c) 1997-2021 University of Cambridge. ------------------------------------------------------------------------------ @@ -6870,68 +6889,65 @@ BACKSLASH ters whose code points are less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all treated as being in the Un- - known script and with an unassigned type. The extra escape sequences - are: + known script and with an unassigned type. + + Matching characters by Unicode property is not fast, because PCRE2 has + to do a multistage table lookup in order to find a character's prop- + erty. That is why the traditional escape sequences such as \d and \w do + not use Unicode properties in PCRE2 by default, though you can make + them do so by setting the PCRE2_UCP option or by starting the pattern + with (*UCP). + + The extra escape sequences that provide property support are: \p{xx} a character with the xx property \P{xx} a character without the xx property \X a Unicode extended grapheme cluster - The property names represented by xx above are case-sensitive. There is - support for Unicode script names, Unicode general category properties, - "Any", which matches any character (including newline), and some spe- - cial PCRE2 properties (described in the next section). Other Perl - properties such as "InMusicalSymbols" are not supported by PCRE2. Note - that \P{Any} does not match any characters, so always causes a match - failure. - - Sets of Unicode characters are defined as belonging to certain scripts. - A character from one of these sets can be matched using a script name. - For example: - - \p{Greek} - \P{Han} + The property names represented by xx above are not case-sensitive, and + in accordance with Unicode's "loose matching" rules, spaces, hyphens, + and underscores are ignored. There is support for Unicode script names, + Unicode general category properties, "Any", which matches any character + (including newline), Bidi_Class, a number of binary (yes/no) proper- + ties, and some special PCRE2 properties (described below). Certain + other Perl properties such as "InMusicalSymbols" are not supported by + PCRE2. Note that \P{Any} does not match any characters, so always + causes a match failure. + + Script properties for \p and \P + + There are three different syntax forms for matching a script. Each Uni- + code character has a basic script and, optionally, a list of other + scripts ("Script Extensions") with which it is commonly used. Using the + Adlam script as an example, \p{sc:Adlam} matches characters whose basic + script is Adlam, whereas \p{scx:Adlam} matches, in addition, characters + that have Adlam in their extensions list. The full names "script" and + "script extensions" for the property types are recognized, and a equals + sign is an alternative to the colon. If a script name is given without + a property type, for example, \p{Adlam}, it is treated as \p{scx:Ad- + lam}. Perl changed to this interpretation at release 5.26 and PCRE2 + changed at release 10.40. Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not part of an identified script are lumped together as "Com- - mon". The current list of scripts is: - - Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali- - nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi, - Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba- - nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform, - Cypriot, Cypro_Minoan, Cyrillic, Deseret, Devanagari, Dives_Akuru, Do- - gra, Duployan, Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Geor- - gian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gur- - mukhi, Han, Hangul, Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, - Imperial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscrip- - tional_Parthian, Javanese, Kaithi, Kannada, Katakana, Kayah_Li, - Kharoshthi, Khitan_Small_Script, Khmer, Khojki, Khudawadi, Lao, Latin, - Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani, - Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede- - faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero- - glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi- - nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham, - Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, - Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Old_Uyghur, - Oriya, Osage, Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, - Phoenician, Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, - Sharada, Shavian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, - Soyombo, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, - Tai_Tham, Tai_Viet, Takri, Tamil, Tangsa, Tangut, Telugu, Thaana, Thai, - Tibetan, Tifinagh, Tirhuta, Toto, Ugaritic, Unknown, Vai, Vithkuqi, - Wancho, Warang_Citi, Yezidi, Yi, Zanabazar_Square. + mon". The current list of recognized script names and their 4-character + abbreviations can be obtained by running this command: + + pcre2test -LS + + + The general category property for \p and \P Each character has exactly one Unicode general category property, spec- - ified by a two-letter abbreviation. For compatibility with Perl, nega- - tion can be specified by including a circumflex between the opening - brace and the property name. For example, \p{^Lu} is the same as + ified by a two-letter abbreviation. For compatibility with Perl, nega- + tion can be specified by including a circumflex between the opening + brace and the property name. For example, \p{^Lu} is the same as \P{Lu}. If only one letter is specified with \p or \P, it includes all the gen- - eral category properties that start with that letter. In this case, in - the absence of negation, the curly brackets in the escape sequence are + eral category properties that start with that letter. In this case, in + the absence of negation, the curly brackets in the escape sequence are optional; these two examples have the same effect: \p{L} @@ -6983,36 +6999,73 @@ BACKSLASH Zp Paragraph separator Zs Space separator - The special property L& is also supported: it matches a character that - has the Lu, Ll, or Lt property, in other words, a letter that is not - classified as a modifier or "other". + The special property LC, which has the synonym L&, is also supported: + it matches a character that has the Lu, Ll, or Lt property, in other + words, a letter that is not classified as a modifier or "other". - The Cs (Surrogate) property applies only to characters whose code - points are in the range U+D800 to U+DFFF. These characters are no dif- - ferent to any other character when PCRE2 is not in UTF mode (using the - 16-bit or 32-bit library). However, they are not valid in Unicode + The Cs (Surrogate) property applies only to characters whose code + points are in the range U+D800 to U+DFFF. These characters are no dif- + ferent to any other character when PCRE2 is not in UTF mode (using the + 16-bit or 32-bit library). However, they are not valid in Unicode strings and so cannot be tested by PCRE2 in UTF mode, unless UTF valid- - ity checking has been turned off (see the discussion of + ity checking has been turned off (see the discussion of PCRE2_NO_UTF_CHECK in the pcre2api page). - The long synonyms for property names that Perl supports (such as - \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix + The long synonyms for property names that Perl supports (such as + \p{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is". No character that is in the Unicode table has the Cn (unassigned) prop- erty. Instead, this property is assumed for any code point that is not in the Unicode table. - Specifying caseless matching does not affect these escape sequences. - For example, \p{Lu} always matches only upper case letters. This is + Specifying caseless matching does not affect these escape sequences. + For example, \p{Lu} always matches only upper case letters. This is different from the behaviour of current versions of Perl. - Matching characters by Unicode property is not fast, because PCRE2 has - to do a multistage table lookup in order to find a character's prop- - erty. That is why the traditional escape sequences such as \d and \w do - not use Unicode properties in PCRE2 by default, though you can make - them do so by setting the PCRE2_UCP option or by starting the pattern - with (*UCP). + Binary (yes/no) properties for \p and \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + + The Bidi_Class property for \p and \P + + \p{Bidi_Class:} matches a character with the given class + \p{BC:} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS which space + + An equals sign may be used instead of a colon. The class names are + case-insensitive; only the short names listed above are recognized. Extended grapheme clusters @@ -7267,14 +7320,16 @@ FULL STOP (PERIOD, DOT) AND \N Outside a character class, a dot in the pattern matches any one charac- ter in the subject string except (by default) a character that signi- - fies the end of a line. + fies the end of a line. One or more characters may be specified as line + terminators (see "Newline conventions" above). - When a line ending is defined as a single character, dot never matches - that character; when the two-character sequence CRLF is used, dot does - not match CR if it is immediately followed by LF, but otherwise it - matches all characters (including isolated CRs and LFs). When any Uni- - code line endings are being recognized, dot does not match CR or LF or - any of the other line ending characters. + Dot never matches a single line-ending character. When the two-charac- + ter sequence CRLF is the only line ending, dot does not match CR if it + is immediately followed by LF, but otherwise it matches all characters + (including isolated CRs and LFs). When ANYCRLF is selected for line + endings, no occurences of CR of LF match dot. When all Unicode line + endings are being recognized, dot does not match CR or LF or any of the + other line ending characters. The behaviour of dot with regard to newlines can be changed. If the PCRE2_DOTALL option is set, a dot matches any one character, without @@ -8068,7 +8123,7 @@ ATOMIC GROUPING AND POSSESSIVE QUANTIFIERS (*atomic:\d+)foo - This kind of parenthesized group "locks up" the part of the pattern it + This kind of parenthesized group "locks up" the part of the pattern it contains once it has matched, and a failure further into the pattern is prevented from backtracking into it. Backtracking past it to previous items, however, works as normal. @@ -9640,8 +9695,8 @@ AUTHOR REVISION - Last updated: 30 August 2021 - Copyright (c) 1997-2021 University of Cambridge. + Last updated: 12 January 2022 + Copyright (c) 1997-2022 University of Cambridge. ------------------------------------------------------------------------------ @@ -10312,11 +10367,11 @@ NAME SAVING AND RE-USING PRECOMPILED PCRE2 PATTERNS int32_t pcre2_serialize_decode(pcre2_code **codes, - int32_t number_of_codes, const uint32_t *bytes, + int32_t number_of_codes, const uint8_t *bytes, pcre2_general_context *gcontext); - int32_t pcre2_serialize_encode(pcre2_code **codes, - int32_t number_of_codes, uint32_t **serialized_bytes, + int32_t pcre2_serialize_encode(const pcre2_code **codes, + int32_t number_of_codes, uint8_t **serialized_bytes, PCRE2_SIZE *serialized_size, pcre2_general_context *gcontext); void pcre2_serialize_free(uint8_t *bytes); @@ -10440,7 +10495,6 @@ RE-USING PRECOMPILED PATTERNS If this argument is NULL, malloc() and free() are used. After deserial- ization, the byte stream is no longer needed and can be discarded. - int32_t number_of_codes; pcre2_code *list_of_codes[2]; uint8_t *bytes = ; int32_t number_of_codes = @@ -10588,6 +10642,10 @@ CHARACTER TYPES iour of these escape sequences is changed to use Unicode properties and they match many more characters. + Property descriptions in \p and \P are matched caselessly; hyphens, un- + derscores, and white space are ignored, in accordance with Unicode's + "loose matching" rules. + GENERAL CATEGORY PROPERTIES FOR \p and \P @@ -10604,6 +10662,7 @@ GENERAL CATEGORY PROPERTIES FOR \p and \P Lo Other letter Lt Title case letter Lu Upper case letter + Lc Ll, Lu, or Lt L& Ll, Lu, or Lt M Mark @@ -10650,33 +10709,56 @@ PCRE2 SPECIAL CATEGORY PROPERTIES FOR \p and \P acter set at release 5.18. -SCRIPT NAMES FOR \p AND \P - - Adlam, Ahom, Anatolian_Hieroglyphs, Arabic, Armenian, Avestan, Bali- - nese, Bamum, Bassa_Vah, Batak, Bengali, Bhaiksuki, Bopomofo, Brahmi, - Braille, Buginese, Buhid, Canadian_Aboriginal, Carian, Caucasian_Alba- - nian, Chakma, Cham, Cherokee, Chorasmian, Common, Coptic, Cuneiform, - Cypriot, Cypro_Minoan, Cyrillic, Deseret, Devanagari, Dives_Akuru, Do- - gra, Duployan, Egyptian_Hieroglyphs, Elbasan, Elymaic, Ethiopic, Geor- - gian, Glagolitic, Gothic, Grantha, Greek, Gujarati, Gunjala_Gondi, Gur- - mukhi, Han, Hangul, Hanifi_Rohingya, Hanunoo, Hatran, Hebrew, Hiragana, - Imperial_Aramaic, Inherited, Inscriptional_Pahlavi, Inscrip- - tional_Parthian, Javanese, Kaithi, Kannada, Katakana, Kayah_Li, - Kharoshthi, Khitan_Small_Script, Khmer, Khojki, Khudawadi, Lao, Latin, - Lepcha, Limbu, Linear_A, Linear_B, Lisu, Lycian, Lydian, Mahajani, - Makasar, Malayalam, Mandaic, Manichaean, Marchen, Masaram_Gondi, Mede- - faidrin, Meetei_Mayek, Mende_Kikakui, Meroitic_Cursive, Meroitic_Hiero- - glyphs, Miao, Modi, Mongolian, Mro, Multani, Myanmar, Nabataean, Nandi- - nagari, New_Tai_Lue, Newa, Nko, Nushu, Nyakeng_Puachue_Hmong, Ogham, - Ol_Chiki, Old_Hungarian, Old_Italic, Old_North_Arabian, Old_Permic, - Old_Persian, Old_Sogdian, Old_South_Arabian, Old_Turkic, Old_Uyghur, - Oriya, Osage, Osmanya, Pahawh_Hmong, Palmyrene, Pau_Cin_Hau, Phags_Pa, - Phoenician, Psalter_Pahlavi, Rejang, Runic, Samaritan, Saurashtra, - Sharada, Shavian, Siddham, SignWriting, Sinhala, Sogdian, Sora_Sompeng, - Soyombo, Sundanese, Syloti_Nagri, Syriac, Tagalog, Tagbanwa, Tai_Le, - Tai_Tham, Tai_Viet, Takri, Tamil, Tangsa, Tangut, Telugu, Thaana, Thai, - Tibetan, Tifinagh, Tirhuta, Toto, Ugaritic, Vai, Vithkuqi, Wancho, - Warang_Citi, Yezidi, Yi, Zanabazar_Square. +BINARY PROPERTIES FOR \p AND \P + + Unicode defines a number of binary properties, that is, properties + whose only values are true or false. You can obtain a list of those + that are recognized by \p and \P, along with their abbreviations, by + running this command: + + pcre2test -LP + + +SCRIPT MATCHING WITH \p AND \P + + Many script names and their 4-letter abbreviations are recognized in + \p{sc:...} or \p{scx:...} items, or on their own with \p (and also \P + of course). You can obtain a list of these scripts by running this com- + mand: + + pcre2test -LS + + +THE BIDI_CLASS PROPERTY FOR \p AND \P + + \p{Bidi_Class:} matches a character with the given class + \p{BC:} matches a character with the given class + + The recognized classes are: + + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS which space CHARACTER CLASSES @@ -11008,8 +11090,8 @@ AUTHOR REVISION - Last updated: 30 August 2021 - Copyright (c) 1997-2021 University of Cambridge. + Last updated: 12 January 2022 + Copyright (c) 1997-2022 University of Cambridge. ------------------------------------------------------------------------------ @@ -11051,15 +11133,17 @@ UNICODE PROPERTY SUPPORT When PCRE2 is built with Unicode support, the escape sequences \p{..}, \P{..}, and \X can be used. This is not dependent on the PCRE2_UTF set- - ting. The Unicode properties that can be tested are limited to the - general category properties such as Lu for an upper case letter or Nd - for a decimal number, the Unicode script names such as Arabic or Han, - and the derived properties Any and L&. Full lists are given in the - pcre2pattern and pcre2syntax documentation. Only the short names for - properties are supported. For example, \p{L} matches a letter. Its Perl - synonym, \p{Letter}, is not supported. Furthermore, in Perl, many - properties may optionally be prefixed by "Is", for compatibility with - Perl 5.6. PCRE2 does not support this. + ting. The Unicode properties that can be tested are a subset of those + that Perl supports. Currently they are limited to the general category + properties such as Lu for an upper case letter or Nd for a decimal num- + ber, the Unicode script names such as Arabic or Han, Bidi_Class, + Bidi_Control, and the derived properties Any and LC (synonym L&). Full + lists are given in the pcre2pattern and pcre2syntax documentation. In + general, only the short names for properties are supported. For exam- + ple, \p{L} matches a letter. Its longer synonym, \p{Letter}, is not + supported. Furthermore, in Perl, many properties may optionally be pre- + fixed by "Is", for compatibility with Perl 5.6. PCRE2 does not support + this. WIDE CHARACTERS AND UTF MODES @@ -11437,14 +11521,14 @@ MATCHING IN INVALID UTF STRINGS AUTHOR Philip Hazel - University Computing Service + Retired from University Computing Service Cambridge, England. REVISION - Last updated: 23 February 2020 - Copyright (c) 1997-2020 University of Cambridge. + Last updated: 22 December 2021 + Copyright (c) 1997-2021 University of Cambridge. ------------------------------------------------------------------------------ diff --git a/pcre2/doc/pcre2_jit_stack_create.3 b/pcre2/doc/pcre2_jit_stack_create.3 index f0b29f0dc4fa88b87162f046100044f2d2ef7097..d332b72d7cdc733e2fbafdd93a6e6d7c1a38f234 100644 --- a/pcre2/doc/pcre2_jit_stack_create.3 +++ b/pcre2/doc/pcre2_jit_stack_create.3 @@ -22,7 +22,8 @@ allocation. The result can be passed to the JIT run-time code by calling \fBpcre2_jit_stack_assign()\fP to associate the stack with a compiled pattern, which can then be processed by \fBpcre2_match()\fP or \fBpcre2_jit_match()\fP. A maximum stack size of 512KiB to 1MiB should be more than enough for any -pattern. For more details, see the +pattern. If the stack couldn't be allocated or the values passed were not +reasonable, NULL will be returned. For more details, see the .\" HREF \fBpcre2jit\fP .\" diff --git a/pcre2/doc/pcre2_set_compile_extra_options.3 b/pcre2/doc/pcre2_set_compile_extra_options.3 index 58cefe57c369b49fe5beda5e4da9ef8cf3ce4293..0dcc8de86d764c3af019f9bbd76c74648c43450b 100644 --- a/pcre2/doc/pcre2_set_compile_extra_options.3 +++ b/pcre2/doc/pcre2_set_compile_extra_options.3 @@ -18,9 +18,9 @@ This function sets additional option bits for \fBpcre2_compile()\fP that are housed in a compile context. It completely replaces all the bits. The extra options are: .sp -.\" JOIN PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK Allow \eK in lookarounds - PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{df800} to \ex{dfff} +.\" JOIN + PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES Allow \ex{d800} to \ex{dfff} in UTF-8 and UTF-32 modes .\" JOIN PCRE2_EXTRA_ALT_BSUX Extended alternate \eu, \eU, and diff --git a/pcre2/doc/pcre2_substitute.3 b/pcre2/doc/pcre2_substitute.3 index cceb7846bdd5d2be9e0d95e2ccbb93fcb1f3790a..7ee4b6ac73f95d172aeee83f30fab015d2c4bfb8 100644 --- a/pcre2/doc/pcre2_substitute.3 +++ b/pcre2/doc/pcre2_substitute.3 @@ -55,32 +55,42 @@ automatically added. The subject and replacement lengths can be given as PCRE2_ZERO_TERMINATED for zero-terminated strings. The options are: .sp - PCRE2_ANCHORED Match only at the first position - PCRE2_ENDANCHORED Pattern can match only at end of subject - PCRE2_NOTBOL Subject is not the beginning of a line - PCRE2_NOTEOL Subject is not the end of a line - PCRE2_NOTEMPTY An empty string is not a valid match + PCRE2_ANCHORED Match only at the first position + PCRE2_ENDANCHORED Match only at end of subject .\" JOIN - PCRE2_NOTEMPTY_ATSTART An empty string at the start of the - subject is not a valid match - PCRE2_NO_JIT Do not use JIT matching + PCRE2_NOTBOL Subject is not the beginning of a + line + PCRE2_NOTEOL Subject is not the end of a line .\" JOIN - PCRE2_NO_UTF_CHECK Do not check the subject or replacement - for UTF validity (only relevant if - PCRE2_UTF was set at compile time) - PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing - PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the subject - PCRE2_SUBSTITUTE_LITERAL The replacement string is literal - PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for 1st match - PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length + PCRE2_NOTEMPTY An empty string is not a + valid match +.\" JOIN + PCRE2_NOTEMPTY_ATSTART An empty string at the start of + the subject is not a valid match + PCRE2_NO_JIT Do not use JIT matching +.\" JOIN + PCRE2_NO_UTF_CHECK Do not check for UTF validity in + the subject or replacement +.\" JOIN + (only relevant if PCRE2_UTF was + set at compile time) + PCRE2_SUBSTITUTE_EXTENDED Do extended replacement processing +.\" JOIN + PCRE2_SUBSTITUTE_GLOBAL Replace all occurrences in the + subject + PCRE2_SUBSTITUTE_LITERAL The replacement string is literal +.\" JOIN + PCRE2_SUBSTITUTE_MATCHED Use pre-existing match data for + first match + PCRE2_SUBSTITUTE_OVERFLOW_LENGTH If overflow, compute needed length PCRE2_SUBSTITUTE_REPLACEMENT_ONLY Return only replacement string(s) - PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset - PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string + PCRE2_SUBSTITUTE_UNKNOWN_UNSET Treat unknown group as unset + PCRE2_SUBSTITUTE_UNSET_EMPTY Simple unset insert = empty string .sp If PCRE2_SUBSTITUTE_LITERAL is set, PCRE2_SUBSTITUTE_EXTENDED, PCRE2_SUBSTITUTE_UNKNOWN_UNSET, and PCRE2_SUBSTITUTE_UNSET_EMPTY are ignored. .P -If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-zero; its +If PCRE2_SUBSTITUTE_MATCHED is set, \fImatch_data\fP must be non-NULL; its contents must be the result of a call to \fBpcre2_match()\fP using the same pattern and subject. .P diff --git a/pcre2/doc/pcre2api.3 b/pcre2/doc/pcre2api.3 index 1ad6e261a1dcf4c8cc4ab7a0c9305ab403ed3468..edde3db7784291b5684d27ed26c2cd2521dbd79c 100644 --- a/pcre2/doc/pcre2api.3 +++ b/pcre2/doc/pcre2api.3 @@ -1,4 +1,4 @@ -.TH PCRE2API 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2API 3 "14 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .sp @@ -1794,7 +1794,7 @@ it is set, the effect of passing an invalid UTF string as a pattern is undefined. It may cause your program to crash or loop. .P Note that this option can also be passed to \fBpcre2_match()\fP and -\fBpcre_dfa_match()\fP, to suppress UTF validity checking of the subject +\fBpcre2_dfa_match()\fP, to suppress UTF validity checking of the subject string. .P Note also that setting PCRE2_NO_UTF_CHECK at compile time does not disable the @@ -2015,8 +2015,8 @@ point. However, this applies only to characters whose code points are less than 256. By default, higher-valued code points never match escapes such as \ew or \ed. .P -When PCRE2 is built with Unicode support (the default), the Unicode properties -of all characters can be tested with \ep and \eP, or, alternatively, the +When PCRE2 is built with Unicode support (the default), certain Unicode +character properties can be tested with \ep and \eP, or, alternatively, the PCRE2_UCP option can be set when a pattern is compiled; this causes \ew and friends to use Unicode property support instead of the built-in tables. PCRE2_UCP also causes upper/lower casing operations on characters with code @@ -2279,7 +2279,7 @@ return zero. The third argument should point to a \fBsize_t\fP variable. PCRE2_INFO_LASTCODETYPE .sp Returns 1 if there is a rightmost literal code unit that must exist in any -matched string, other than at its start. The third argument should point to a +matched string, other than at its start. The third argument should point to a \fBuint32_t\fP variable. If there is no such value, 0 is returned. When 1 is returned, the code unit value itself can be retrieved using PCRE2_INFO_LASTCODEUNIT. For anchored patterns, a last literal value is @@ -2624,7 +2624,9 @@ The subject string is passed to \fBpcre2_match()\fP as a pointer in \fIstartoffset\fP. The length and offset are in code units, not characters. That is, they are in bytes for the 8-bit library, 16-bit code units for the 16-bit library, and 32-bit code units for the 32-bit library, whether or not -UTF processing is enabled. +UTF processing is enabled. As a special case, if \fIsubject\fP is NULL and +\fIlength\fP is zero, the subject is assumed to be an empty string. If +\fIlength\fP is non-zero, an error occurs if \fIsubject\fP is NULL. .P If \fIstartoffset\fP is greater than the length of the subject, \fBpcre2_match()\fP returns PCRE2_ERROR_BADOFFSET. When the starting offset is @@ -3413,12 +3415,16 @@ same number causes an error at compile time. .P This function optionally calls \fBpcre2_match()\fP and then makes a copy of the subject string in \fIoutputbuffer\fP, replacing parts that were matched with -the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP. This -can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. There is an -option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just the -replacement string(s). The default action is to perform just one replacement if -the pattern matches, but there is an option that requests multiple replacements -(see PCRE2_SUBSTITUTE_GLOBAL below). +the \fIreplacement\fP string, whose length is supplied in \fBrlength\fP, which +can be given as PCRE2_ZERO_TERMINATED for a zero-terminated string. As a +special case, if \fIreplacement\fP is NULL and \fIrlength\fP is zero, the +replacement is assumed to be an empty string. If \fIrlength\fP is non-zero, an +error occurs if \fIreplacement\fP is NULL. +.P +There is an option (see PCRE2_SUBSTITUTE_REPLACEMENT_ONLY below) to return just +the replacement string(s). The default action is to perform just one +replacement if the pattern matches, but there is an option that requests +multiple replacements (see PCRE2_SUBSTITUTE_GLOBAL below). .P If successful, \fBpcre2_substitute()\fP returns the number of substitutions that were carried out. This may be zero if no match was found, and is never @@ -3447,12 +3453,12 @@ block may or may not have been changed. As well as the usual options for \fBpcre2_match()\fP, a number of additional options can be set in the \fIoptions\fP argument of \fBpcre2_substitute()\fP. One such option is PCRE2_SUBSTITUTE_MATCHED. When this is set, an external -\fImatch_data\fP block must be provided, and it must have been used for an -external call to \fBpcre2_match()\fP. The data in the \fImatch_data\fP block -(return code, offset vector) is used for the first substitution instead of -calling \fBpcre2_match()\fP from within \fBpcre2_substitute()\fP. This allows -an application to check for a match before choosing to substitute, without -having to repeat the match. +\fImatch_data\fP block must be provided, and it must have already been used for +an external call to \fBpcre2_match()\fP with the same pattern and subject +arguments. The data in the \fImatch_data\fP block (return code, offset vector) +is then used for the first substitution instead of calling \fBpcre2_match()\fP +from within \fBpcre2_substitute()\fP. This allows an application to check for a +match before choosing to substitute, without having to repeat the match. .P The contents of the externally supplied match data block are not changed when PCRE2_SUBSTITUTE_MATCHED is set. If PCRE2_SUBSTITUTE_GLOBAL is also set, @@ -3584,7 +3590,7 @@ and force lower case. The escape sequences change the current state: \eU and terminating a \eQ quoted sequence) reverts to no case forcing. The sequences \eu and \el force the next character (if it is a letter) to upper or lower case, respectively, and then the state automatically reverts to no case -forcing. Case forcing applies to all inserted characters, including those from +forcing. Case forcing applies to all inserted characters, including those from capture groups and letters within \eQ...\eE quoted sequences. If either PCRE2_UTF or PCRE2_UCP was set when the pattern was compiled, Unicode properties are used for case forcing characters whose code points are greater @@ -3649,7 +3655,9 @@ needed is returned via \fIoutlengthptr\fP. Note that this does not happen by default. .P PCRE2_ERROR_NULL is returned if PCRE2_SUBSTITUTE_MATCHED is set but the -\fImatch_data\fP argument is NULL. +\fImatch_data\fP argument is NULL or if the \fIsubject\fP or \fIreplacement\fP +arguments are NULL. For backward compatibility reasons an exception is made for +the \fIreplacement\fP argument if the \fIrlength\fP argument is also 0. .P PCRE2_ERROR_BADREPLACEMENT is used for miscellaneous syntax errors in the replacement string, with more particular errors being PCRE2_ERROR_BADREPESCAPE @@ -3811,12 +3819,13 @@ other alternatives. Ultimately, when it runs out of matches, .P The function \fBpcre2_dfa_match()\fP is called to match a subject string against a compiled pattern, using a matching algorithm that scans the subject -string just once (not counting lookaround assertions), and does not backtrack. -This has different characteristics to the normal algorithm, and is not -compatible with Perl. Some of the features of PCRE2 patterns are not supported. -Nevertheless, there are times when this kind of matching can be useful. For a -discussion of the two matching algorithms, and a list of features that -\fBpcre2_dfa_match()\fP does not support, see the +string just once (not counting lookaround assertions), and does not backtrack +(except when processing lookaround assertions). This has different +characteristics to the normal algorithm, and is not compatible with Perl. Some +of the features of PCRE2 patterns are not supported. Nevertheless, there are +times when this kind of matching can be useful. For a discussion of the two +matching algorithms, and a list of features that \fBpcre2_dfa_match()\fP does +not support, see the .\" HREF \fBpcre2matching\fP .\" @@ -3848,7 +3857,7 @@ Here is an example of a simple call to \fBpcre2_dfa_match()\fP: wspace, /* working space vector */ 20); /* number of elements (NOT size in bytes) */ . -.SS "Option bits for \fBpcre_dfa_match()\fP" +.SS "Option bits for \fBpcre2_dfa_match()\fP" .rs .sp The unused bits of the \fIoptions\fP argument for \fBpcre2_dfa_match()\fP must @@ -4016,6 +4025,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 August 2021 +Last updated: 14 December 2021 Copyright (c) 1997-2021 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2build.3 b/pcre2/doc/pcre2build.3 index 60931bfc5870f457b0224afc7c1fd18a870e9cc7..5fca3dce062477133acb3ee11aa7262682483c40 100644 --- a/pcre2/doc/pcre2build.3 +++ b/pcre2/doc/pcre2build.3 @@ -1,4 +1,4 @@ -.TH PCRE2BUILD 3 "20 March 2020" "PCRE2 10.35" +.TH PCRE2BUILD 3 "08 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) . @@ -122,8 +122,9 @@ locked this out by setting PCRE2_NEVER_UTF. UTF support allows the libraries to process character code points up to 0x10ffff in the strings that they handle. Unicode support also gives access to the Unicode properties of characters, using pattern escapes such as \eP, \ep, -and \eX. Only the general category properties such as \fILu\fP and \fINd\fP are -supported. Details are given in the +and \eX. Only the general category properties such as \fILu\fP and \fINd\fP, +script names, and some bi-directional properties are supported. Details are +given in the .\" HREF \fBpcre2pattern\fP .\" @@ -302,7 +303,7 @@ You can also explicitly limit the depth of nested backtracking in the for --with-match-limit. You can set a lower default limit by adding, for example, .sp - --with-match-limit_depth=10000 + --with-match-limit-depth=10000 .sp to the \fBconfigure\fP command. This value can be overridden at run time. This depth limit indirectly limits the amount of heap memory that is used, but @@ -633,6 +634,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 20 March 2020 -Copyright (c) 1997-2020 University of Cambridge. +Last updated: 08 December 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2compat.3 b/pcre2/doc/pcre2compat.3 index 311d6ebe82b10f80398f01f8f7f8743dbd8bac17..8333d3e363b24f4cc26ed74b6fc2178ac4d02ee7 100644 --- a/pcre2/doc/pcre2compat.3 +++ b/pcre2/doc/pcre2compat.3 @@ -1,4 +1,4 @@ -.TH PCRE2COMPAT 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2COMPAT 3 "08 December 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "DIFFERENCES BETWEEN PCRE2 AND PERL" @@ -6,31 +6,38 @@ PCRE2 - Perl-compatible regular expressions (revised API) .sp This document describes some of the differences in the ways that PCRE2 and Perl handle regular expressions. The differences described here are with respect to -Perl version 5.32.0, but as both Perl and PCRE2 are continually changing, the +Perl version 5.34.0, but as both Perl and PCRE2 are continually changing, the information may at times be out of date. .P -1. PCRE2 has only a subset of Perl's Unicode support. Details of what it does +1. When PCRE2_DOTALL (equivalent to Perl's /s qualifier) is not set, the +behaviour of the '.' metacharacter differs from Perl. In PCRE2, '.' matches the +next character unless it is the start of a newline sequence. This means that, +if the newline setting is CR, CRLF, or NUL, '.' will match the code point LF +(0x0A) in ASCII/Unicode environments, and NL (either 0x15 or 0x25) when using +EBCDIC. In Perl, '.' appears never to match LF, even when 0x0A is not a newline +indicator. +.P +2. PCRE2 has only a subset of Perl's Unicode support. Details of what it does have are given in the .\" HREF \fBpcre2unicode\fP .\" page. .P -2. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but +3. Like Perl, PCRE2 allows repeat quantifiers on parenthesized assertions, but they do not mean what you might think. For example, (?!a){3} does not assert that the next three characters are not "a". It just asserts that the next character is not "a" three times (in principle; PCRE2 optimizes this to run the assertion just once). Perl allows some repeat quantifiers on other assertions, -for example, \eb* (but not \eb{3}, though oddly it does allow ^{3}), but these -do not seem to have any use. PCRE2 does not allow any kind of quantifier on -non-lookaround assertions. +for example, \eb* , but these do not seem to have any use. PCRE2 does not allow +any kind of quantifier on non-lookaround assertions. .P -3. Capture groups that occur inside negative lookaround assertions are counted, +4. Capture groups that occur inside negative lookaround assertions are counted, but their entries in the offsets vector are set only when a negative assertion is a condition that has a matching branch (that is, the condition is false). Perl may set such capture groups in other circumstances. .P -4. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, +5. The following Perl escape sequences are not supported: \eF, \el, \eL, \eu, \eU, and \eN when followed by a character name. \eN on its own, matching a non-newline character, and \eN{U+dd..}, matching a Unicode code point, are supported. The escapes that modify the case of following letters are @@ -40,12 +47,12 @@ generated by default. However, if either of the PCRE2_ALT_BSUX or PCRE2_EXTRA_ALT_BSUX options is set, \eU and \eu are interpreted as ECMAScript interprets them. .P -5. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is +6. The Perl escape sequences \ep, \eP, and \eX are supported only if PCRE2 is built with Unicode support (the default). The properties that can be tested with \ep and \eP are limited to the general category properties such as Lu and -Nd, script names such as Greek or Han, and the derived properties Any and L&. -Both PCRE2 and Perl support the Cs (surrogate) property, but in PCRE2 its use -is limited. See the +Nd, script names such as Greek or Han, Bidi_Class, Bidi_Control, and the +derived properties Any and LC (synonym L&). Both PCRE2 and Perl support the Cs +(surrogate) property, but in PCRE2 its use is limited. See the .\" HREF \fBpcre2pattern\fP .\" @@ -53,14 +60,14 @@ documentation for details. The long synonyms for property names that Perl supports (such as \ep{Letter}) are not supported by PCRE2, nor is it permitted to prefix any of these properties with "Is". .P -6. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters +7. PCRE2 supports the \eQ...\eE escape for quoting substrings. Characters in between are treated as literals. However, this is slightly different from Perl in that $ and @ are also handled as literals inside the quotes. In Perl, -they cause variable interpolation (but of course PCRE2 does not have -variables). Also, Perl does "double-quotish backslash interpolation" on any -backslashes between \eQ and \eE which, its documentation says, "may lead to -confusing results". PCRE2 treats a backslash between \eQ and \eE just like any -other character. Note the following examples: +they cause variable interpolation (PCRE2 does not have variables). Also, Perl +does "double-quotish backslash interpolation" on any backslashes between \eQ +and \eE which, its documentation says, "may lead to confusing results". PCRE2 +treats a backslash between \eQ and \eE just like any other character. Note the +following examples: .sp Pattern PCRE2 matches Perl matches .sp @@ -75,7 +82,7 @@ other character. Note the following examples: The \eQ...\eE sequence is recognized both inside and outside character classes by both PCRE2 and Perl. .P -7. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) +8. Fairly obviously, PCRE2 does not support the (?{code}) and (??{code}) constructions. However, PCRE2 does have a "callout" feature, which allows an external function to be called during pattern matching. See the .\" HREF @@ -83,11 +90,11 @@ external function to be called during pattern matching. See the .\" documentation for details. .P -8. Subroutine calls (whether recursive or not) were treated as atomic groups up +9. Subroutine calls (whether recursive or not) were treated as atomic groups up to PCRE2 release 10.23, but from release 10.30 this changed, and backtracking into subroutine calls is now supported, as in Perl. .P -9. In PCRE2, if any of the backtracking control verbs are used in a group that +10. In PCRE2, if any of the backtracking control verbs are used in a group that is called as a subroutine (whether or not recursively), their effect is confined to that group; it does not extend to the surrounding pattern. This is not always the case in Perl. In particular, if (*THEN) is present in a group @@ -95,18 +102,18 @@ that is called as a subroutine, its action is limited to that group, even if the group does not contain any | characters. Note that such groups are processed as anchored at the point where they are tested. .P -10. If a pattern contains more than one backtracking control verb, the first +11. If a pattern contains more than one backtracking control verb, the first one that is backtracked onto acts. For example, in the pattern A(*COMMIT)B(*PRUNE)C a failure in B triggers (*COMMIT), but a failure in C triggers (*PRUNE). Perl's behaviour is more complex; in many cases it is the same as PCRE2, but there are cases where it differs. .P -11. There are some differences that are concerned with the settings of captured +12. There are some differences that are concerned with the settings of captured strings when part of a pattern is repeated. For example, matching "aba" against the pattern /^(a(b)?)+$/ in Perl leaves $2 unset, but in PCRE2 it is set to "b". .P -12. PCRE2's handling of duplicate capture group numbers and names is not as +13. PCRE2's handling of duplicate capture group numbers and names is not as general as Perl's. This is a consequence of the fact the PCRE2 works internally just with numbers, using an external table to translate between numbers and names. In particular, a pattern such as (?|(?A)|(?B)), where the two @@ -115,37 +122,38 @@ causes an error at compile time. If it were allowed, it would not be possible to distinguish which group matched, because both names map to capture group number 1. To avoid this confusing situation, an error is given at compile time. .P -13. Perl used to recognize comments in some places that PCRE2 does not, for +14. Perl used to recognize comments in some places that PCRE2 does not, for example, between the ( and ? at the start of a group. If the /x modifier is set, Perl allowed white space between ( and ? though the latest Perls give an error (for a while it was just deprecated). There may still be some cases where Perl behaves differently. .P -14. Perl, when in warning mode, gives warnings for character classes such as +15. Perl, when in warning mode, gives warnings for character classes such as [A-\ed] or [a-[:digit:]]. It then treats the hyphens as literals. PCRE2 has no warning features, so it gives an error in these cases because they are almost certainly user mistakes. .P -15. In PCRE2, the upper/lower case character properties Lu and Ll are not +16. In PCRE2, the upper/lower case character properties Lu and Ll are not affected when case-independent matching is specified. For example, \ep{Lu} always matches an upper case letter. I think Perl has changed in this respect; -in the release at the time of writing (5.32), \ep{Lu} and \ep{Ll} match all +in the release at the time of writing (5.34), \ep{Lu} and \ep{Ll} match all letters, regardless of case, when case independence is specified. .P -16. From release 5.32.0, Perl locks out the use of \eK in lookaround +17. From release 5.32.0, Perl locks out the use of \eK in lookaround assertions. From release 10.38 PCRE2 does the same by default. However, there is an option for re-enabling the previous behaviour. When this option is set, \eK is acted on when it occurs in positive assertions, but is ignored in negative assertions. .P -17. PCRE2 provides some extensions to the Perl regular expression facilities. +18. PCRE2 provides some extensions to the Perl regular expression facilities. Perl 5.10 included new features that were not in earlier versions of Perl, some of which (such as named parentheses) were in PCRE2 for some time before. This -list is with respect to Perl 5.32: +list is with respect to Perl 5.34: .sp (a) Although lookbehind assertions in PCRE2 must match fixed length strings, each alternative toplevel branch of a lookbehind assertion can match a -different length of string. Perl requires them all to have the same length. +different length of string. Perl used to require them all to have the same +length, but the latest version has some variable length support. .sp (b) From PCRE2 10.23, backreferences to groups of fixed length are supported in lookbehinds, provided that there is no possibility of referencing a @@ -186,11 +194,11 @@ the pattern. extension to the lookaround facilities. The default, Perl-compatible lookarounds are atomic. .P -18. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa +19. The Perl /a modifier restricts /d numbers to pure ascii, and the /aa modifier restricts /i case-insensitive matching to pure ascii, ignoring Unicode rules. This separation cannot be represented with PCRE2_UCP. .P -19. Perl has different limits than PCRE2. See the +20. Perl has different limits than PCRE2. See the .\" HREF \fBpcre2limit\fP .\" @@ -214,6 +222,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 August 2021 +Last updated: 08 December 2021 Copyright (c) 1997-2021 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2jit.3 b/pcre2/doc/pcre2jit.3 index 9b7755054b94dcb78cbb05aab1aed100b84e36f8..f0b3b151226553477658a8a63b5b42b54f7b1fb1 100644 --- a/pcre2/doc/pcre2jit.3 +++ b/pcre2/doc/pcre2jit.3 @@ -1,4 +1,4 @@ -.TH PCRE2JIT 3 "23 May 2019" "PCRE2 10.34" +.TH PCRE2JIT 3 "30 November 2021" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 JUST-IN-TIME COMPILER SUPPORT" @@ -251,11 +251,11 @@ non-sequential matches in one thread is to use callouts: if a callout function starts another match, that match must use a different JIT stack to the one used for currently suspended match(es). .P -In a multithread application, if you do not -specify a JIT stack, or if you assign or pass back NULL from a callback, that -is thread-safe, because each thread has its own machine stack. However, if you -assign or pass back a non-NULL JIT stack, this must be a different stack for -each thread so that the application is thread-safe. +In a multithread application, if you do not specify a JIT stack, or if you +assign or pass back NULL from a callback, that is thread-safe, because each +thread has its own machine stack. However, if you assign or pass back a +non-NULL JIT stack, this must be a different stack for each thread so that the +application is thread-safe. .P Strictly speaking, even more is allowed. You can assign the same non-NULL stack to a match context that is used by any number of patterns, as long as they are @@ -355,8 +355,8 @@ out this complicated API. .B void pcre2_jit_free_unused_memory(pcre2_general_context *\fIgcontext\fP); .fi .P -The JIT executable allocator does not free all memory when it is possible. -It expects new allocations, and keeps some free memory around to improve +The JIT executable allocator does not free all memory when it is possible. It +expects new allocations, and keeps some free memory around to improve allocation speed. However, in low memory conditions, it might be better to free all possible memory. You can cause this to happen by calling pcre2_jit_free_unused_memory(). Its argument is a general context, for custom @@ -416,10 +416,10 @@ that was not compiled. .P When you call \fBpcre2_match()\fP, as well as testing for invalid options, a number of other sanity checks are performed on the arguments. For example, if -the subject pointer is NULL, an immediate error is given. Also, unless -PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested for validity. In the -interests of speed, these checks do not happen on the JIT fast path, and if -invalid data is passed, the result is undefined. +the subject pointer is NULL but the length is non-zero, an immediate error is +given. Also, unless PCRE2_NO_UTF_CHECK is set, a UTF subject string is tested +for validity. In the interests of speed, these checks do not happen on the JIT +fast path, and if invalid data is passed, the result is undefined. .P Bypassing the sanity checks and the \fBpcre2_match()\fP wrapping can give speedups of more than 10%. @@ -445,6 +445,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 May 2019 -Copyright (c) 1997-2019 University of Cambridge. +Last updated: 30 November 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2pattern.3 b/pcre2/doc/pcre2pattern.3 index 627f229a44f91264c3c2aabb8224d1bbd01c28c1..3088ec0fb28578987eab3c1a71d0fcf4e8207df7 100644 --- a/pcre2/doc/pcre2pattern.3 +++ b/pcre2/doc/pcre2pattern.3 @@ -1,4 +1,4 @@ -.TH PCRE2PATTERN 3 "3o0 August 2021" "PCRE2 10.38" +.TH PCRE2PATTERN 3 "12 January 2022" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION DETAILS" @@ -509,7 +509,6 @@ for themselves. For example, outside a character class: .\" JOIN \e377 might be a backreference, otherwise the value 255 (decimal) -.\" JOIN \e81 is always a backreference .sp Note that octal values of 100 or greater that are specified using this syntax @@ -773,200 +772,64 @@ can be used in any mode, though in 8-bit and 16-bit non-UTF modes these sequences are of course limited to testing characters whose code points are less than U+0100 and U+10000, respectively. In 32-bit non-UTF mode, code points greater than 0x10ffff (the Unicode limit) may be encountered. These are all -treated as being in the Unknown script and with an unassigned type. The extra -escape sequences are: +treated as being in the Unknown script and with an unassigned type. +.P +Matching characters by Unicode property is not fast, because PCRE2 has to do a +multistage table lookup in order to find a character's property. That is why +the traditional escape sequences such as \ed and \ew do not use Unicode +properties in PCRE2 by default, though you can make them do so by setting the +PCRE2_UCP option or by starting the pattern with (*UCP). +.P +The extra escape sequences that provide property support are: .sp \ep{\fIxx\fP} a character with the \fIxx\fP property \eP{\fIxx\fP} a character without the \fIxx\fP property \eX a Unicode extended grapheme cluster .sp -The property names represented by \fIxx\fP above are case-sensitive. There is -support for Unicode script names, Unicode general category properties, "Any", -which matches any character (including newline), and some special PCRE2 -properties (described in the +The property names represented by \fIxx\fP above are not case-sensitive, and in +accordance with Unicode's "loose matching" rules, spaces, hyphens, and +underscores are ignored. There is support for Unicode script names, Unicode +general category properties, "Any", which matches any character (including +newline), Bidi_Class, a number of binary (yes/no) properties, and some special +PCRE2 properties (described .\" HTML .\" -next section). +below). .\" -Other Perl properties such as "InMusicalSymbols" are not supported by PCRE2. -Note that \eP{Any} does not match any characters, so always causes a match -failure. -.P -Sets of Unicode characters are defined as belonging to certain scripts. A -character from one of these sets can be matched using a script name. For -example: -.sp - \ep{Greek} - \eP{Han} +Certain other Perl properties such as "InMusicalSymbols" are not supported by +PCRE2. Note that \eP{Any} does not match any characters, so always causes a +match failure. +. +. +. +.SS "Script properties for \ep and \eP" +.rs .sp +There are three different syntax forms for matching a script. Each Unicode +character has a basic script and, optionally, a list of other scripts ("Script +Extensions") with which it is commonly used. Using the Adlam script as an +example, \ep{sc:Adlam} matches characters whose basic script is Adlam, whereas +\ep{scx:Adlam} matches, in addition, characters that have Adlam in their +extensions list. The full names "script" and "script extensions" for the +property types are recognized, and a equals sign is an alternative to the +colon. If a script name is given without a property type, for example, +\ep{Adlam}, it is treated as \ep{scx:Adlam}. Perl changed to this +interpretation at release 5.26 and PCRE2 changed at release 10.40. +.P Unassigned characters (and in non-UTF 32-bit mode, characters with code points greater than 0x10FFFF) are assigned the "Unknown" script. Others that are not part of an identified script are lumped together as "Common". The current list -of scripts is: -.P -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician, -Psalter_Pahlavi, -Rejang, -Runic, -Samaritan, -Saurashtra, -Sharada, -Shavian, -Siddham, -SignWriting, -Sinhala, -Sogdian, -Sora_Sompeng, -Soyombo, -Sundanese, -Syloti_Nagri, -Syriac, -Tagalog, -Tagbanwa, -Tai_Le, -Tai_Tham, -Tai_Viet, -Takri, -Tamil, -Tangsa, -Tangut, -Telugu, -Thaana, -Thai, -Tibetan, -Tifinagh, -Tirhuta, -Toto, -Ugaritic, -Unknown, -Vai, -Vithkuqi, -Wancho, -Warang_Citi, -Yezidi, -Yi, -Zanabazar_Square. -.P +of recognized script names and their 4-character abbreviations can be obtained +by running this command: +.sp + pcre2test -LS +.sp +. +. +. +.SS "The general category property for \ep and \eP" +.rs +.sp Each character has exactly one Unicode general category property, specified by a two-letter abbreviation. For compatibility with Perl, negation can be specified by including a circumflex between the opening brace and the property @@ -1026,9 +889,9 @@ The following general category property codes are supported: Zp Paragraph separator Zs Space separator .sp -The special property L& is also supported: it matches a character that has -the Lu, Ll, or Lt property, in other words, a letter that is not classified as -a modifier or "other". +The special property LC, which has the synonym L&, is also supported: it +matches a character that has the Lu, Ll, or Lt property, in other words, a +letter that is not classified as a modifier or "other". .P The Cs (Surrogate) property applies only to characters whose code points are in the range U+D800 to U+DFFF. These characters are no different to any other @@ -1052,12 +915,53 @@ Unicode table. Specifying caseless matching does not affect these escape sequences. For example, \ep{Lu} always matches only upper case letters. This is different from the behaviour of current versions of Perl. -.P -Matching characters by Unicode property is not fast, because PCRE2 has to do a -multistage table lookup in order to find a character's property. That is why -the traditional escape sequences such as \ed and \ew do not use Unicode -properties in PCRE2 by default, though you can make them do so by setting the -PCRE2_UCP option or by starting the pattern with (*UCP). +. +. +.SS "Binary (yes/no) properties for \ep and \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +.sp +. +. +.SS "The Bidi_Class property for \ep and \eP" +.rs +.sp + \ep{Bidi_Class:} matches a character with the given class + \ep{BC:} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS which space +.sp +An equals sign may be used instead of a colon. The class names are +case-insensitive; only the short names listed above are recognized. . . .SS Extended grapheme clusters @@ -1336,14 +1240,19 @@ end of the subject in both modes, and if all branches of a pattern start with .sp Outside a character class, a dot in the pattern matches any one character in the subject string except (by default) a character that signifies the end of a -line. +line. One or more characters may be specified as line terminators (see +.\" HTML +.\" +"Newline conventions" +.\" +above). .P -When a line ending is defined as a single character, dot never matches that -character; when the two-character sequence CRLF is used, dot does not match CR -if it is immediately followed by LF, but otherwise it matches all characters -(including isolated CRs and LFs). When any Unicode line endings are being -recognized, dot does not match CR or LF or any of the other line ending -characters. +Dot never matches a single line-ending character. When the two-character +sequence CRLF is the only line ending, dot does not match CR if it is +immediately followed by LF, but otherwise it matches all characters (including +isolated CRs and LFs). When ANYCRLF is selected for line endings, no occurences +of CR of LF match dot. When all Unicode line endings are being recognized, dot +does not match CR or LF or any of the other line ending characters. .P The behaviour of dot with regard to newlines can be changed. If the PCRE2_DOTALL option is set, a dot matches any one character, without exception. @@ -2186,10 +2095,10 @@ be easier to remember: .sp (*atomic:\ed+)foo .sp -This kind of parenthesized group "locks up" the part of the pattern it -contains once it has matched, and a failure further into the pattern is -prevented from backtracking into it. Backtracking past it to previous items, -however, works as normal. +This kind of parenthesized group "locks up" the part of the pattern it contains +once it has matched, and a failure further into the pattern is prevented from +backtracking into it. Backtracking past it to previous items, however, works as +normal. .P An alternative description is that a group of this type matches exactly the string of characters that an identical standalone pattern would match, if @@ -3905,6 +3814,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 August 2021 -Copyright (c) 1997-2021 University of Cambridge. +Last updated: 12 January 2022 +Copyright (c) 1997-2022 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2serialize.3 b/pcre2/doc/pcre2serialize.3 index 85aee9ba9706f45ac5b40716684973caf8a67cda..987bc3a4c6f8fd482df37ffa104a6846fcb57139 100644 --- a/pcre2/doc/pcre2serialize.3 +++ b/pcre2/doc/pcre2serialize.3 @@ -6,11 +6,11 @@ PCRE2 - Perl-compatible regular expressions (revised API) .sp .nf .B int32_t pcre2_serialize_decode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, const uint32_t *\fIbytes\fP," +.B " int32_t \fInumber_of_codes\fP, const uint8_t *\fIbytes\fP," .B " pcre2_general_context *\fIgcontext\fP);" .sp -.B int32_t pcre2_serialize_encode(pcre2_code **\fIcodes\fP, -.B " int32_t \fInumber_of_codes\fP, uint32_t **\fIserialized_bytes\fP," +.B int32_t pcre2_serialize_encode(const pcre2_code **\fIcodes\fP, +.B " int32_t \fInumber_of_codes\fP, uint8_t **\fIserialized_bytes\fP," .B " PCRE2_SIZE *\fIserialized_size\fP, pcre2_general_context *\fIgcontext\fP);" .sp .B void pcre2_serialize_free(uint8_t *\fIbytes\fP); @@ -141,7 +141,6 @@ mangagement functions for the decoded patterns. If this argument is NULL, \fBmalloc()\fP and \fBfree()\fP are used. After deserialization, the byte stream is no longer needed and can be discarded. .sp - int32_t number_of_codes; pcre2_code *list_of_codes[2]; uint8_t *bytes = ; int32_t number_of_codes = diff --git a/pcre2/doc/pcre2syntax.3 b/pcre2/doc/pcre2syntax.3 index 937c8172d06daa8e3fc148787bfd47e580f7291f..c0a496f4f42e0d8f431bc6491fdcc252dcea7c59 100644 --- a/pcre2/doc/pcre2syntax.3 +++ b/pcre2/doc/pcre2syntax.3 @@ -1,4 +1,4 @@ -.TH PCRE2SYNTAX 3 "30 August 2021" "PCRE2 10.38" +.TH PCRE2SYNTAX 3 "12 January 2022" "PCRE2 10.40" .SH NAME PCRE2 - Perl-compatible regular expressions (revised API) .SH "PCRE2 REGULAR EXPRESSION SYNTAX SUMMARY" @@ -102,6 +102,10 @@ happening, \es and \ew may also match characters with code points in the range 128-255. If the PCRE2_UCP option is set, the behaviour of these escape sequences is changed to use Unicode properties and they match many more characters. +.P +Property descriptions in \ep and \eP are matched caselessly; hyphens, +underscores, and white space are ignored, in accordance with Unicode's "loose +matching" rules. . . .SH "GENERAL CATEGORY PROPERTIES FOR \ep and \eP" @@ -120,6 +124,7 @@ characters. Lo Other letter Lt Title case letter Lu Upper case letter + Lc Ll, Lu, or Lt L& Ll, Lu, or Lt .sp M Mark @@ -167,170 +172,59 @@ Perl and POSIX space are now the same. Perl added VT to its space character set at release 5.18. . . -.SH "SCRIPT NAMES FOR \ep AND \eP" +.SH "BINARY PROPERTIES FOR \ep AND \eP" +.rs +.sp +Unicode defines a number of binary properties, that is, properties whose only +values are true or false. You can obtain a list of those that are recognized by +\ep and \eP, along with their abbreviations, by running this command: +.sp + pcre2test -LP +. +. +. +.SH "SCRIPT MATCHING WITH \ep AND \eP" +.rs +.sp +Many script names and their 4-letter abbreviations are recognized in +\ep{sc:...} or \ep{scx:...} items, or on their own with \ep (and also \eP of +course). You can obtain a list of these scripts by running this command: +.sp + pcre2test -LS +. +. +. +.SH "THE BIDI_CLASS PROPERTY FOR \ep AND \eP" .rs .sp -Adlam, -Ahom, -Anatolian_Hieroglyphs, -Arabic, -Armenian, -Avestan, -Balinese, -Bamum, -Bassa_Vah, -Batak, -Bengali, -Bhaiksuki, -Bopomofo, -Brahmi, -Braille, -Buginese, -Buhid, -Canadian_Aboriginal, -Carian, -Caucasian_Albanian, -Chakma, -Cham, -Cherokee, -Chorasmian, -Common, -Coptic, -Cuneiform, -Cypriot, -Cypro_Minoan, -Cyrillic, -Deseret, -Devanagari, -Dives_Akuru, -Dogra, -Duployan, -Egyptian_Hieroglyphs, -Elbasan, -Elymaic, -Ethiopic, -Georgian, -Glagolitic, -Gothic, -Grantha, -Greek, -Gujarati, -Gunjala_Gondi, -Gurmukhi, -Han, -Hangul, -Hanifi_Rohingya, -Hanunoo, -Hatran, -Hebrew, -Hiragana, -Imperial_Aramaic, -Inherited, -Inscriptional_Pahlavi, -Inscriptional_Parthian, -Javanese, -Kaithi, -Kannada, -Katakana, -Kayah_Li, -Kharoshthi, -Khitan_Small_Script, -Khmer, -Khojki, -Khudawadi, -Lao, -Latin, -Lepcha, -Limbu, -Linear_A, -Linear_B, -Lisu, -Lycian, -Lydian, -Mahajani, -Makasar, -Malayalam, -Mandaic, -Manichaean, -Marchen, -Masaram_Gondi, -Medefaidrin, -Meetei_Mayek, -Mende_Kikakui, -Meroitic_Cursive, -Meroitic_Hieroglyphs, -Miao, -Modi, -Mongolian, -Mro, -Multani, -Myanmar, -Nabataean, -Nandinagari, -New_Tai_Lue, -Newa, -Nko, -Nushu, -Nyakeng_Puachue_Hmong, -Ogham, -Ol_Chiki, -Old_Hungarian, -Old_Italic, -Old_North_Arabian, -Old_Permic, -Old_Persian, -Old_Sogdian, -Old_South_Arabian, -Old_Turkic, -Old_Uyghur, -Oriya, -Osage, -Osmanya, -Pahawh_Hmong, -Palmyrene, -Pau_Cin_Hau, -Phags_Pa, -Phoenician, -Psalter_Pahlavi, -Rejang, -Runic, -Samaritan, -Saurashtra, -Sharada, -Shavian, -Siddham, -SignWriting, -Sinhala, -Sogdian, -Sora_Sompeng, -Soyombo, -Sundanese, -Syloti_Nagri, -Syriac, -Tagalog, -Tagbanwa, -Tai_Le, -Tai_Tham, -Tai_Viet, -Takri, -Tamil, -Tangsa, -Tangut, -Telugu, -Thaana, -Thai, -Tibetan, -Tifinagh, -Tirhuta, -Toto, -Ugaritic, -Vai, -Vithkuqi, -Wancho, -Warang_Citi, -Yezidi, -Yi, -Zanabazar_Square. + \ep{Bidi_Class:} matches a character with the given class + \ep{BC:} matches a character with the given class +.sp +The recognized classes are: +.sp + AL Arabic letter + AN Arabic number + B paragraph separator + BN boundary neutral + CS common separator + EN European number + ES European separator + ET European terminator + FSI first strong isolate + L left-to-right + LRE left-to-right embedding + LRI left-to-right isolate + LRO left-to-right override + NSM non-spacing mark + ON other neutral + PDF pop directional format + PDI pop directional isolate + R right-to-left + RLE right-to-left embedding + RLI right-to-left isolate + RLO right-to-left override + S segment separator + WS which space . . .SH "CHARACTER CLASSES" @@ -684,6 +578,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 August 2021 -Copyright (c) 1997-2021 University of Cambridge. +Last updated: 12 January 2022 +Copyright (c) 1997-2022 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2test.1 b/pcre2/doc/pcre2test.1 index d98e97441b998a4d1822146d5427048bdec09a5e..d374f3ea46413180f0785dd3e36ab134fe30ab8b 100644 --- a/pcre2/doc/pcre2test.1 +++ b/pcre2/doc/pcre2test.1 @@ -1,4 +1,4 @@ -.TH PCRE2TEST 1 "30 August 2021" "PCRE 10.38" +.TH PCRE2TEST 1 "12 January 2022" "PCRE 10.40" .SH NAME pcre2test - a program for testing Perl-compatible regular expressions. .SH SYNOPSIS @@ -47,7 +47,7 @@ format before being passed to the library functions. Results are converted back to 8-bit code units for output. .P In the rest of this document, the names of library functions and structures -are given in generic form, for example, \fBpcre_compile()\fP. The actual +are given in generic form, for example, \fBpcre2_compile()\fP. The actual names used in the libraries have a suffix _8, _16, or _32, as appropriate. . . @@ -211,7 +211,17 @@ available, and the use of JIT for matching is verified. \fB-LM\fP List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit code. All other options are ignored. -If both -C and -LM are present, whichever is first is recognized. +If both -C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LP\fP +List properties: write a list of recognized Unicode properties to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. +.TP 10 +\fB-LS\fP +List scripts: write a list of recogized Unicode script names to the standard +output, then exit with zero exit code. All other options are ignored. If both +-C and any -Lx options are present, whichever is first is recognized. .TP 10 \fB-pattern\fP \fImodifier-list\fP Behave as if each pattern line contains the given modifiers. @@ -1206,6 +1216,8 @@ pattern, but can be overridden by modifiers on the subject. match_limit= set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset= set starting offset offset_limit= set offset limit ovector= set size of output vector @@ -1629,7 +1641,7 @@ When testing \fBpcre2_substitute()\fP, this modifier also has the effect of passing the replacement string as zero-terminated. . . -.SS "Passing a NULL context" +.SS "Passing a NULL context, subject, or replacement" .rs .sp Normally, \fBpcre2test\fP passes a context block to \fBpcre2_match()\fP, @@ -1638,6 +1650,10 @@ If the \fBnull_context\fP modifier is set, however, NULL is passed. This is for testing that the matching and substitution functions behave correctly in this case (they use default values). This modifier cannot be used with the \fBfind_limits\fP or \fBsubstitute_callout\fP modifiers. +.P +Similarly, for testing purposes, if the \fBnull_subject\fP or +\fBnull_replacement\fP modifier is set, the subject or replacement string +pointers are passed as NULL, respectively, to the relevant functions. . . .SH "THE ALTERNATIVE MATCHING FUNCTION" @@ -2103,6 +2119,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 30 August 2021 -Copyright (c) 1997-2021 University of Cambridge. +Last updated: 12 January 2022 +Copyright (c) 1997-2022 University of Cambridge. .fi diff --git a/pcre2/doc/pcre2test.txt b/pcre2/doc/pcre2test.txt index 217bed5f03afe99e3c00962c47a2a4fb5ef3c8aa..ed7dd20ec92a68263a1a63d826d1a1eea3c324f7 100644 --- a/pcre2/doc/pcre2test.txt +++ b/pcre2/doc/pcre2test.txt @@ -44,7 +44,7 @@ PCRE2's 8-BIT, 16-BIT AND 32-BIT LIBRARIES output. In the rest of this document, the names of library functions and struc- - tures are given in generic form, for example, pcre_compile(). The ac- + tures are given in generic form, for example, pcre2_compile(). The ac- tual names used in the libraries have a suffix _8, _16, or _32, as ap- propriate. @@ -197,7 +197,17 @@ COMMAND LINE OPTIONS -LM List modifiers: write a list of available pattern and subject modifiers to the standard output, then exit with zero exit - code. All other options are ignored. If both -C and -LM are + code. All other options are ignored. If both -C and any -Lx + options are present, whichever is first is recognized. + + -LP List properties: write a list of recognized Unicode proper- + ties to the standard output, then exit with zero exit code. + All other options are ignored. If both -C and any -Lx options + are present, whichever is first is recognized. + + -LS List scripts: write a list of recogized Unicode script names + to the standard output, then exit with zero exit code. All + other options are ignored. If both -C and any -Lx options are present, whichever is first is recognized. -pattern modifier-list @@ -1111,6 +1121,8 @@ SUBJECT MODIFIERS match_limit= set a match limit memory show heap memory usage null_context match with a NULL context + null_replacement substitute with NULL replacement + null_subject match with NULL subject offset= set starting offset offset_limit= set offset limit ovector= set size of output vector @@ -1499,7 +1511,7 @@ SUBJECT MODIFIERS When testing pcre2_substitute(), this modifier also has the effect of passing the replacement string as zero-terminated. - Passing a NULL context + Passing a NULL context, subject, or replacement Normally, pcre2test passes a context block to pcre2_match(), pcre2_dfa_match(), pcre2_jit_match() or pcre2_substitute(). If the @@ -1508,6 +1520,10 @@ SUBJECT MODIFIERS in this case (they use default values). This modifier cannot be used with the find_limits or substitute_callout modifiers. + Similarly, for testing purposes, if the null_subject or null_replace- + ment modifier is set, the subject or replacement string pointers are + passed as NULL, respectively, to the relevant functions. + THE ALTERNATIVE MATCHING FUNCTION @@ -1933,5 +1949,5 @@ AUTHOR REVISION - Last updated: 30 August 2021 - Copyright (c) 1997-2021 University of Cambridge. + Last updated: 12 January 2022 + Copyright (c) 1997-2022 University of Cambridge. diff --git a/pcre2/doc/pcre2unicode.3 b/pcre2/doc/pcre2unicode.3 index 055a4ce42e59cacf3c42ca7083de61aa1de9465f..e7e37a395eb5798f65a28f9e5aff0ecab02f554a 100644 --- a/pcre2/doc/pcre2unicode.3 +++ b/pcre2/doc/pcre2unicode.3 @@ -1,4 +1,4 @@ -.TH PCRE2UNICODE 3 "23 February 2020" "PCRE2 10.35" +.TH PCRE2UNICODE 3 "22 December 2021" "PCRE2 10.40" .SH NAME PCRE - Perl-compatible regular expressions (revised API) .SH "UNICODE AND UTF SUPPORT" @@ -40,10 +40,11 @@ handled, as documented below. .sp When PCRE2 is built with Unicode support, the escape sequences \ep{..}, \eP{..}, and \eX can be used. This is not dependent on the PCRE2_UTF setting. -The Unicode properties that can be tested are limited to the general category -properties such as Lu for an upper case letter or Nd for a decimal number, the -Unicode script names such as Arabic or Han, and the derived properties Any and -L&. Full lists are given in the +The Unicode properties that can be tested are a subset of those that Perl +supports. Currently they are limited to the general category properties such as +Lu for an upper case letter or Nd for a decimal number, the Unicode script +names such as Arabic or Han, Bidi_Class, Bidi_Control, and the derived +properties Any and LC (synonym L&). Full lists are given in the .\" HREF \fBpcre2pattern\fP .\" @@ -51,10 +52,10 @@ and .\" HREF \fBpcre2syntax\fP .\" -documentation. Only the short names for properties are supported. For example, -\ep{L} matches a letter. Its Perl synonym, \ep{Letter}, is not supported. -Furthermore, in Perl, many properties may optionally be prefixed by "Is", for -compatibility with Perl 5.6. PCRE2 does not support this. +documentation. In general, only the short names for properties are supported. +For example, \ep{L} matches a letter. Its longer synonym, \ep{Letter}, is not +supported. Furthermore, in Perl, many properties may optionally be prefixed by +"Is", for compatibility with Perl 5.6. PCRE2 does not support this. . . .SH "WIDE CHARACTERS AND UTF MODES" @@ -448,7 +449,7 @@ can be useful when searching for UTF text in executable or other binary files. .sp .nf Philip Hazel -University Computing Service +Retired from University Computing Service Cambridge, England. .fi . @@ -457,6 +458,6 @@ Cambridge, England. .rs .sp .nf -Last updated: 23 February 2020 -Copyright (c) 1997-2020 University of Cambridge. +Last updated: 22 December 2021 +Copyright (c) 1997-2021 University of Cambridge. .fi diff --git a/pcre2/maint/GenerateCommon.py b/pcre2/maint/GenerateCommon.py new file mode 100644 index 0000000000000000000000000000000000000000..03f9ac559111b0779047933bfc63f206d7040604 --- /dev/null +++ b/pcre2/maint/GenerateCommon.py @@ -0,0 +1,355 @@ +#! /usr/bin/python + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This file is a Python module containing common lists and functions for the +# GenerateXXX scripts that create various.c and .h files from Unicode data +# files. It was created as part of a re-organizaton of these scripts in +# December 2021. + + +import re + + +# --------------------------------------------------------------------------- +# DATA LISTS +# --------------------------------------------------------------------------- + +# BIDI classes in the DerivedBidiClass.txt file, with comments. + +bidi_classes = [ + 'AL', 'Arabic letter', + 'AN', 'Arabic number', + 'B', 'Paragraph separator', + 'BN', 'Boundary neutral', + 'CS', 'Common separator', + 'EN', 'European number', + 'ES', 'European separator', + 'ET', 'European terminator', + 'FSI', 'First strong isolate', + 'L', 'Left to right', + 'LRE', 'Left to right embedding', + 'LRI', 'Left to right isolate', + 'LRO', 'Left to right override', + 'NSM', 'Non-spacing mark', + 'ON', 'Other neutral', + 'PDF', 'Pop directional format', + 'PDI', 'Pop directional isolate', + 'R', 'Right to left', + 'RLE', 'Right to left embedding', + 'RLI', 'Right to left isolate', + 'RLO', 'Right to left override', + 'S', 'Segment separator', + 'WS', 'White space' + ] + +# Particular category property names, with comments. NOTE: If ever this list +# is changed, the table called "catposstab" in the pcre2_auto_possess.c file +# must be edited to keep in step. + +category_names = [ + 'Cc', 'Control', + 'Cf', 'Format', + 'Cn', 'Unassigned', + 'Co', 'Private use', + 'Cs', 'Surrogate', + 'Ll', 'Lower case letter', + 'Lm', 'Modifier letter', + 'Lo', 'Other letter', + 'Lt', 'Title case letter', + 'Lu', 'Upper case letter', + 'Mc', 'Spacing mark', + 'Me', 'Enclosing mark', + 'Mn', 'Non-spacing mark', + 'Nd', 'Decimal number', + 'Nl', 'Letter number', + 'No', 'Other number', + 'Pc', 'Connector punctuation', + 'Pd', 'Dash punctuation', + 'Pe', 'Close punctuation', + 'Pf', 'Final punctuation', + 'Pi', 'Initial punctuation', + 'Po', 'Other punctuation', + 'Ps', 'Open punctuation', + 'Sc', 'Currency symbol', + 'Sk', 'Modifier symbol', + 'Sm', 'Mathematical symbol', + 'So', 'Other symbol', + 'Zl', 'Line separator', + 'Zp', 'Paragraph separator', + 'Zs', 'Space separator' + ] + +# The Extended_Pictographic property is not found in the file where all the +# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt +# file, but we list it here so that the name has the correct index value. + +break_properties = [ + 'CR', ' 0', + 'LF', ' 1', + 'Control', ' 2', + 'Extend', ' 3', + 'Prepend', ' 4', + 'SpacingMark', ' 5', + 'L', ' 6 Hangul syllable type L', + 'V', ' 7 Hangul syllable type V', + 'T', ' 8 Hangul syllable type T', + 'LV', ' 9 Hangul syllable type LV', + 'LVT', '10 Hangul syllable type LVT', + 'Regional_Indicator', '11', + 'Other', '12', + 'ZWJ', '13', + 'Extended_Pictographic', '14' + ] + +# List of files from which the names of Boolean properties are obtained, along +# with a list of regex patterns for properties to be ignored, and a list of +# extra pattern names to add. + +bool_propsfiles = ['PropList.txt', 'DerivedCoreProperties.txt', 'emoji-data.txt'] +bool_propsignore = [r'^Other_', r'^Hyphen$'] +bool_propsextras = ['ASCII', 'Bidi_Mirrored'] + + +# --------------------------------------------------------------------------- +# GET BOOLEAN PROPERTY NAMES +# --------------------------------------------------------------------------- + +# Get a list of Boolean property names from a number of files. + +def getbpropslist(): + bplist = [] + bplast = "" + + for filename in bool_propsfiles: + try: + file = open('Unicode.tables/' + filename, 'r') + except IOError: + print(f"** Couldn't open {'Unicode.tables/' + filename}\n") + sys.exit(1) + + for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1 or data[1] == bplast: + continue + bplast = data[1] + for pat in bool_propsignore: + if re.match(pat, bplast) != None: + break + else: + bplist.append(bplast) + + file.close() + + bplist.extend(bool_propsextras) + bplist.sort() + return bplist + +bool_properties = getbpropslist() +bool_props_list_item_size = (len(bool_properties) + 31) // 32 + + + +# --------------------------------------------------------------------------- +# COLLECTING PROPERTY NAMES AND ALIASES +# --------------------------------------------------------------------------- + +script_names = ['Unknown'] +abbreviations = {} + +def collect_property_names(): + global script_names + global abbreviations + + names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_]+) #') + + last_script_name = "" + with open("Unicode.tables/Scripts.txt") as f: + for line in f: + match_obj = names_re.match(line) + + if match_obj == None or match_obj.group(1) == last_script_name: + continue + + last_script_name = match_obj.group(1) + script_names.append(last_script_name) + + # Sometimes there is comment in the line + # so splitting around semicolon is not enough + value_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_ ]+))?') + + with open("Unicode.tables/PropertyValueAliases.txt") as f: + for line in f: + match_obj = value_alias_re.match(line) + + if match_obj == None: + continue + + if match_obj.group(1) == "sc": + if match_obj.group(2) == match_obj.group(3): + abbreviations[match_obj.group(3)] = () + elif match_obj.group(4) == None: + abbreviations[match_obj.group(3)] = (match_obj.group(2),) + else: + abbreviations[match_obj.group(3)] = (match_obj.group(2), match_obj.group(4)) + + # We can also collect Boolean property abbreviations into the same dictionary + + bin_alias_re = re.compile(r' *([A-Za-z_]+) *; *([A-Za-z_]+)(?: *; *([A-Za-z_]+))?') + with open("Unicode.tables/PropertyAliases.txt") as f: + for line in f: + match_obj = bin_alias_re.match(line) + if match_obj == None: + continue + + if match_obj.group(2) in bool_properties: + if match_obj.group(3) == None: + abbreviations[match_obj.group(2)] = (match_obj.group(1),) + else: + abbreviations[match_obj.group(2)] = (match_obj.group(1), match_obj.group(3)) + +collect_property_names() + + + +# --------------------------------------------------------------------------- +# REORDERING SCRIPT NAMES +# --------------------------------------------------------------------------- + +script_abbrevs = [] + +def reorder_scripts(): + global script_names + global script_abbrevs + global abbreviations + + for name in script_names: + abbrevs = abbreviations[name] + script_abbrevs.append(name if len(abbrevs) == 0 else abbrevs[0]) + + extended_script_abbrevs = set() + with open("Unicode.tables/ScriptExtensions.txt") as f: + names_re = re.compile(r'^[0-9A-F]{4,6}(?:\.\.[0-9A-F]{4,6})? +; ([A-Za-z_ ]+) #') + + for line in f: + match_obj = names_re.match(line) + + if match_obj == None: + continue + + for name in match_obj.group(1).split(" "): + extended_script_abbrevs.add(name) + + new_script_names = [] + new_script_abbrevs = [] + + for idx, abbrev in enumerate(script_abbrevs): + if abbrev in extended_script_abbrevs: + new_script_names.append(script_names[idx]) + new_script_abbrevs.append(abbrev) + + for idx, abbrev in enumerate(script_abbrevs): + if abbrev not in extended_script_abbrevs: + new_script_names.append(script_names[idx]) + new_script_abbrevs.append(abbrev) + + script_names = new_script_names + script_abbrevs = new_script_abbrevs + +reorder_scripts() +script_list_item_size = (script_names.index('Unknown') + 31) // 32 + + +# --------------------------------------------------------------------------- +# DERIVED LISTS +# --------------------------------------------------------------------------- + +# Create general character property names from the first letters of the +# particular categories. + +gcn_set = set(category_names[i][0] for i in range(0, len(category_names), 2)) +general_category_names = list(gcn_set) +general_category_names.sort() + + +# --------------------------------------------------------------------------- +# FUNCTIONS +# --------------------------------------------------------------------------- + +import sys + +# Open an output file, using the command's argument or a default. Write common +# preliminary header information. + +def open_output(default): + if len(sys.argv) > 2: + print('** Too many arguments: just give a file name') + sys.exit(1) + if len(sys.argv) == 2: + output_name = sys.argv[1] + else: + output_name = default + try: + file = open(output_name, "w") + except IOError: + print ("** Couldn't open %s" % output_name) + sys.exit(1) + + script_name = sys.argv[0] + i = script_name.rfind('/') + if i >= 0: + script_name = script_name[i+1:] + + file.write("""\ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Original API code Copyright (c) 1997-2012 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge + +This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY! +""") + + file.write("Instead, modify the maint/%s script and run it to generate\n" + "a new version of this code.\n\n" % script_name) + + file.write("""\ +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ +\n""") + return file + +# End of UcpCommon.py diff --git a/pcre2/maint/GenerateTest26.py b/pcre2/maint/GenerateTest26.py new file mode 100644 index 0000000000000000000000000000000000000000..2afdf25e5d041d1d6a721aefc308a6c368c95c00 --- /dev/null +++ b/pcre2/maint/GenerateTest26.py @@ -0,0 +1,188 @@ +#! /usr/bin/python + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ +# +# This file auto-generates unicode property tests and their expected output. +# It is recommended to re-run this generator after the unicode files are +# updated. The names of the generated files are `testinput26` and `testoutput26` + +import re +import sys + +from GenerateCommon import \ + script_names, \ + script_abbrevs + +def write_both(text): + input_file.write(text) + output_file.write(text) + +def to_string_char(ch_idx): + if ch_idx < 128: + if ch_idx < 16: + return "\\x{0%x}" % ch_idx + if ch_idx >= 32: + return chr(ch_idx) + return "\\x{%x}" % ch_idx + +output_directory = "" + +if len(sys.argv) > 2: + print('** Too many arguments: just give a directory name') + sys.exit(1) +if len(sys.argv) == 2: + output_directory = sys.argv[1] + if not output_directory.endswith("/"): + output_directory += "/" + +try: + input_file = open(output_directory + "testinput26", "w") + output_file = open(output_directory + "testoutput26", "w") +except IOError: + print ("** Couldn't open output files") + sys.exit(1) + +write_both("# These tests are generated by maint/GenerateTest26.py, do not edit.\n\n") + +# --------------------------------------------------------------------------- +# UNICODE SCRIPT EXTENSION TESTS +# --------------------------------------------------------------------------- + +write_both("# Unicode Script Extension tests.\n\n") + +def gen_script_tests(): + script_data = [None] * len(script_names) + char_data = [None] * 0x110000 + + property_re = re.compile("^([0-9A-F]{4,6})(?:\\.\\.([0-9A-F]{4,6}))? +; ([A-Za-z_ ]+) #") + prev_name = "" + script_idx = -1 + + with open("Unicode.tables/Scripts.txt") as f: + for line in f: + match_obj = property_re.match(line) + + if match_obj == None: + continue + + name = match_obj.group(3) + if name != prev_name: + script_idx = script_names.index(name) + prev_name = name + + low = int(match_obj.group(1), 16) + high = low + char_data[low] = name + + if match_obj.group(2) != None: + high = int(match_obj.group(2), 16) + for idx in range(low + 1, high + 1): + char_data[idx] = name + + if script_data[script_idx] == None: + script_data[script_idx] = [low, None, None, None, None] + script_data[script_idx][1] = high + + extended_script_indicies = {} + + with open("Unicode.tables/ScriptExtensions.txt") as f: + for line in f: + match_obj = property_re.match(line) + + if match_obj == None: + continue + + low = int(match_obj.group(1), 16) + high = low + if match_obj.group(2) != None: + high = int(match_obj.group(2), 16) + + for abbrev in match_obj.group(3).split(" "): + if abbrev not in extended_script_indicies: + idx = script_abbrevs.index(abbrev) + extended_script_indicies[abbrev] = idx + rec = script_data[idx] + rec[2] = low + rec[3] = high + else: + idx = extended_script_indicies[abbrev] + rec = script_data[idx] + if rec[2] > low: + rec[2] = low + if rec[3] < high: + rec[3] = high + + if rec[4] == None: + name = script_names[idx] + for idx in range(low, high + 1): + if char_data[idx] != name: + rec[4] = idx + break + + long_property_name = False + + for idx, rec in enumerate(script_data): + script_name = script_names[idx] + + if script_name == "Unknown": + continue + + script_abbrev = script_abbrevs[idx] + + write_both("# Base script check\n") + write_both("/^\\p{sc=%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[0])) + output_file.write(" 0: %s\n" % to_string_char(rec[0])) + write_both("\n") + + write_both("/^\\p{Script=%s}/utf\n" % script_abbrev) + write_both(" %s\n" % to_string_char(rec[1])) + output_file.write(" 0: %s\n" % to_string_char(rec[1])) + write_both("\n") + + if rec[2] != None: + property_name = "scx" + if long_property_name: + property_name = "Script_Extensions" + + write_both("# Script extension check\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[2])) + output_file.write(" 0: %s\n" % to_string_char(rec[2])) + write_both("\n") + + write_both("/^\\p{%s=%s}/utf\n" % (property_name, script_abbrev)) + write_both(" %s\n" % to_string_char(rec[3])) + output_file.write(" 0: %s\n" % to_string_char(rec[3])) + write_both("\n") + + long_property_name = not long_property_name + + if rec[4] != None: + write_both("# Script extension only character\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[4])) + output_file.write(" 0: %s\n" % to_string_char(rec[4])) + write_both("\n") + + write_both("/^\\p{sc=%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(rec[4])) + output_file.write("No match\n") + write_both("\n") + else: + print("External character has not found for %s" % script_name) + + high = rec[1] + if rec[3] != None and rec[3] > rec[1]: + high = rec[3] + write_both("# Character not in script\n") + write_both("/^\\p{%s}/utf\n" % script_name) + write_both(" %s\n" % to_string_char(high + 1)) + output_file.write("No match\n") + write_both("\n") + + +gen_script_tests() + +write_both("# End of testinput26\n") diff --git a/pcre2/maint/GenerateUcd.py b/pcre2/maint/GenerateUcd.py new file mode 100644 index 0000000000000000000000000000000000000000..6081800d0ce2a56295408e6252b77da262e26318 --- /dev/null +++ b/pcre2/maint/GenerateUcd.py @@ -0,0 +1,923 @@ +#! /usr/bin/python + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ +# +# This script generates the pcre2_ucd.c file from Unicode data files. This is +# the compressed Unicode property data used by PCRE2. The script was created in +# December 2021 as part of the Unicode data generation refactoring. It is +# basically a re-working of the MultiStage2.py script that was submitted to the +# PCRE project by Peter Kankowski in 2008 as part of a previous upgrading of +# Unicode property support. A number of extensions have since been added. The +# main difference in the 2021 upgrade (apart from comments and layout) is that +# the data tables (e.g. list of script names) are now listed in or generated by +# a separate Python module that is shared with the other Generate scripts. +# +# This script must be run in the "maint" directory. It requires the following +# Unicode data tables: BidiMirrorring.txt, CaseFolding.txt, +# DerivedBidiClass.txt, DerivedCoreProperties.txt, DerivedGeneralCategory.txt, +# GraphemeBreakProperty.txt, PropList.txt, PropertyAliases.txt, +# PropertyValueAliases.txt, ScriptExtensions.txt, Scripts.txt, and +# emoji-data.txt. These must be in the Unicode.tables subdirectory. +# +# The emoji-data.txt file is found in the "emoji" subdirectory even though it +# is technically part of a different (but coordinated) standard as shown +# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"), +# for example: +# +# http://unicode.org/Public/emoji/13.0/ReadMe.txt +# +# DerivedBidiClass.txt and DerivedGeneralCategory.txt are in the "extracted" +# subdirectory of the Unicode database (UCD) on the Unicode web site; +# GraphemeBreakProperty.txt is in the "auxiliary" subdirectory. The other files +# are in the top-level UCD directory. +# +# ----------------------------------------------------------------------------- +# Minor modifications made to the original script: +# Added #! line at start +# Removed tabs +# Made it work with Python 2.4 by rewriting two statements that needed 2.5 +# Consequent code tidy +# Adjusted data file names to take from the Unicode.tables directory +# Adjusted global table names by prefixing _pcre_. +# Commented out stuff relating to the casefolding table, which isn't used; +# removed completely in 2012. +# Corrected size calculation +# Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed. +# Update for PCRE2: name changes, and SUPPORT_UCP is abolished. +# +# Major modifications made to the original script: +# Added code to add a grapheme break property field to records. +# +# Added code to search for sets of more than two characters that must match +# each other caselessly. A new table is output containing these sets, and +# offsets into the table are added to the main output records. This new +# code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer +# used. +# +# Update for Python3: +# . Processed with 2to3, but that didn't fix everything +# . Changed string.strip to str.strip +# . Added encoding='utf-8' to the open() call +# . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is +# required and the result of the division is a float +# +# Added code to scan the emoji-data.txt file to find the Extended Pictographic +# property, which is used by PCRE2 as a grapheme breaking property. This was +# done when updating to Unicode 11.0.0 (July 2018). +# +# Added code to add a Script Extensions field to records. This has increased +# their size from 8 to 12 bytes, only 10 of which are currently used. +# +# Added code to add a bidi class field to records by scanning the +# DerivedBidiClass.txt and PropList.txt files. This uses one of the two spare +# bytes, so now 11 out of 12 are in use. +# +# 01-March-2010: Updated list of scripts for Unicode 5.2.0 +# 30-April-2011: Updated list of scripts for Unicode 6.0.0 +# July-2012: Updated list of scripts for Unicode 6.1.0 +# 20-August-2012: Added scan of GraphemeBreakProperty.txt and added a new +# field in the record to hold the value. Luckily, the +# structure had a hole in it, so the resulting table is +# not much bigger than before. +# 18-September-2012: Added code for multiple caseless sets. This uses the +# final hole in the structure. +# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0 +# 13-May-2014: Updated for PCRE2 +# 03-June-2014: Updated for Python 3 +# 20-June-2014: Updated for Unicode 7.0.0 +# 12-August-2014: Updated to put Unicode version into the file +# 19-June-2015: Updated for Unicode 8.0.0 +# 02-July-2017: Updated for Unicode 10.0.0 +# 03-July-2018: Updated for Unicode 11.0.0 +# 07-July-2018: Added code to scan emoji-data.txt for the Extended +# Pictographic property. +# 01-October-2018: Added the 'Unknown' script name +# 03-October-2018: Added new field for Script Extensions +# 27-July-2019: Updated for Unicode 12.1.0 +# 10-March-2020: Updated for Unicode 13.0.0 +# PCRE2-10.39: Updated for Unicode 14.0.0 +# 05-December-2021: Added code to scan DerivedBidiClass.txt for bidi class, +# and also PropList.txt for the Bidi_Control property +# 19-December-2021: Reworked script extensions lists to be bit maps instead +# of zero-terminated lists of script numbers. +# ---------------------------------------------------------------------------- +# +# Changes to the refactored script: +# +# 26-December-2021: Refactoring completed +# 10-January-2022: Addition of general Boolean property support +# 12-January-2022: Merge scriptx and bidiclass fields +# 14-January-2022: Enlarge Boolean property offset to 12 bits +# +# ---------------------------------------------------------------------------- +# +# +# The main tables generated by this script are used by macros defined in +# pcre2_internal.h. They look up Unicode character properties using short +# sequences of code that contains no branches, which makes for greater speed. +# +# Conceptually, there is a table of records (of type ucd_record), one for each +# Unicode character. Each record contains the script number, script extension +# value, character type, grapheme break type, offset to caseless matching set, +# offset to the character's other case, the bidi class, and offset to bitmap of +# Boolean properties. +# +# A real table covering all Unicode characters would be far too big. It can be +# efficiently compressed by observing that many characters have the same +# record, and many blocks of characters (taking 128 characters in a block) have +# the same set of records as other blocks. This leads to a 2-stage lookup +# process. +# +# This script constructs seven tables. The ucd_caseless_sets table contains +# lists of characters that all match each other caselessly. Each list is +# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than +# any valid character. The first list is empty; this is used for characters +# that are not part of any list. +# +# The ucd_digit_sets table contains the code points of the '9' characters in +# each set of 10 decimal digits in Unicode. This is used to ensure that digits +# in script runs all come from the same set. The first element in the vector +# contains the number of subsequent elements, which are in ascending order. +# +# Scripts are partitioned into two groups. Scripts that appear in at least one +# character's script extension list come first, followed by "Unknown" and then +# all the rest. This sorting is done automatically in the GenerateCommon.py +# script. A script's number is its index in the script_names list. +# +# The ucd_script_sets table contains bitmaps that represent lists of scripts +# for Script Extensions properties. Each bitmap consists of a fixed number of +# unsigned 32-bit numbers, enough to allocate a bit for every script that is +# used in any character's extension list, that is, enough for every script +# whose number is less than ucp_Unknown. A character's script extension value +# in its ucd record is an offset into the ucd_script_sets vector. The first +# bitmap has no bits set; characters that have no script extensions have zero +# as their script extensions value so that they use this map. +# +# The ucd_boolprop_sets table contains bitmaps that represent lists of Boolean +# properties. Each bitmap consists of a fixed number of unsigned 32-bit +# numbers, enough to allocate a bit for each supported Boolean property. +# +# The ucd_records table contains one instance of every unique character record +# that is required. The ucd_stage1 table is indexed by a character's block +# number, which is the character's code point divided by 128, since 128 is the +# size of each block. The result of a lookup in ucd_stage1 a "virtual" block +# number. +# +# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by +# the offset of a character within its own block, and the result is the index +# number of the required record in the ucd_records vector. +# +# The following examples are correct for the Unicode 14.0.0 database. Future +# updates may make change the actual lookup values. +# +# Example: lowercase "a" (U+0061) is in block 0 +# lookup 0 in stage1 table yields 0 +# lookup 97 (0x61) in the first table in stage2 yields 35 +# record 35 is { 0, 5, 12, 0, -32, 18432, 44 } +# 0 = ucp_Latin => Latin script +# 5 = ucp_Ll => Lower case letter +# 12 = ucp_gbOther => Grapheme break property "Other" +# 0 => Not part of a caseless set +# -32 (-0x20) => Other case is U+0041 +# 18432 = 0x4800 => Combined Bidi class + script extension values +# 44 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 9 = ucp_bidiL => Bidi class left-to-right +# 0 => No special script extension property +# +# Almost all lowercase latin characters resolve to the same record. One or two +# are different because they are part of a multi-character caseless set (for +# example, k, K and the Kelvin symbol are such a set). +# +# Example: hiragana letter A (U+3042) is in block 96 (0x60) +# lookup 96 in stage1 table yields 93 +# lookup 66 (0x42) in table 93 in stage2 yields 819 +# record 819 is { 20, 7, 12, 0, 0, 18432, 82 } +# 20 = ucp_Hiragana => Hiragana script +# 7 = ucp_Lo => Other letter +# 12 = ucp_gbOther => Grapheme break property "Other" +# 0 => Not part of a caseless set +# 0 => No other case +# 18432 = 0x4800 => Combined Bidi class + script extension values +# 82 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 9 = ucp_bidiL => Bidi class left-to-right +# 0 => No special script extension property +# +# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39) +# lookup 57 in stage1 table yields 55 +# lookup 80 (0x50) in table 55 in stage2 yields 621 +# record 621 is { 84, 12, 3, 0, 0, 26762, 96 } +# 84 = ucp_Inherited => Script inherited from predecessor +# 12 = ucp_Mn => Non-spacing mark +# 3 = ucp_gbExtend => Grapheme break property "Extend" +# 0 => Not part of a caseless set +# 0 => No other case +# 26762 = 0x688A => Combined Bidi class + script extension values +# 96 => Offset to Boolean properties +# +# The top 5 bits of the sixth field are the Bidi class, with the rest being the +# script extension value, giving: +# +# 13 = ucp_bidiNSM => Bidi class non-spacing mark +# 138 => Script Extension list offset = 138 +# +# At offset 138 in the ucd_script_sets vector we find a bitmap with bits 1, 8, +# 18, and 47 set. This means that this character is expected to be used with +# any of those scripts, which are Bengali, Devanagari, Kannada, and Grantha. +# +# Philip Hazel, last updated 14 January 2022. +############################################################################## + + +# Import standard modules + +import re +import string +import sys + +# Import common data lists and functions + +from GenerateCommon import \ + bidi_classes, \ + bool_properties, \ + bool_propsfiles, \ + bool_props_list_item_size, \ + break_properties, \ + category_names, \ + general_category_names, \ + script_abbrevs, \ + script_list_item_size, \ + script_names, \ + open_output + +# Some general parameters + +MAX_UNICODE = 0x110000 +NOTACHAR = 0xffffffff + + +# --------------------------------------------------------------------------- +# DEFINE FUNCTIONS +# --------------------------------------------------------------------------- + + +# Parse a line of Scripts.txt, GraphemeBreakProperty.txt, DerivedBidiClass.txt +# or DerivedGeneralCategory.txt + +def make_get_names(enum): + return lambda chardata: enum.index(chardata[1]) + + +# Parse a line of CaseFolding.txt + +def get_other_case(chardata): + if chardata[1] == 'C' or chardata[1] == 'S': + return int(chardata[2], 16) - int(chardata[0], 16) + return 0 + + +# Parse a line of ScriptExtensions.txt + +def get_script_extension(chardata): + global last_script_extension + + offset = len(script_lists) * script_list_item_size + if last_script_extension == chardata[1]: + return offset - script_list_item_size + + last_script_extension = chardata[1] + script_lists.append(tuple(script_abbrevs.index(abbrev) for abbrev in last_script_extension.split(' '))) + return offset + + +# Read a whole table in memory, setting/checking the Unicode version + +def read_table(file_name, get_value, default_value): + global unicode_version + + f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name) + file_base = f.group(1) + version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$" + file = open(file_name, 'r', encoding='utf-8') + f = re.match(version_pat, file.readline()) + version = f.group(1) + if unicode_version == "": + unicode_version = version + elif unicode_version != version: + print("WARNING: Unicode version differs in %s", file_name, file=sys.stderr) + + table = [default_value] * MAX_UNICODE + for line in file: + line = re.sub(r'#.*', '', line) + chardata = list(map(str.strip, line.split(';'))) + if len(chardata) <= 1: + continue + value = get_value(chardata) + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + for i in range(char, last + 1): + # It is important not to overwrite a previously set value because in the + # CaseFolding file there are lines to be ignored (returning the default + # value of 0) which often come after a line which has already set data. + if table[i] == default_value: + table[i] = value + file.close() + return table + + +# Get the smallest possible C language type for the values in a table + +def get_type_size(table): + type_size = [("uint8_t", 1), ("uint16_t", 2), ("uint32_t", 4), + ("signed char", 1), ("int16_t", 2), ("int32_t", 4)] + limits = [(0, 255), (0, 65535), (0, 4294967295), (-128, 127), + (-32768, 32767), (-2147483648, 2147483647)] + minval = min(table) + maxval = max(table) + for num, (minlimit, maxlimit) in enumerate(limits): + if minlimit <= minval and maxval <= maxlimit: + return type_size[num] + raise OverflowError("Too large to fit into C types") + + +# Get the total size of a list of tables + +def get_tables_size(*tables): + total_size = 0 + for table in tables: + type, size = get_type_size(table) + total_size += size * len(table) + return total_size + + +# Compress a table into the two stages + +def compress_table(table, block_size): + blocks = {} # Dictionary for finding identical blocks + stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table) + stage2 = [] # Stage 2 table contains the blocks with property values + table = tuple(table) + for i in range(0, len(table), block_size): + block = table[i:i+block_size] + start = blocks.get(block) + if start is None: + # Allocate a new block + start = len(stage2) / block_size + stage2 += block + blocks[block] = start + stage1.append(start) + return stage1, stage2 + + +# Output a table + +def write_table(table, table_name, block_size = None): + type, size = get_type_size(table) + ELEMS_PER_LINE = 16 + + s = "const %s %s[] = { /* %d bytes" % (type, table_name, size * len(table)) + if block_size: + s += ", block = %d" % block_size + f.write(s + " */\n") + table = tuple(table) + if block_size is None: + fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */\n" + mult = MAX_UNICODE / len(table) + for i in range(0, len(table), ELEMS_PER_LINE): + f.write(fmt % (table[i:i+ELEMS_PER_LINE] + (int(i * mult),))) + else: + if block_size > ELEMS_PER_LINE: + el = ELEMS_PER_LINE + else: + el = block_size + fmt = "%3d," * el + "\n" + if block_size > ELEMS_PER_LINE: + fmt = fmt * int(block_size / ELEMS_PER_LINE) + for i in range(0, len(table), block_size): + f.write(("\n/* block %d */\n" + fmt) % ((i / block_size,) + table[i:i+block_size])) + f.write("};\n\n") + + +# Extract the unique combinations of properties into records + +def combine_tables(*tables): + records = {} + index = [] + for t in zip(*tables): + i = records.get(t) + if i is None: + i = records[t] = len(records) + index.append(i) + return index, records + + +# Create a record struct + +def get_record_size_struct(records): + size = 0 + structure = 'typedef struct {\n' + for i in range(len(records[0])): + record_slice = [record[i] for record in records] + slice_type, slice_size = get_type_size(record_slice) + # add padding: round up to the nearest power of slice_size + size = (size + slice_size - 1) & -slice_size + size += slice_size + structure += '%s property_%d;\n' % (slice_type, i) + + # round up to the first item of the next structure in array + record_slice = [record[0] for record in records] + slice_type, slice_size = get_type_size(record_slice) + size = (size + slice_size - 1) & -slice_size + + structure += '} ucd_record;\n*/\n' + return size, structure + + +# Write records + +def write_records(records, record_size): + f.write('const ucd_record PRIV(ucd_records)[] = { ' + \ + '/* %d bytes, record size %d */\n' % (len(records) * record_size, record_size)) + records = list(zip(list(records.keys()), list(records.values()))) + records.sort(key = lambda x: x[1]) + for i, record in enumerate(records): + f.write((' {' + '%6d, ' * len(record[0]) + '}, /* %3d */\n') % (record[0] + (i,))) + f.write('};\n\n') + + +# Write a bit set + +def write_bitsets(list, item_size): + for d in list: + bitwords = [0] * item_size + for idx in d: + bitwords[idx // 32] |= 1 << (idx & 31) + s = " " + for x in bitwords: + f.write("%s" % s) + s = ", " + f.write("0x%08xu" % x) + f.write(",\n") + f.write("};\n\n") + + +# --------------------------------------------------------------------------- +# This bit of code must have been useful when the original script was being +# developed. Retain it just in case it is ever needed again. + +# def test_record_size(): +# tests = [ \ +# ( [(3,), (6,), (6,), (1,)], 1 ), \ +# ( [(300,), (600,), (600,), (100,)], 2 ), \ +# ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \ +# ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \ +# ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ +# ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ +# ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \ +# ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \ +# ] +# for test in tests: +# size, struct = get_record_size_struct(test[0]) +# assert(size == test[1]) +# test_record_size() +# --------------------------------------------------------------------------- + + + +# --------------------------------------------------------------------------- +# MAIN CODE FOR CREATING TABLES +# --------------------------------------------------------------------------- + +unicode_version = "" + +# Some of the tables imported from GenerateCommon.py have alternate comment +# strings for use by GenerateUcpHeader. The comments are not wanted here, so +# remove them. + +bidi_classes = bidi_classes[::2] +break_properties = break_properties[::2] +category_names = category_names[::2] + +# Create the various tables from Unicode data files + +script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown')) +category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn')) +break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_properties), break_properties.index('Other')) +other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0) +bidi_class = read_table('Unicode.tables/DerivedBidiClass.txt', make_get_names(bidi_classes), bidi_classes.index('L')) + +# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now +# we need to find the Extended_Pictographic property for emoji characters. This +# can be set as an additional grapheme break property, because the default for +# all the emojis is "other". We scan the emoji-data.txt file and modify the +# break-props table. + +file = open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8') +for line in file: + line = re.sub(r'#.*', '', line) + chardata = list(map(str.strip, line.split(';'))) + if len(chardata) <= 1: + continue + if chardata[1] != "Extended_Pictographic": + continue + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + for i in range(char, last + 1): + if break_props[i] != break_properties.index('Other'): + print("WARNING: Emoji 0x%x has break property %s, not 'Other'", + i, break_properties[break_props[i]], file=sys.stderr) + break_props[i] = break_properties.index('Extended_Pictographic') +file.close() + +# Handle script extensions. The get_script_extesion() function maintains a +# list of unique bitmaps representing lists of scripts, returning the offset +# in that list. Initialize the list with an empty set, which is used for +# characters that have no script extensions. + +script_lists = [[]] +last_script_extension = "" +scriptx_bidi_class = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, 0) + +for idx in range(len(scriptx_bidi_class)): + scriptx_bidi_class[idx] = scriptx_bidi_class[idx] | (bidi_class[idx] << 11) +bidi_class = None + +# Find the Boolean properties of each character. This next bit of magic creates +# a list of empty lists. Using [[]] * MAX_UNICODE gives a list of references to +# the *same* list, which is not what we want. + +bprops = [[] for _ in range(MAX_UNICODE)] + +# Collect the properties from the various files + +for filename in bool_propsfiles: + try: + file = open('Unicode.tables/' + filename, 'r') + except IOError: + print(f"** Couldn't open {'Unicode.tables/' + filename}\n") + sys.exit(1) + + for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1: + continue + + try: + ix = bool_properties.index(data[1]) + except ValueError: + continue + + m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', data[0]) + char = int(m.group(1), 16) + if m.group(3) is None: + last = char + else: + last = int(m.group(3), 16) + + for i in range(char, last + 1): + bprops[i].append(ix) + + file.close() + +# The ASCII property isn't listed in any files, but it is easy enough to add +# it manually. + +ix = bool_properties.index("ASCII") +for i in range(128): + bprops[i].append(ix) + +# The Bidi_Mirrored property isn't listed in any property files. We have to +# deduce it from the file that lists the mirrored characters. + +ix = bool_properties.index("Bidi_Mirrored") + +try: + file = open('Unicode.tables/BidiMirroring.txt', 'r') +except IOError: + print(f"** Couldn't open {'Unicode.tables/BidiMirroring.txt'}\n") + sys.exit(1) + +for line in file: + line = re.sub(r'#.*', '', line) + data = list(map(str.strip, line.split(';'))) + if len(data) <= 1: + continue + c = int(data[0], 16) + bprops[c].append(ix) + +file.close() + +# Scan each character's boolean property list and created a list of unique +# lists, at the same time, setting the index in that list for each property in +# the bool_props vector. + +bool_props = [0] * MAX_UNICODE +bool_props_lists = [[]] + +for c in range(MAX_UNICODE): + s = set(bprops[c]) + for i in range(len(bool_props_lists)): + if s == set(bool_props_lists[i]): + break; + else: + bool_props_lists.append(bprops[c]) + i += 1 + + bool_props[c] = i * bool_props_list_item_size + +# This block of code was added by PH in September 2012. It scans the other_case +# table to find sets of more than two characters that must all match each other +# caselessly. Later in this script a table of these sets is written out. +# However, we have to do this work here in order to compute the offsets in the +# table that are inserted into the main table. + +# The CaseFolding.txt file lists pairs, but the common logic for reading data +# sets only one value, so first we go through the table and set "return" +# offsets for those that are not already set. + +for c in range(MAX_UNICODE): + if other_case[c] != 0 and other_case[c + other_case[c]] == 0: + other_case[c + other_case[c]] = -other_case[c] + +# Now scan again and create equivalence sets. + +caseless_sets = [] + +for c in range(MAX_UNICODE): + o = c + other_case[c] + + # Trigger when this character's other case does not point back here. We + # now have three characters that are case-equivalent. + + if other_case[o] != -other_case[c]: + t = o + other_case[o] + + # Scan the existing sets to see if any of the three characters are already + # part of a set. If so, unite the existing set with the new set. + + appended = 0 + for s in caseless_sets: + found = 0 + for x in s: + if x == c or x == o or x == t: + found = 1 + + # Add new characters to an existing set + + if found: + found = 0 + for y in [c, o, t]: + for x in s: + if x == y: + found = 1 + if not found: + s.append(y) + appended = 1 + + # If we have not added to an existing set, create a new one. + + if not appended: + caseless_sets.append([c, o, t]) + +# End of loop looking for caseless sets. + +# Now scan the sets and set appropriate offsets for the characters. + +caseless_offsets = [0] * MAX_UNICODE + +offset = 1; +for s in caseless_sets: + for x in s: + caseless_offsets[x] = offset + offset += len(s) + 1 + +# End of block of code for creating offsets for caseless matching sets. + + +# Combine all the tables + +table, records = combine_tables(script, category, break_props, + caseless_offsets, other_case, scriptx_bidi_class, bool_props) + +# Find the record size and create a string definition of the structure for +# outputting as a comment. + +record_size, record_struct = get_record_size_struct(list(records.keys())) + +# Find the optimum block size for the two-stage table + +min_size = sys.maxsize +for block_size in [2 ** i for i in range(5,10)]: + size = len(records) * record_size + stage1, stage2 = compress_table(table, block_size) + size += get_tables_size(stage1, stage2) + #print "/* block size %5d => %5d bytes */" % (block_size, size) + if size < min_size: + min_size = size + min_stage1, min_stage2 = stage1, stage2 + min_block_size = block_size + + +# --------------------------------------------------------------------------- +# MAIN CODE FOR WRITING THE OUTPUT FILE +# --------------------------------------------------------------------------- + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. + +f = open_output("pcre2_ucd.c") + +# Output this file's heading text + +f.write("""\ +/* This file contains tables of Unicode properties that are extracted from +Unicode data files. See the comments at the start of maint/GenerateUcd.py for +details. + +As well as being part of the PCRE2 library, this file is #included by the +pcre2test program, which redefines the PRIV macro to change table names from +_pcre2_xxx to xxxx, thereby avoiding name clashes with the library. At present, +just one of these tables is actually needed. When compiling the library, some +headers are needed. */ + +#ifndef PCRE2_PCRE2TEST +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "pcre2_internal.h" +#endif /* PCRE2_PCRE2TEST */ + +/* The tables herein are needed only when UCP support is built, and in PCRE2 +that happens automatically with UTF support. This module should not be +referenced otherwise, so it should not matter whether it is compiled or not. +However a comment was received about space saving - maybe the guy linked all +the modules rather than using a library - so we include a condition to cut out +the tables when not needed. But don't leave a totally empty module because some +compilers barf at that. Instead, just supply some small dummy tables. */ + +#ifndef SUPPORT_UNICODE +const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}}; +const uint16_t PRIV(ucd_stage1)[] = {0}; +const uint16_t PRIV(ucd_stage2)[] = {0}; +const uint32_t PRIV(ucd_caseless_sets)[] = {0}; +#else +\n""") + +# --- Output some variable heading stuff --- + +f.write("/* Total size: %d bytes, block size: %d. */\n\n" % (min_size, min_block_size)) +f.write('const char *PRIV(unicode_version) = "{}";\n\n'.format(unicode_version)) + +f.write("""\ +/* When recompiling tables with a new Unicode version, please check the types +in this structure definition with those in pcre2_internal.h (the actual field +names will be different). +\n""") + +f.write(record_struct) + +f.write(""" +/* If the 32-bit library is run in non-32-bit mode, character values greater +than 0x10ffff may be encountered. For these we set up a special record. */ + +#if PCRE2_CODE_UNIT_WIDTH == 32 +const ucd_record PRIV(dummy_ucd_record)[] = {{ + ucp_Unknown, /* script */ + ucp_Cn, /* type unassigned */ + ucp_gbOther, /* grapheme break property */ + 0, /* case set */ + 0, /* other case */ + 0 | (ucp_bidiL << UCD_BIDICLASS_SHIFT), /* script extension and bidi class */ + 0, /* bool properties offset */ + }}; +#endif +\n""") + +# --- Output the table of caseless character sets --- + +f.write("""\ +/* This table contains lists of characters that are caseless sets of +more than one character. Each list is terminated by NOTACHAR. */ + +const uint32_t PRIV(ucd_caseless_sets)[] = { + NOTACHAR, +""") + +for s in caseless_sets: + s = sorted(s) + for x in s: + f.write(' 0x%04x,' % x) + f.write(' NOTACHAR,\n') +f.write('};\n\n') + +# --- Other tables are not needed by pcre2test --- + +f.write("""\ +/* When #included in pcre2test, we don't need the table of digit sets, nor the +the large main UCD tables. */ + +#ifndef PCRE2_PCRE2TEST +\n""") + +# --- Read Scripts.txt again for the sets of 10 digits. --- + +digitsets = [] +file = open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8') + +for line in file: + m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line) + if m is None: + continue + first = int(m.group(1),16) + last = int(m.group(2),16) + if ((last - first + 1) % 10) != 0: + f.write("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last), + file=sys.stderr) + while first < last: + digitsets.append(first + 9) + first += 10 +file.close() +digitsets.sort() + +f.write("""\ +/* This table lists the code points for the '9' characters in each set of +decimal digits. It is used to ensure that all the digits in a script run come +from the same set. */ + +const uint32_t PRIV(ucd_digit_sets)[] = { +""") + +f.write(" %d, /* Number of subsequent values */" % len(digitsets)) +count = 8 +for d in digitsets: + if count == 8: + f.write("\n ") + count = 0 + f.write(" 0x%05x," % d) + count += 1 +f.write("\n};\n\n") + +f.write("""\ +/* This vector is a list of script bitsets for the Script Extension property. +The number of 32-bit words in each bitset is #defined in pcre2_ucp.h as +ucd_script_sets_item_size. */ + +const uint32_t PRIV(ucd_script_sets)[] = { +""") +write_bitsets(script_lists, script_list_item_size) + +f.write("""\ +/* This vector is a list of bitsets for Boolean properties. The number of +32_bit words in each bitset is #defined as ucd_boolprop_sets_item_size in +pcre2_ucp.h. */ + +const uint32_t PRIV(ucd_boolprop_sets)[] = { +""") +write_bitsets(bool_props_lists, bool_props_list_item_size) + + +# Output the main UCD tables. + +f.write("""\ +/* These are the main two-stage UCD tables. The fields in each record are: +script (8 bits), character type (8 bits), grapheme break property (8 bits), +offset to multichar other cases or zero (8 bits), offset to other case or zero +(32 bits, signed), bidi class (5 bits) and script extension (11 bits) packed +into a 16-bit field, and offset in binary properties table (16 bits). */ +\n""") + +write_records(records, record_size) +write_table(min_stage1, 'PRIV(ucd_stage1)') +write_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size) + +f.write("#if UCD_BLOCK_SIZE != %d\n" % min_block_size) +f.write("""\ +#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h +#endif +#endif /* SUPPORT_UNICODE */ + +#endif /* PCRE2_PCRE2TEST */ + +/* End of pcre2_ucd.c */ +""") + +f.close + +# End diff --git a/pcre2/maint/GenerateUcpHeader.py b/pcre2/maint/GenerateUcpHeader.py new file mode 100644 index 0000000000000000000000000000000000000000..4fe43d5eb68fb084989a8a7f37810357d1f05816 --- /dev/null +++ b/pcre2/maint/GenerateUcpHeader.py @@ -0,0 +1,98 @@ +#! /usr/bin/python + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This script generates the pcre2_ucp.h file from Unicode data files. This +# header uses enumerations to give names to Unicode property types and script +# names. + +# This script was created in December 2021 as part of the Unicode data +# generation refactoring. + + +# Import common data lists and functions + +from GenerateCommon import \ + bidi_classes, \ + bool_properties, \ + bool_props_list_item_size, \ + break_properties, \ + category_names, \ + general_category_names, \ + script_list_item_size, \ + script_names, \ + open_output + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. + +f = open_output("pcre2_ucp.h") + +# Output this file's heading text + +f.write("""\ +#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD +#define PCRE2_UCP_H_IDEMPOTENT_GUARD + +/* This file contains definitions of the Unicode property values that are +returned by the UCD access macros and used throughout PCRE2. + +IMPORTANT: The specific values of the first two enums (general and particular +character categories) are assumed by the table called catposstab in the file +pcre2_auto_possess.c. They are unlikely to change, but should be checked after +an update. */ +\n""") + +f.write("/* These are the general character categories. */\n\nenum {\n") +for i in general_category_names: + f.write(" ucp_%s,\n" % i) +f.write("};\n\n") + +f.write("/* These are the particular character categories. */\n\nenum {\n") +for i in range(0, len(category_names), 2): + f.write(" ucp_%s, /* %s */\n" % (category_names[i], category_names[i+1])) +f.write("};\n\n") + +f.write("/* These are Boolean properties. */\n\nenum {\n") +for i in bool_properties: + f.write(" ucp_%s,\n" % i) + +f.write(" /* This must be last */\n") +f.write(" ucp_Bprop_Count\n};\n\n") + +f.write("/* Size of entries in ucd_boolprop_sets[] */\n\n") +f.write("#define ucd_boolprop_sets_item_size %d\n\n" % bool_props_list_item_size) + +f.write("/* These are the bidi class values. */\n\nenum {\n") +for i in range(0, len(bidi_classes), 2): + sp = ' ' * (4 - len(bidi_classes[i])) + f.write(" ucp_bidi%s,%s /* %s */\n" % (bidi_classes[i], sp, bidi_classes[i+1])) +f.write("};\n\n") + +f.write("/* These are grapheme break properties. The Extended Pictographic " + "property\ncomes from the emoji-data.txt file. */\n\nenum {\n") +for i in range(0, len(break_properties), 2): + sp = ' ' * (21 - len(break_properties[i])) + f.write(" ucp_gb%s,%s /* %s */\n" % (break_properties[i], sp, break_properties[i+1])) +f.write("};\n\n") + +f.write("/* These are the script identifications. */\n\nenum {\n /* Scripts which has characters in other scripts. */\n") +for i in script_names: + if i == "Unknown": + f.write("\n /* Scripts which has no characters in other scripts. */\n") + f.write(" ucp_%s,\n" % i) +f.write("\n") + +f.write(" /* This must be last */\n") +f.write(" ucp_Script_Count\n};\n\n") + +f.write("/* Size of entries in ucd_script_sets[] */\n\n") +f.write("#define ucd_script_sets_item_size %d\n\n" % script_list_item_size) + +f.write("#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */\n\n") +f.write("/* End of pcre2_ucp.h */\n") + +f.close() + +# End diff --git a/pcre2/maint/GenerateUcpTables.py b/pcre2/maint/GenerateUcpTables.py new file mode 100644 index 0000000000000000000000000000000000000000..528ff91669245a6bfd959c469b552c20d6dbe226 --- /dev/null +++ b/pcre2/maint/GenerateUcpTables.py @@ -0,0 +1,203 @@ +#! /usr/bin/python + +# PCRE2 UNICODE PROPERTY SUPPORT +# ------------------------------ + +# This script generates the pcre2_ucptables.c file, which contains tables for +# recognizing Unicode property names. It is #included by pcre2_tables.c. In +# order to reduce the number of relocations when loading the PCRE2 library, the +# names are held as a single large string, with offsets in the table. This is +# tedious to maintain by hand. Therefore, a script is used to generate the +# table. + +# This script was created in December 2021 based on the previous GenerateUtt +# script, whose output had to be manually edited into pcre2_tables.c. Here is +# the history of the original script: + +# ----------------------------------------------------------------------------- +# Modified by PH 17-March-2009 to generate the more verbose form that works +# for UTF-support in EBCDIC as well as ASCII environments. +# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0. +# Modified by PH 04-May-2010 to add new "X.." special categories. +# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0 +# Modified by ChPe 30-September-2012 to add this note; no other changes were +# necessary for Unicode 6.2.0 support. +# Modfied by PH 26-February-2013 to add the Xuc special category. +# Comment modified by PH 13-May-2014 to update to PCRE2 file names. +# Script updated to Python 3 by running it through the 2to3 converter. +# Added script names for Unicode 7.0.0, 20-June-2014. +# Added script names for Unicode 8.0.0, 19-June-2015. +# Added script names for Unicode 10.0.0, 02-July-2017. +# Added script names for Unicode 11.0.0, 03-July-2018. +# Added 'Unknown' script, 01-October-2018. +# Added script names for Unicode 12.1.0, 27-July-2019. +# Added script names for Unicode 13.0.0, 10-March-2020. +# Added Script names for Unicode 14.0.0, PCRE2-10.39 +# Added support for bidi class and bidi control, 06-December-2021 +# This also involved lower casing strings and removing underscores, in +# accordance with Unicode's "loose matching" rules, which Perl observes. +# Changed default script type from PT_SC to PT_SCX, 18-December-2021 +# ----------------------------------------------------------------------------- +# +# Note subsequent changes here: +# +# 27-December-2021: Added support for 4-letter script abbreviations. +# 10-January-2022: Further updates for Boolean property support +# ----------------------------------------------------------------------------- + + +# Import common data lists and functions + +from GenerateCommon import \ + abbreviations, \ + bool_properties, \ + bidi_classes, \ + category_names, \ + general_category_names, \ + script_names, \ + open_output + +# Open the output file (no return on failure). This call also writes standard +# header boilerplate. + +f = open_output("pcre2_ucptables.c") + +# The list in bidi_classes contains just the Unicode classes such as AN, LRE, +# etc., along with comments. We need to add "bidi" in front of each value, in +# order to create names that don't clash with other types of property. + +bidi_class_names = [] +for i in range(0, len(bidi_classes), 2): + bidi_class_names.append("bidi" + bidi_classes[i]) + +# Remove the comments from other lists that contain them. + +category_names = category_names[::2] + +# Create standardized versions of the names by lowercasing and removing +# underscores. + +def stdname(x): + return x.lower().replace('_', '') + +def stdnames(x): + y = [''] * len(x) + for i in range(len(x)): + y[i] = stdname(x[i]) + return y + +std_category_names = stdnames(category_names) +std_general_category_names = stdnames(general_category_names) +std_bidi_class_names = stdnames(bidi_class_names) +std_bool_properties = stdnames(bool_properties) + +# Create the table, starting with the Unicode script, category and bidi class +# names. We keep both the standardized name and the original, because the +# latter is used for the ucp_xx names. NOTE: for the script abbreviations, we +# still use the full original names. + +utt_table = [] + +scx_end = script_names.index('Unknown') + +for idx, name in enumerate(script_names): + pt_type = 'PT_SCX' if idx < scx_end else 'PT_SC' + utt_table.append((stdname(name), name, pt_type)) + for abbrev in abbreviations[name]: + utt_table.append((stdname(abbrev), name, pt_type)) + +# Add the remaining property lists + +utt_table += list(zip(std_category_names, category_names, ['PT_PC'] * len(category_names))) +utt_table += list(zip(std_general_category_names, general_category_names, ['PT_GC'] * len(general_category_names))) +utt_table += list(zip(std_bidi_class_names, bidi_class_names, ['PT_BIDICL'] * len(bidi_class_names))) + +for name in bool_properties: + utt_table.append((stdname(name), name, 'PT_BOOL')) + if name in abbreviations: + for abbrev in abbreviations[name]: + utt_table.append((stdname(abbrev), name, 'PT_BOOL')) + +# Now add specials and synonyms. Note both the standardized and capitalized +# forms are needed. + +utt_table.append(('any', 'Any', 'PT_ANY')) +utt_table.append(('l&', 'L&', 'PT_LAMP')) +utt_table.append(('lc', 'LC', 'PT_LAMP')) +utt_table.append(('xan', 'Xan', 'PT_ALNUM')) +utt_table.append(('xps', 'Xps', 'PT_PXSPACE')) +utt_table.append(('xsp', 'Xsp', 'PT_SPACE')) +utt_table.append(('xuc', 'Xuc', 'PT_UCNC')) +utt_table.append(('xwd', 'Xwd', 'PT_WORD')) + +# Remove duplicates from the table and then sort it. + +utt_table = list(set(utt_table)) +utt_table.sort() + +# Output file-specific heading + +f.write("""\ +#ifdef SUPPORT_UNICODE + +/* The PRIV(utt)[] table below translates Unicode property names into type and +code values. It is searched by binary chop, so must be in collating sequence of +name. Originally, the table contained pointers to the name strings in the first +field of each entry. However, that leads to a large number of relocations when +a shared library is dynamically loaded. A significant reduction is made by +putting all the names into a single, large string and using offsets instead. +All letters are lower cased, and underscores are removed, in accordance with +the "loose matching" rules that Unicode advises and Perl uses. */ +\n""") + +# We have to use STR_ macros to define the strings so that it all works in +# UTF-8 mode on EBCDIC platforms. + +for utt in utt_table: + f.write('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND'))) + for c in utt[0]: + if c == '&': + f.write(' STR_AMPERSAND') + else: + f.write(' STR_%s' % c); + f.write(' "\\0"\n') + +# Output the long string of concatenated names + +f.write('\nconst char PRIV(utt_names)[] =\n'); +last = '' +for utt in utt_table: + if utt == utt_table[-1]: + last = ';' + f.write(' STRING_%s0%s\n' % (utt[0].replace('&', '_AMPERSAND'), last)) + +# Output the property type table + +f.write('\nconst ucp_type_table PRIV(utt)[] = {\n') +offset = 0 +last = ',' +for utt in utt_table: + if utt[2] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', + 'PT_SPACE', 'PT_UCNC', 'PT_WORD'): + value = '0' + else: + value = 'ucp_' + utt[1] + if utt == utt_table[-1]: + last = '' + f.write(' { %3d, %s, %s }%s\n' % (offset, utt[2], value, last)) + offset += len(utt[0]) + 1 +f.write('};\n\n') + +# Ending text + +f.write("""\ +const size_t PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); + +#endif /* SUPPORT_UNICODE */ + +/* End of pcre2_ucptables.c */ +""") + +f.close + +# End diff --git a/pcre2/maint/GenerateUtt.py b/pcre2/maint/GenerateUtt.py deleted file mode 100755 index eea6efce83fef3556e812639e4bc61ece21cae4c..0000000000000000000000000000000000000000 --- a/pcre2/maint/GenerateUtt.py +++ /dev/null @@ -1,140 +0,0 @@ -#! /usr/bin/python - -# Generate utt tables. Note: this script has now been converted to Python 3. - -# The source file pcre2_tables.c contains (amongst other things), a table that -# is indexed by script name. In order to reduce the number of relocations when -# loading the library, the names are held as a single large string, with -# offsets in the table. This is tedious to maintain by hand. Therefore, this -# script is used to generate the table. The output is sent to stdout; usually -# that should be directed to a temporary file. Then pcre2_tables.c can be -# edited by replacing the relevant definitions and table therein with the -# temporary file. - -# Modified by PH 17-March-2009 to generate the more verbose form that works -# for UTF-support in EBCDIC as well as ASCII environments. -# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0. -# Modified by PH 04-May-2010 to add new "X.." special categories. -# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0 -# Modified by ChPe 30-September-2012 to add this note; no other changes were -# necessary for Unicode 6.2.0 support. -# Modfied by PH 26-February-2013 to add the Xuc special category. -# Comment modified by PH 13-May-2014 to update to PCRE2 file names. -# Script updated to Python 3 by running it through the 2to3 converter. -# Added script names for Unicode 7.0.0, 20-June-2014. -# Added script names for Unicode 8.0.0, 19-June-2015. -# Added script names for Unicode 10.0.0, 02-July-2017. -# Added script names for Unicode 11.0.0, 03-July-2018. -# Added 'Unknown' script, 01-October-2018. -# Added script names for Unicode 12.1.0, 27-July-2019. -# Added script names for Unicode 13.0.0, 10-March-2020. -# Added Script names for Unicode 14.0.0, PCRE2-10.39 - -script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ - 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ - 'Glagolitic', 'Gothic', 'Greek', 'Gujarati', 'Gurmukhi', 'Han', 'Hangul', 'Hanunoo', 'Hebrew', 'Hiragana', \ - 'Inherited', 'Kannada', 'Katakana', 'Kharoshthi', 'Khmer', 'Lao', 'Latin', 'Limbu', 'Linear_B', 'Malayalam', \ - 'Mongolian', 'Myanmar', 'New_Tai_Lue', 'Ogham', 'Old_Italic', 'Old_Persian', 'Oriya', 'Osmanya', 'Runic', \ - 'Shavian', 'Sinhala', 'Syloti_Nagri', 'Syriac', 'Tagalog', 'Tagbanwa', 'Tai_Le', 'Tamil', 'Telugu', 'Thaana', \ - 'Thai', 'Tibetan', 'Tifinagh', 'Ugaritic', 'Yi', \ - # New for Unicode 5.0 - 'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician', \ - # New for Unicode 5.1 - 'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai', \ - # New for Unicode 5.2 - 'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', \ - 'Inscriptional_Pahlavi', 'Inscriptional_Parthian', \ - 'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \ - 'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', \ - # New for Unicode 6.0.0 - 'Batak', 'Brahmi', 'Mandaic', \ -# New for Unicode 6.1.0 - 'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri', -# New for Unicode 7.0.0 - 'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi', - 'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean', - 'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi', - 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi', -# New for Unicode 8.0.0 - 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian', - 'SignWriting', -# New for Unicode 10.0.0 - 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi', - 'Nushu', 'Soyombo', 'Zanabazar_Square', -# New for Unicode 11.0.0 - 'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin', - 'Old_Sogdian', 'Sogdian', -# New for Unicode 12.0.0 - 'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho', -# New for Unicode 13.0.0 - 'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi', -# New for Unicode 14.0.0 - 'Cypro_Minoan', 'Old_Uyghur', 'Tangsa', 'Toto', 'Vithkuqi' - ] - -category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', - 'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps', - 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ] - -general_category_names = ['C', 'L', 'M', 'N', 'P', 'S', 'Z'] - -# First add the Unicode script and category names. - -utt_table = list(zip(script_names, ['PT_SC'] * len(script_names))) -utt_table += list(zip(category_names, ['PT_PC'] * len(category_names))) -utt_table += list(zip(general_category_names, ['PT_GC'] * len(general_category_names))) - -# Now add our own specials. - -utt_table.append(('Any', 'PT_ANY')) -utt_table.append(('L&', 'PT_LAMP')) -utt_table.append(('Xan', 'PT_ALNUM')) -utt_table.append(('Xps', 'PT_PXSPACE')) -utt_table.append(('Xsp', 'PT_SPACE')) -utt_table.append(('Xuc', 'PT_UCNC')) -utt_table.append(('Xwd', 'PT_WORD')) - -# Sort the table. - -utt_table.sort() - -# We have to use STR_ macros to define the strings so that it all works in -# UTF-8 mode on EBCDIC platforms. - -for utt in utt_table: - print('#define STRING_%s0' % (utt[0].replace('&', '_AMPERSAND')), end=' ') - for c in utt[0]: - if c == '_': - print('STR_UNDERSCORE', end=' ') - elif c == '&': - print('STR_AMPERSAND', end=' ') - else: - print('STR_%s' % c, end=' '); - print('"\\0"') - -# Print the actual table, using the string names - -print('') -print('const char PRIV(utt_names)[] ='); -last = '' -for utt in utt_table: - if utt == utt_table[-1]: - last = ';' - print(' STRING_%s0%s' % (utt[0].replace('&', '_AMPERSAND'), last)) -# This was how it was done before the EBCDIC-compatible modification. -# print ' "%s\\0"%s' % (utt[0], last) - -print('\nconst ucp_type_table PRIV(utt)[] = {') -offset = 0 -last = ',' -for utt in utt_table: - if utt[1] in ('PT_ANY', 'PT_LAMP', 'PT_ALNUM', 'PT_PXSPACE', - 'PT_SPACE', 'PT_UCNC', 'PT_WORD'): - value = '0' - else: - value = 'ucp_' + utt[0] - if utt == utt_table[-1]: - last = '' - print(' { %3d, %s, %s }%s' % (offset, utt[1], value, last)) - offset += len(utt[0]) + 1 -print('};') diff --git a/pcre2/maint/ManyConfigTests b/pcre2/maint/ManyConfigTests old mode 100755 new mode 100644 diff --git a/pcre2/maint/MultiStage2.py b/pcre2/maint/MultiStage2.py deleted file mode 100755 index 10fa412905ea564429f77416b51f68642f132ca7..0000000000000000000000000000000000000000 --- a/pcre2/maint/MultiStage2.py +++ /dev/null @@ -1,819 +0,0 @@ -#! /usr/bin/python - -# Multistage table builder -# (c) Peter Kankowski, 2008 - -############################################################################## -# This script was submitted to the PCRE project by Peter Kankowski as part of -# the upgrading of Unicode property support. The new code speeds up property -# matching many times. The script is for the use of PCRE maintainers, to -# generate the pcre2_ucd.c file that contains a digested form of the Unicode -# data tables. A number of extensions have been added to the original script. -# -# The script has now been upgraded to Python 3 for PCRE2, and should be run in -# the maint subdirectory, using the command -# -# [python3] ./MultiStage2.py >../src/pcre2_ucd.c -# -# It requires six Unicode data tables: DerivedGeneralCategory.txt, -# GraphemeBreakProperty.txt, Scripts.txt, ScriptExtensions.txt, -# CaseFolding.txt, and emoji-data.txt. These must be in the -# maint/Unicode.tables subdirectory. -# -# DerivedGeneralCategory.txt is found in the "extracted" subdirectory of the -# Unicode database (UCD) on the Unicode web site; GraphemeBreakProperty.txt is -# in the "auxiliary" subdirectory. Scripts.txt, ScriptExtensions.txt, and -# CaseFolding.txt are directly in the UCD directory. -# -# The emoji-data.txt file is found in the "emoji" subdirectory even though it -# is technically part of a different (but coordinated) standard as shown -# in files associated with Unicode Technical Standard #51 ("Unicode Emoji"), -# for example: -# -# http://unicode.org/Public/emoji/13.0/ReadMe.txt -# -# ----------------------------------------------------------------------------- -# Minor modifications made to this script: -# Added #! line at start -# Removed tabs -# Made it work with Python 2.4 by rewriting two statements that needed 2.5 -# Consequent code tidy -# Adjusted data file names to take from the Unicode.tables directory -# Adjusted global table names by prefixing _pcre_. -# Commented out stuff relating to the casefolding table, which isn't used; -# removed completely in 2012. -# Corrected size calculation -# Add #ifndef SUPPORT_UCP to use dummy tables when no UCP support is needed. -# Update for PCRE2: name changes, and SUPPORT_UCP is abolished. -# -# Major modifications made to this script: -# Added code to add a grapheme break property field to records. -# -# Added code to search for sets of more than two characters that must match -# each other caselessly. A new table is output containing these sets, and -# offsets into the table are added to the main output records. This new -# code scans CaseFolding.txt instead of UnicodeData.txt, which is no longer -# used. -# -# Update for Python3: -# . Processed with 2to3, but that didn't fix everything -# . Changed string.strip to str.strip -# . Added encoding='utf-8' to the open() call -# . Inserted 'int' before blocksize/ELEMS_PER_LINE because an int is -# required and the result of the division is a float -# -# Added code to scan the emoji-data.txt file to find the Extended Pictographic -# property, which is used by PCRE2 as a grapheme breaking property. This was -# done when updating to Unicode 11.0.0 (July 2018). -# -# Added code to add a Script Extensions field to records. This has increased -# their size from 8 to 12 bytes, only 10 of which are currently used. -# -# 01-March-2010: Updated list of scripts for Unicode 5.2.0 -# 30-April-2011: Updated list of scripts for Unicode 6.0.0 -# July-2012: Updated list of scripts for Unicode 6.1.0 -# 20-August-2012: Added scan of GraphemeBreakProperty.txt and added a new -# field in the record to hold the value. Luckily, the -# structure had a hole in it, so the resulting table is -# not much bigger than before. -# 18-September-2012: Added code for multiple caseless sets. This uses the -# final hole in the structure. -# 30-September-2012: Added RegionalIndicator break property from Unicode 6.2.0 -# 13-May-2014: Updated for PCRE2 -# 03-June-2014: Updated for Python 3 -# 20-June-2014: Updated for Unicode 7.0.0 -# 12-August-2014: Updated to put Unicode version into the file -# 19-June-2015: Updated for Unicode 8.0.0 -# 02-July-2017: Updated for Unicode 10.0.0 -# 03-July-2018: Updated for Unicode 11.0.0 -# 07-July-2018: Added code to scan emoji-data.txt for the Extended -# Pictographic property. -# 01-October-2018: Added the 'Unknown' script name -# 03-October-2018: Added new field for Script Extensions -# 27-July-2019: Updated for Unicode 12.1.0 -# 10-March-2020: Updated for Unicode 13.0.0 -# PCRE2-10.39: Updated for Unicode 14.0.0 -# ---------------------------------------------------------------------------- -# -# -# The main tables generated by this script are used by macros defined in -# pcre2_internal.h. They look up Unicode character properties using short -# sequences of code that contains no branches, which makes for greater speed. -# -# Conceptually, there is a table of records (of type ucd_record), containing a -# script number, script extension value, character type, grapheme break type, -# offset to caseless matching set, offset to the character's other case, for -# every Unicode character. However, a real table covering all Unicode -# characters would be far too big. It can be efficiently compressed by -# observing that many characters have the same record, and many blocks of -# characters (taking 128 characters in a block) have the same set of records as -# other blocks. This leads to a 2-stage lookup process. -# -# This script constructs six tables. The ucd_caseless_sets table contains -# lists of characters that all match each other caselessly. Each list is -# in order, and is terminated by NOTACHAR (0xffffffff), which is larger than -# any valid character. The first list is empty; this is used for characters -# that are not part of any list. -# -# The ucd_digit_sets table contains the code points of the '9' characters in -# each set of 10 decimal digits in Unicode. This is used to ensure that digits -# in script runs all come from the same set. The first element in the vector -# contains the number of subsequent elements, which are in ascending order. -# -# The ucd_script_sets vector contains lists of script numbers that are the -# Script Extensions properties of certain characters. Each list is terminated -# by zero (ucp_Unknown). A character with more than one script listed for its -# Script Extension property has a negative value in its record. This is the -# negated offset to the start of the relevant list in the ucd_script_sets -# vector. -# -# The ucd_records table contains one instance of every unique record that is -# required. The ucd_stage1 table is indexed by a character's block number, -# which is the character's code point divided by 128, since 128 is the size -# of each block. The result of a lookup in ucd_stage1 a "virtual" block number. -# -# The ucd_stage2 table is a table of "virtual" blocks; each block is indexed by -# the offset of a character within its own block, and the result is the index -# number of the required record in the ucd_records vector. -# -# The following examples are correct for the Unicode 11.0.0 database. Future -# updates may make change the actual lookup values. -# -# Example: lowercase "a" (U+0061) is in block 0 -# lookup 0 in stage1 table yields 0 -# lookup 97 (0x61) in the first table in stage2 yields 17 -# record 17 is { 34, 5, 12, 0, -32, 34, 0 } -# 34 = ucp_Latin => Latin script -# 5 = ucp_Ll => Lower case letter -# 12 = ucp_gbOther => Grapheme break property "Other" -# 0 => Not part of a caseless set -# -32 (-0x20) => Other case is U+0041 -# 34 = ucp_Latin => No special Script Extension property -# 0 => Dummy value, unused at present -# -# Almost all lowercase latin characters resolve to the same record. One or two -# are different because they are part of a multi-character caseless set (for -# example, k, K and the Kelvin symbol are such a set). -# -# Example: hiragana letter A (U+3042) is in block 96 (0x60) -# lookup 96 in stage1 table yields 90 -# lookup 66 (0x42) in table 90 in stage2 yields 564 -# record 564 is { 27, 7, 12, 0, 0, 27, 0 } -# 27 = ucp_Hiragana => Hiragana script -# 7 = ucp_Lo => Other letter -# 12 = ucp_gbOther => Grapheme break property "Other" -# 0 => Not part of a caseless set -# 0 => No other case -# 27 = ucp_Hiragana => No special Script Extension property -# 0 => Dummy value, unused at present -# -# Example: vedic tone karshana (U+1CD0) is in block 57 (0x39) -# lookup 57 in stage1 table yields 55 -# lookup 80 (0x50) in table 55 in stage2 yields 458 -# record 458 is { 28, 12, 3, 0, 0, -101, 0 } -# 28 = ucp_Inherited => Script inherited from predecessor -# 12 = ucp_Mn => Non-spacing mark -# 3 = ucp_gbExtend => Grapheme break property "Extend" -# 0 => Not part of a caseless set -# 0 => No other case -# -101 => Script Extension list offset = 101 -# 0 => Dummy value, unused at present -# -# At offset 101 in the ucd_script_sets vector we find the list 3, 15, 107, 29, -# and terminator 0. This means that this character is expected to be used with -# any of those scripts, which are Bengali, Devanagari, Grantha, and Kannada. -# -# Philip Hazel, 03 July 2008 -############################################################################## - - -import re -import string -import sys - -MAX_UNICODE = 0x110000 -NOTACHAR = 0xffffffff - - -# Parse a line of Scripts.txt, GraphemeBreakProperty.txt or DerivedGeneralCategory.txt -def make_get_names(enum): - return lambda chardata: enum.index(chardata[1]) - -# Parse a line of CaseFolding.txt -def get_other_case(chardata): - if chardata[1] == 'C' or chardata[1] == 'S': - return int(chardata[2], 16) - int(chardata[0], 16) - return 0 - -# Parse a line of ScriptExtensions.txt -def get_script_extension(chardata): - this_script_list = list(chardata[1].split(' ')) - if len(this_script_list) == 1: - return script_abbrevs.index(this_script_list[0]) - - script_numbers = [] - for d in this_script_list: - script_numbers.append(script_abbrevs.index(d)) - script_numbers.append(0) - script_numbers_length = len(script_numbers) - - for i in range(1, len(script_lists) - script_numbers_length + 1): - for j in range(0, script_numbers_length): - found = True - if script_lists[i+j] != script_numbers[j]: - found = False - break - if found: - return -i - - # Not found in existing lists - - return_value = len(script_lists) - script_lists.extend(script_numbers) - return -return_value - -# Read the whole table in memory, setting/checking the Unicode version -def read_table(file_name, get_value, default_value): - global unicode_version - - f = re.match(r'^[^/]+/([^.]+)\.txt$', file_name) - file_base = f.group(1) - version_pat = r"^# " + re.escape(file_base) + r"-(\d+\.\d+\.\d+)\.txt$" - file = open(file_name, 'r', encoding='utf-8') - f = re.match(version_pat, file.readline()) - version = f.group(1) - if unicode_version == "": - unicode_version = version - elif unicode_version != version: - print("WARNING: Unicode version differs in %s", file_name, file=sys.stderr) - - table = [default_value] * MAX_UNICODE - for line in file: - line = re.sub(r'#.*', '', line) - chardata = list(map(str.strip, line.split(';'))) - if len(chardata) <= 1: - continue - value = get_value(chardata) - m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) - char = int(m.group(1), 16) - if m.group(3) is None: - last = char - else: - last = int(m.group(3), 16) - for i in range(char, last + 1): - # It is important not to overwrite a previously set - # value because in the CaseFolding file there are lines - # to be ignored (returning the default value of 0) - # which often come after a line which has already set - # data. - if table[i] == default_value: - table[i] = value - file.close() - return table - -# Get the smallest possible C language type for the values -def get_type_size(table): - type_size = [("uint8_t", 1), ("uint16_t", 2), ("uint32_t", 4), - ("signed char", 1), ("pcre_int16", 2), ("pcre_int32", 4)] - limits = [(0, 255), (0, 65535), (0, 4294967295), - (-128, 127), (-32768, 32767), (-2147483648, 2147483647)] - minval = min(table) - maxval = max(table) - for num, (minlimit, maxlimit) in enumerate(limits): - if minlimit <= minval and maxval <= maxlimit: - return type_size[num] - else: - raise OverflowError("Too large to fit into C types") - -def get_tables_size(*tables): - total_size = 0 - for table in tables: - type, size = get_type_size(table) - total_size += size * len(table) - return total_size - -# Compress the table into the two stages -def compress_table(table, block_size): - blocks = {} # Dictionary for finding identical blocks - stage1 = [] # Stage 1 table contains block numbers (indices into stage 2 table) - stage2 = [] # Stage 2 table contains the blocks with property values - table = tuple(table) - for i in range(0, len(table), block_size): - block = table[i:i+block_size] - start = blocks.get(block) - if start is None: - # Allocate a new block - start = len(stage2) / block_size - stage2 += block - blocks[block] = start - stage1.append(start) - - return stage1, stage2 - -# Print a table -def print_table(table, table_name, block_size = None): - type, size = get_type_size(table) - ELEMS_PER_LINE = 16 - - s = "const %s %s[] = { /* %d bytes" % (type, table_name, size * len(table)) - if block_size: - s += ", block = %d" % block_size - print(s + " */") - table = tuple(table) - if block_size is None: - fmt = "%3d," * ELEMS_PER_LINE + " /* U+%04X */" - mult = MAX_UNICODE / len(table) - for i in range(0, len(table), ELEMS_PER_LINE): - print(fmt % (table[i:i+ELEMS_PER_LINE] + - (int(i * mult),))) - else: - if block_size > ELEMS_PER_LINE: - el = ELEMS_PER_LINE - else: - el = block_size - fmt = "%3d," * el + "\n" - if block_size > ELEMS_PER_LINE: - fmt = fmt * int(block_size / ELEMS_PER_LINE) - for i in range(0, len(table), block_size): - print(("/* block %d */\n" + fmt) % ((i / block_size,) + table[i:i+block_size])) - print("};\n") - -# Extract the unique combinations of properties into records -def combine_tables(*tables): - records = {} - index = [] - for t in zip(*tables): - i = records.get(t) - if i is None: - i = records[t] = len(records) - index.append(i) - return index, records - -def get_record_size_struct(records): - size = 0 - structure = '/* When recompiling tables with a new Unicode version, please check the\n' + \ - 'types in this structure definition from pcre2_internal.h (the actual\n' + \ - 'field names will be different):\n\ntypedef struct {\n' - for i in range(len(records[0])): - record_slice = [record[i] for record in records] - slice_type, slice_size = get_type_size(record_slice) - # add padding: round up to the nearest power of slice_size - size = (size + slice_size - 1) & -slice_size - size += slice_size - structure += '%s property_%d;\n' % (slice_type, i) - - # round up to the first item of the next structure in array - record_slice = [record[0] for record in records] - slice_type, slice_size = get_type_size(record_slice) - size = (size + slice_size - 1) & -slice_size - - structure += '} ucd_record;\n*/\n' - return size, structure - -def test_record_size(): - tests = [ \ - ( [(3,), (6,), (6,), (1,)], 1 ), \ - ( [(300,), (600,), (600,), (100,)], 2 ), \ - ( [(25, 3), (6, 6), (34, 6), (68, 1)], 2 ), \ - ( [(300, 3), (6, 6), (340, 6), (690, 1)], 4 ), \ - ( [(3, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ - ( [(300, 300), (6, 6), (6, 340), (1, 690)], 4 ), \ - ( [(3, 100000), (6, 6), (6, 123456), (1, 690)], 8 ), \ - ( [(100000, 300), (6, 6), (123456, 6), (1, 690)], 8 ), \ - ] - for test in tests: - size, struct = get_record_size_struct(test[0]) - assert(size == test[1]) - #print struct - -def print_records(records, record_size): - print('const ucd_record PRIV(ucd_records)[] = { ' + \ - '/* %d bytes, record size %d */' % (len(records) * record_size, record_size)) - - records = list(zip(list(records.keys()), list(records.values()))) - records.sort(key = lambda x: x[1]) - for i, record in enumerate(records): - print((' {' + '%6d, ' * len(record[0]) + '}, /* %3d */') % (record[0] + (i,))) - print('};\n') - -script_names = ['Unknown', 'Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', - 'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', - 'Glagolitic', 'Gothic', 'Greek', 'Gujarati', 'Gurmukhi', 'Han', 'Hangul', 'Hanunoo', 'Hebrew', 'Hiragana', - 'Inherited', 'Kannada', 'Katakana', 'Kharoshthi', 'Khmer', 'Lao', 'Latin', 'Limbu', 'Linear_B', 'Malayalam', - 'Mongolian', 'Myanmar', 'New_Tai_Lue', 'Ogham', 'Old_Italic', 'Old_Persian', 'Oriya', 'Osmanya', 'Runic', - 'Shavian', 'Sinhala', 'Syloti_Nagri', 'Syriac', 'Tagalog', 'Tagbanwa', 'Tai_Le', 'Tamil', 'Telugu', 'Thaana', - 'Thai', 'Tibetan', 'Tifinagh', 'Ugaritic', 'Yi', -# New for Unicode 5.0 - 'Balinese', 'Cuneiform', 'Nko', 'Phags_Pa', 'Phoenician', -# New for Unicode 5.1 - 'Carian', 'Cham', 'Kayah_Li', 'Lepcha', 'Lycian', 'Lydian', 'Ol_Chiki', 'Rejang', 'Saurashtra', 'Sundanese', 'Vai', -# New for Unicode 5.2 - 'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', - 'Inscriptional_Pahlavi', 'Inscriptional_Parthian', - 'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', - 'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', -# New for Unicode 6.0.0 - 'Batak', 'Brahmi', 'Mandaic', -# New for Unicode 6.1.0 - 'Chakma', 'Meroitic_Cursive', 'Meroitic_Hieroglyphs', 'Miao', 'Sharada', 'Sora_Sompeng', 'Takri', -# New for Unicode 7.0.0 - 'Bassa_Vah', 'Caucasian_Albanian', 'Duployan', 'Elbasan', 'Grantha', 'Khojki', 'Khudawadi', - 'Linear_A', 'Mahajani', 'Manichaean', 'Mende_Kikakui', 'Modi', 'Mro', 'Nabataean', - 'Old_North_Arabian', 'Old_Permic', 'Pahawh_Hmong', 'Palmyrene', 'Psalter_Pahlavi', - 'Pau_Cin_Hau', 'Siddham', 'Tirhuta', 'Warang_Citi', -# New for Unicode 8.0.0 - 'Ahom', 'Anatolian_Hieroglyphs', 'Hatran', 'Multani', 'Old_Hungarian', - 'SignWriting', -# New for Unicode 10.0.0 - 'Adlam', 'Bhaiksuki', 'Marchen', 'Newa', 'Osage', 'Tangut', 'Masaram_Gondi', - 'Nushu', 'Soyombo', 'Zanabazar_Square', -# New for Unicode 11.0.0 - 'Dogra', 'Gunjala_Gondi', 'Hanifi_Rohingya', 'Makasar', 'Medefaidrin', - 'Old_Sogdian', 'Sogdian', -# New for Unicode 12.0.0 - 'Elymaic', 'Nandinagari', 'Nyiakeng_Puachue_Hmong', 'Wancho', -# New for Unicode 13.0.0 - 'Chorasmian', 'Dives_Akuru', 'Khitan_Small_Script', 'Yezidi', -# New for Unicode 14.0.0 - 'Cypro_Minoan', 'Old_Uyghur', 'Tangsa', 'Toto', 'Vithkuqi' - ] - -script_abbrevs = [ - 'Zzzz', 'Arab', 'Armn', 'Beng', 'Bopo', 'Brai', 'Bugi', 'Buhd', 'Cans', - 'Cher', 'Zyyy', 'Copt', 'Cprt', 'Cyrl', 'Dsrt', 'Deva', 'Ethi', 'Geor', - 'Glag', 'Goth', 'Grek', 'Gujr', 'Guru', 'Hani', 'Hang', 'Hano', 'Hebr', - 'Hira', 'Zinh', 'Knda', 'Kana', 'Khar', 'Khmr', 'Laoo', 'Latn', 'Limb', - 'Linb', 'Mlym', 'Mong', 'Mymr', 'Talu', 'Ogam', 'Ital', 'Xpeo', 'Orya', - 'Osma', 'Runr', 'Shaw', 'Sinh', 'Sylo', 'Syrc', 'Tglg', 'Tagb', 'Tale', - 'Taml', 'Telu', 'Thaa', 'Thai', 'Tibt', 'Tfng', 'Ugar', 'Yiii', -#New for Unicode 5.0 - 'Bali', 'Xsux', 'Nkoo', 'Phag', 'Phnx', -#New for Unicode 5.1 - 'Cari', 'Cham', 'Kali', 'Lepc', 'Lyci', 'Lydi', 'Olck', 'Rjng', 'Saur', - 'Sund', 'Vaii', -#New for Unicode 5.2 - 'Avst', 'Bamu', 'Egyp', 'Armi', 'Phli', 'Prti', 'Java', 'Kthi', 'Lisu', - 'Mtei', 'Sarb', 'Orkh', 'Samr', 'Lana', 'Tavt', -#New for Unicode 6.0.0 - 'Batk', 'Brah', 'Mand', -#New for Unicode 6.1.0 - 'Cakm', 'Merc', 'Mero', 'Plrd', 'Shrd', 'Sora', 'Takr', -#New for Unicode 7.0.0 - 'Bass', 'Aghb', 'Dupl', 'Elba', 'Gran', 'Khoj', 'Sind', 'Lina', 'Mahj', - 'Mani', 'Mend', 'Modi', 'Mroo', 'Nbat', 'Narb', 'Perm', 'Hmng', 'Palm', - 'Phlp', 'Pauc', 'Sidd', 'Tirh', 'Wara', -#New for Unicode 8.0.0 - 'Ahom', 'Hluw', 'Hatr', 'Mult', 'Hung', 'Sgnw', -#New for Unicode 10.0.0 - 'Adlm', 'Bhks', 'Marc', 'Newa', 'Osge', 'Tang', 'Gonm', 'Nshu', 'Soyo', - 'Zanb', -#New for Unicode 11.0.0 - 'Dogr', 'Gong', 'Rohg', 'Maka', 'Medf', 'Sogo', 'Sogd', -#New for Unicode 12.0.0 - 'Elym', 'Nand', 'Hmnp', 'Wcho', -#New for Unicode 13.0.0 - 'Chrs', 'Diak', 'Kits', 'Yezi', -#New for Unicode 14.0.0 - 'Cpmn', 'Ougr', 'Tngs', 'Toto', 'Vith' - ] - -category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', - 'Mc', 'Me', 'Mn', 'Nd', 'Nl', 'No', 'Pc', 'Pd', 'Pe', 'Pf', 'Pi', 'Po', 'Ps', - 'Sc', 'Sk', 'Sm', 'So', 'Zl', 'Zp', 'Zs' ] - -# The Extended_Pictographic property is not found in the file where all the -# others are (GraphemeBreakProperty.txt). It comes from the emoji-data.txt -# file, but we list it here so that the name has the correct index value. - -break_property_names = ['CR', 'LF', 'Control', 'Extend', 'Prepend', - 'SpacingMark', 'L', 'V', 'T', 'LV', 'LVT', 'Regional_Indicator', 'Other', - 'ZWJ', 'Extended_Pictographic' ] - -test_record_size() -unicode_version = "" - -script = read_table('Unicode.tables/Scripts.txt', make_get_names(script_names), script_names.index('Unknown')) -category = read_table('Unicode.tables/DerivedGeneralCategory.txt', make_get_names(category_names), category_names.index('Cn')) -break_props = read_table('Unicode.tables/GraphemeBreakProperty.txt', make_get_names(break_property_names), break_property_names.index('Other')) -other_case = read_table('Unicode.tables/CaseFolding.txt', get_other_case, 0) - -# The grapheme breaking rules were changed for Unicode 11.0.0 (June 2018). Now -# we need to find the Extended_Pictographic property for emoji characters. This -# can be set as an additional grapheme break property, because the default for -# all the emojis is "other". We scan the emoji-data.txt file and modify the -# break-props table. - -file = open('Unicode.tables/emoji-data.txt', 'r', encoding='utf-8') -for line in file: - line = re.sub(r'#.*', '', line) - chardata = list(map(str.strip, line.split(';'))) - if len(chardata) <= 1: - continue - - if chardata[1] != "Extended_Pictographic": - continue - - m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) - char = int(m.group(1), 16) - if m.group(3) is None: - last = char - else: - last = int(m.group(3), 16) - for i in range(char, last + 1): - if break_props[i] != break_property_names.index('Other'): - print("WARNING: Emoji 0x%x has break property %s, not 'Other'", - i, break_property_names[break_props[i]], file=sys.stderr) - break_props[i] = break_property_names.index('Extended_Pictographic') -file.close() - -# The Script Extensions property default value is the Script value. Parse the -# file, setting 'Unknown' as the default (this will never be a Script Extension -# value), then scan it and fill in the default from Scripts. Code added by PH -# in October 2018. Positive values are used for just a single script for a -# code point. Negative values are negated offsets in a list of lists of -# multiple scripts. Initialize this list with a single entry, as the zeroth -# element is never used. - -script_lists = [0] -script_abbrevs_default = script_abbrevs.index('Zzzz') -scriptx = read_table('Unicode.tables/ScriptExtensions.txt', get_script_extension, script_abbrevs_default) - -for i in range(0, MAX_UNICODE): - if scriptx[i] == script_abbrevs_default: - scriptx[i] = script[i] - -# With the addition of the new Script Extensions field, we need some padding -# to get the Unicode records up to 12 bytes (multiple of 4). Set a value -# greater than 255 to make the field 16 bits. - -padding_dummy = [0] * MAX_UNICODE -padding_dummy[0] = 256 - -# This block of code was added by PH in September 2012. I am not a Python -# programmer, so the style is probably dreadful, but it does the job. It scans -# the other_case table to find sets of more than two characters that must all -# match each other caselessly. Later in this script a table of these sets is -# written out. However, we have to do this work here in order to compute the -# offsets in the table that are inserted into the main table. - -# The CaseFolding.txt file lists pairs, but the common logic for reading data -# sets only one value, so first we go through the table and set "return" -# offsets for those that are not already set. - -for c in range(MAX_UNICODE): - if other_case[c] != 0 and other_case[c + other_case[c]] == 0: - other_case[c + other_case[c]] = -other_case[c] - -# Now scan again and create equivalence sets. - -sets = [] - -for c in range(MAX_UNICODE): - o = c + other_case[c] - - # Trigger when this character's other case does not point back here. We - # now have three characters that are case-equivalent. - - if other_case[o] != -other_case[c]: - t = o + other_case[o] - - # Scan the existing sets to see if any of the three characters are already - # part of a set. If so, unite the existing set with the new set. - - appended = 0 - for s in sets: - found = 0 - for x in s: - if x == c or x == o or x == t: - found = 1 - - # Add new characters to an existing set - - if found: - found = 0 - for y in [c, o, t]: - for x in s: - if x == y: - found = 1 - if not found: - s.append(y) - appended = 1 - - # If we have not added to an existing set, create a new one. - - if not appended: - sets.append([c, o, t]) - -# End of loop looking for caseless sets. - -# Now scan the sets and set appropriate offsets for the characters. - -caseless_offsets = [0] * MAX_UNICODE - -offset = 1; -for s in sets: - for x in s: - caseless_offsets[x] = offset - offset += len(s) + 1 - -# End of block of code for creating offsets for caseless matching sets. - - -# Combine the tables - -table, records = combine_tables(script, category, break_props, - caseless_offsets, other_case, scriptx, padding_dummy) - -record_size, record_struct = get_record_size_struct(list(records.keys())) - -# Find the optimum block size for the two-stage table -min_size = sys.maxsize -for block_size in [2 ** i for i in range(5,10)]: - size = len(records) * record_size - stage1, stage2 = compress_table(table, block_size) - size += get_tables_size(stage1, stage2) - #print "/* block size %5d => %5d bytes */" % (block_size, size) - if size < min_size: - min_size = size - min_stage1, min_stage2 = stage1, stage2 - min_block_size = block_size - -print("/* This module is generated by the maint/MultiStage2.py script.") -print("Do not modify it by hand. Instead modify the script and run it") -print("to regenerate this code.") -print() -print("As well as being part of the PCRE2 library, this module is #included") -print("by the pcre2test program, which redefines the PRIV macro to change") -print("table names from _pcre2_xxx to xxxx, thereby avoiding name clashes") -print("with the library. At present, just one of these tables is actually") -print("needed. */") -print() -print("#ifndef PCRE2_PCRE2TEST") -print() -print("#ifdef HAVE_CONFIG_H") -print("#include \"config.h\"") -print("#endif") -print() -print("#include \"pcre2_internal.h\"") -print() -print("#endif /* PCRE2_PCRE2TEST */") -print() -print("/* Unicode character database. */") -print("/* This file was autogenerated by the MultiStage2.py script. */") -print("/* Total size: %d bytes, block size: %d. */" % (min_size, min_block_size)) -print() -print("/* The tables herein are needed only when UCP support is built,") -print("and in PCRE2 that happens automatically with UTF support.") -print("This module should not be referenced otherwise, so") -print("it should not matter whether it is compiled or not. However") -print("a comment was received about space saving - maybe the guy linked") -print("all the modules rather than using a library - so we include a") -print("condition to cut out the tables when not needed. But don't leave") -print("a totally empty module because some compilers barf at that.") -print("Instead, just supply some small dummy tables. */") -print() -print("#ifndef SUPPORT_UNICODE") -print("const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }};") -print("const uint16_t PRIV(ucd_stage1)[] = {0};") -print("const uint16_t PRIV(ucd_stage2)[] = {0};") -print("const uint32_t PRIV(ucd_caseless_sets)[] = {0};") -print("#else") -print() -print("const char *PRIV(unicode_version) = \"{}\";".format(unicode_version)) -print() -print("/* If the 32-bit library is run in non-32-bit mode, character values") -print("greater than 0x10ffff may be encountered. For these we set up a") -print("special record. */") -print() -print("#if PCRE2_CODE_UNIT_WIDTH == 32") -print("const ucd_record PRIV(dummy_ucd_record)[] = {{") -print(" ucp_Unknown, /* script */") -print(" ucp_Cn, /* type unassigned */") -print(" ucp_gbOther, /* grapheme break property */") -print(" 0, /* case set */") -print(" 0, /* other case */") -print(" ucp_Unknown, /* script extension */") -print(" 0, /* dummy filler */") -print(" }};") -print("#endif") -print() -print(record_struct) - -# --- Added by PH: output the table of caseless character sets --- - -print("/* This table contains lists of characters that are caseless sets of") -print("more than one character. Each list is terminated by NOTACHAR. */\n") - -print("const uint32_t PRIV(ucd_caseless_sets)[] = {") -print(" NOTACHAR,") -for s in sets: - s = sorted(s) - for x in s: - print(' 0x%04x,' % x, end=' ') - print(' NOTACHAR,') -print('};') -print() - -# ------ - -print("/* When #included in pcre2test, we don't need the table of digit") -print("sets, nor the the large main UCD tables. */") -print() -print("#ifndef PCRE2_PCRE2TEST") -print() - -# --- Added by PH: read Scripts.txt again for the sets of 10 digits. --- - -digitsets = [] -file = open('Unicode.tables/Scripts.txt', 'r', encoding='utf-8') - -for line in file: - m = re.match(r'([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s+;\s+\S+\s+#\s+Nd\s+', line) - if m is None: - continue - first = int(m.group(1),16) - last = int(m.group(2),16) - if ((last - first + 1) % 10) != 0: - print("ERROR: %04x..%04x does not contain a multiple of 10 characters" % (first, last), - file=sys.stderr) - while first < last: - digitsets.append(first + 9) - first += 10 -file.close() -digitsets.sort() - -print("/* This table lists the code points for the '9' characters in each") -print("set of decimal digits. It is used to ensure that all the digits in") -print("a script run come from the same set. */\n") -print("const uint32_t PRIV(ucd_digit_sets)[] = {") - -print(" %d, /* Number of subsequent values */" % len(digitsets), end='') -count = 8 -for d in digitsets: - if count == 8: - print("\n ", end='') - count = 0 - print(" 0x%05x," % d, end='') - count += 1 -print("\n};\n") - -print("/* This vector is a list of lists of scripts for the Script Extension") -print("property. Each sublist is zero-terminated. */\n") -print("const uint8_t PRIV(ucd_script_sets)[] = {") - -count = 0 -print(" /* 0 */", end='') -for d in script_lists: - print(" %3d," % d, end='') - count += 1 - if d == 0: - print("\n /* %3d */" % count, end='') -print("\n};\n") - -# Output the main UCD tables. - -print("/* These are the main two-stage UCD tables. The fields in each record are:") -print("script (8 bits), character type (8 bits), grapheme break property (8 bits),") -print("offset to multichar other cases or zero (8 bits), offset to other case") -print("or zero (32 bits, signed), script extension (16 bits, signed), and a dummy") -print("16-bit field to make the whole thing a multiple of 4 bytes. */\n") - -print_records(records, record_size) -print_table(min_stage1, 'PRIV(ucd_stage1)') -print_table(min_stage2, 'PRIV(ucd_stage2)', min_block_size) -print("#if UCD_BLOCK_SIZE != %d" % min_block_size) -print("#error Please correct UCD_BLOCK_SIZE in pcre2_internal.h") -print("#endif") -print("#endif /* SUPPORT_UNICODE */") -print() -print("#endif /* PCRE2_PCRE2TEST */") - - -# This code was part of the original contribution, but is commented out as it -# was never used. A two-stage table has sufficed. - -""" - -# Three-stage tables: - -# Find the optimum block size for 3-stage table -min_size = sys.maxint -for stage3_block in [2 ** i for i in range(2,6)]: - stage_i, stage3 = compress_table(table, stage3_block) - for stage2_block in [2 ** i for i in range(5,10)]: - size = len(records) * 4 - stage1, stage2 = compress_table(stage_i, stage2_block) - size += get_tables_size(stage1, stage2, stage3) - # print "/* %5d / %3d => %5d bytes */" % (stage2_block, stage3_block, size) - if size < min_size: - min_size = size - min_stage1, min_stage2, min_stage3 = stage1, stage2, stage3 - min_stage2_block, min_stage3_block = stage2_block, stage3_block - -print "/* Total size: %d bytes" % min_size */ -print_records(records) -print_table(min_stage1, 'ucd_stage1') -print_table(min_stage2, 'ucd_stage2', min_stage2_block) -print_table(min_stage3, 'ucd_stage3', min_stage3_block) - -""" diff --git a/pcre2/maint/README b/pcre2/maint/README index ab9845c37738cc2905c356beed290b6a0428d871..f21ff87d82d11e9825efd2befb04cbd18118fba4 100644 --- a/pcre2/maint/README +++ b/pcre2/maint/README @@ -16,92 +16,114 @@ and also contains some notes for maintainers. Its contents are: Files in the maint directory ============================ -GenerateUtt.py A Python script to generate part of the pcre2_tables.c file - that contains Unicode script names in a long string with - offsets, which is tedious to maintain by hand. - -ManyConfigTests A shell script that runs "configure, make, test" a number of - times with different configuration settings. - -MultiStage2.py A Python script that generates the file pcre2_ucd.c from six - Unicode data files, which are themselves downloaded from the - Unicode web site. Run this script in the "maint" directory. - The generated file is written to stdout. It contains the - tables for a 2-stage lookup of Unicode properties, along with - some auxiliary tables. +GenerateCommon.py + A Python module containing data and functions that are used by the other + Generate scripts. + +GenerateTest26.py + A Python script that generates input and expected output test data for test + 26, which tests certain aspects of Unicode property support. + +GenerateUcd.py + A Python script that generates the file pcre2_ucd.c from GenerateCommon.py + and Unicode data files, which are themselves downloaded from the Unicode web + site. The generated file contains the tables for a 2-stage lookup of Unicode + properties, along with some auxiliary tables. The script starts with a long + comment that gives details of the tables it constructs. + +GenerateUcpHeader.py + A Python script that generates the file pcre2_ucp.h from GenerateCommon.py + and Unicode data files. The generated file defines constants for various + Unicode property values. + +GenerateUcpTables.py + A Python script that generates the file pcre2_ucptables.c from + GenerateCommon.py and Unicode data files. The generated file contains tables + for looking up Unicode property names. + +ManyConfigTests + A shell script that runs "configure, make, test" a number of times with + different configuration settings. pcre2_chartables.c.non-standard - This is a set of character tables that came from a Windows - system. It has characters greater than 128 that are set as - spaces, amongst other things. I kept it so that it can be - used for testing from time to time. - -README This file. - -Unicode.tables The files in this directory were downloaded from the Unicode - web site. They contain information about Unicode characters - and scripts. The ones used by the MultiStage2.py script are - CaseFolding.txt, DerivedGeneralCategory.txt, Scripts.txt, - ScriptExtensions.txt, GraphemeBreakProperty.txt, and - emoji-data.txt. I've kept UnicodeData.txt (which is no longer - used by the script) because it is useful occasionally for - manually looking up the details of certain characters. - However, note that character names in this file such as - "Arabic sign sanah" do NOT mean that the character is in a - particular script (in this case, Arabic). Scripts.txt and - ScriptExtensions.txt are where to look for script information. - -ucptest.c A short C program for testing the Unicode property macros - that do lookups in the pcre2_ucd.c data, mainly useful after - rebuilding the Unicode property table. Compile and run this in - the "maint" directory (see comments at its head). This program - can also be used to find characters with specific properties. - -ucptestdata A directory containing four files, testinput{1,2} and - testoutput{1,2}, for use in conjunction with the ucptest - program. - -utf8.c A short, freestanding C program for converting a Unicode code - point into a sequence of bytes in the UTF-8 encoding, and vice - versa. If its argument is a hex number such as 0x1234, it - outputs a list of the equivalent UTF-8 bytes. If its argument - is a sequence of concatenated UTF-8 bytes (e.g. e188b4) it - treats them as a UTF-8 character and outputs the equivalent - code point in hex. See comments at its head for details. + This is a set of character tables that came from a Windows system. It has + characters greater than 128 that are set as spaces, amongst other things. I + kept it so that it can be used for testing from time to time. + +README + This file. + +Unicode.tables + The files in this directory were downloaded from the Unicode web site. They + contain information about Unicode characters and scripts, and are used by the + Generate scripts. There is also UnicodeData.txt, which is no longer used by + any script, because it is useful occasionally for manually looking up the + details of certain characters. However, note that character names in this + file such as "Arabic sign sanah" do NOT mean that the character is in a + particular script (in this case, Arabic). Scripts.txt and + ScriptExtensions.txt are where to look for script information. + +ucptest.c + A program for testing the Unicode property macros that do lookups in the + pcre2_ucd.c data, mainly useful after rebuilding the Unicode property tables. + Compile and run this in the "maint" directory (see comments at its head). + This program can also be used to find characters with specific properties and + to list which properties are supported. + +ucptestdata + A directory containing four files, testinput{1,2} and testoutput{1,2}, for + use in conjunction with the ucptest program. + +utf8.c + A short, freestanding C program for converting a Unicode code point into a + sequence of bytes in the UTF-8 encoding, and vice versa. If its argument is a + hex number such as 0x1234, it outputs a list of the equivalent UTF-8 bytes. + If its argument is a sequence of concatenated UTF-8 bytes (e.g. e188b4) it + treats them as a UTF-8 character and outputs the equivalent code point in + hex. See comments at its head for details. Updating to a new Unicode release ================================= When there is a new release of Unicode, the files in Unicode.tables must be -refreshed from the web site. If the new version of Unicode adds new character -scripts, the source file pcre2_ucp.h and both the MultiStage2.py and the -GenerateUtt.py scripts must be edited to add the new names. I have been adding -each new group at the end of the relevant list, with a comment. Note also that -both the pcre2syntax.3 and pcre2pattern.3 man pages contain lists of Unicode -script names. - -MultiStage2.py has two lists: the full names and the abbreviations that are -found in the ScriptExtensions.txt file. A list of script names and their -abbreviations can be found in the PropertyValueAliases.txt file on the -Unicode web site. There is also a Wikipedia page that lists them, and notes the -Unicode version in which they were introduced: - -https://en.wikipedia.org/wiki/Unicode_scripts#Table_of_Unicode_scripts - -Once the script name lists have been updated, MultiStage2.py can be run to -generate a new version of pcre2_ucd.c, and GenerateUtt.py can be run to -generate the tricky tables for inclusion in pcre2_tables.c (which must be -hand-edited). If MultiStage2.py gives the error "ValueError: list.index(x): x -not in list", the cause is usually a missing (or misspelt) name in one of the -lists of scripts. - -The ucptest program can be compiled and used to check that the new tables in -pcre2_ucd.c work properly, using the data files in ucptestdata to check a -number of test characters. It used to be necessary to update the source -ucptest.c whenever new Unicode scripts were added, but this is no longer -required because that program now uses the lists in the PCRE2 source. However, -adding a few tests for new scripts to the files in ucptestdata is a good idea. +refreshed from the web site. Once that is done, the four Python scripts that +generate files from the Unicode data can be run from within the "maint" +directory. + +Note: Previously, it was necessary to update lists of scripts and their +abbreviations by hand before running the Python scripts. This is no longer +necessary because the scripts have been upgraded to extract this information +themselves. Also, there used to be explicit lists of script in two of the man +pages. This is no longer the case. + +You can give an output file name as an argument to the following scripts, but +by default: + +GenerateUcd.py creates pcre2_ucd.c ) +GenerateUcpHeader.py creates pcre2_ucp.h ) in the current directory +GenerateUcpTables.py creates pcre2_ucptables.c ) + +These files can be compared against the existing versions in the src directory +to check on any changes before replacing the old files, but you can also +generate directly into the final location by running: + +./GenerateUcd.py ../src/pcre2_ucd.c +./GenerateUcpHeader.py ../src/pcre2_ucp.h +./GenerateUcpTables.py ../src/pcre2_ucptables.c + +Once the .c and .h files are in the ../src directory, the ucptest program can +be compiled and used to check that the new tables work properly. The data files +in ucptestdata are set up to check a number of test characters. See the +comments at the start of ucptest.c. If there are new scripts, adding a few +tests to the files in ucptestdata is a good idea. + +Finally, you should run the GenerateTest26.py script to regenerate new versions +of the input and expected output from a series of Unicode property tests that +are automatically generated from the Unicode data files. By default, the files +are written to testinput26 and testoutput26 in the current directory, but you +can give an alternative directory name as an argument to the script. These +files should eventually be installed in the main testdata directory. Preparing for a PCRE2 release @@ -439,4 +461,4 @@ years. Philip Hazel Email local part: Philip.Hazel Email domain: gmail.com -Last updated: 26 August 2021 +Last updated: 10 January 2022 diff --git a/pcre2/maint/Unicode.tables/BidiMirroring.txt b/pcre2/maint/Unicode.tables/BidiMirroring.txt new file mode 100644 index 0000000000000000000000000000000000000000..bd8e2c5d00177655b5a84b79415fe7693e272148 --- /dev/null +++ b/pcre2/maint/Unicode.tables/BidiMirroring.txt @@ -0,0 +1,633 @@ +# BidiMirroring-14.0.0.txt +# Date: 2021-08-08, 22:55:00 GMT [KW, RP] +# © 2021 Unicode®, Inc. +# For terms of use, see https://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see https://www.unicode.org/reports/tr44/ +# +# Bidi_Mirroring_Glyph Property +# +# This file is an informative contributory data file in the +# Unicode Character Database. +# +# This data file lists characters that have the Bidi_Mirrored=Yes property +# value, for which there is another Unicode character that typically has a glyph +# that is the mirror image of the original character's glyph. +# +# The repertoire covered by the file is Unicode 14.0.0. +# +# The file contains a list of lines with mappings from one code point +# to another one for character-based mirroring. +# Note that for "real" mirroring, a rendering engine needs to select +# appropriate alternative glyphs, and that many Unicode characters do not +# have a mirror-image Unicode character. +# +# Each mapping line contains two fields, separated by a semicolon (';'). +# Each of the two fields contains a code point represented as a +# variable-length hexadecimal value with 4 to 6 digits. +# A comment indicates where the characters are "BEST FIT" mirroring. +# +# Code points for which Bidi_Mirrored=Yes, but for which no appropriate +# characters exist with mirrored glyphs, are +# listed as comments at the end of the file. +# +# Formally, the default value of the Bidi_Mirroring_Glyph property +# for each code point is , unless a mapping to +# some other character is specified in this data file. When a code +# point has the default value for the Bidi_Mirroring_Glyph property, +# that means that no other character exists whose glyph is suitable +# for character-based mirroring. +# +# For information on bidi mirroring, see UAX #9: Unicode Bidirectional Algorithm, +# at https://www.unicode.org/reports/tr9/ +# +# This file was originally created by Markus Scherer. +# Extended for Unicode 3.2, 4.0, 4.1, 5.0, 5.1, 5.2, and 6.0 by Ken Whistler, +# and for subsequent versions by Ken Whistler, Laurentiu Iancu, and Roozbeh Pournader. +# +# Historical and Compatibility Information: +# +# The OpenType Mirroring Pairs List (OMPL) is frozen to match the +# Unicode 5.1 version of the Bidi_Mirroring_Glyph property (2008). +# See https://www.microsoft.com/typography/otspec/ompl.txt +# +# The Unicode 6.1 version of the Bidi_Mirroring_Glyph property (2011) +# added one mirroring pair: 27CB <--> 27CD. +# +# The Unicode 11.0 version of the Bidi_Mirroring_Glyph property (2018) +# underwent a substantial revision, to formally recognize all of the +# exact mirroring pairs and "BEST FIT" mirroring pairs that had been +# added after the freezing of the OMPL list. As a result, starting +# with Unicode 11.0, the bmg mapping values more accurately reflect +# the current status of glyphs for Bidi_Mirrored characters in +# the Unicode Standard, but this listing now extends significantly +# beyond the frozen OMPL list. Implementers should be aware of this +# intentional distinction. +# +# ############################################################ +# +# Property: Bidi_Mirroring_Glyph +# +# @missing: 0000..10FFFF; + +0028; 0029 # LEFT PARENTHESIS +0029; 0028 # RIGHT PARENTHESIS +003C; 003E # LESS-THAN SIGN +003E; 003C # GREATER-THAN SIGN +005B; 005D # LEFT SQUARE BRACKET +005D; 005B # RIGHT SQUARE BRACKET +007B; 007D # LEFT CURLY BRACKET +007D; 007B # RIGHT CURLY BRACKET +00AB; 00BB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB; 00AB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +0F3A; 0F3B # TIBETAN MARK GUG RTAGS GYON +0F3B; 0F3A # TIBETAN MARK GUG RTAGS GYAS +0F3C; 0F3D # TIBETAN MARK ANG KHANG GYON +0F3D; 0F3C # TIBETAN MARK ANG KHANG GYAS +169B; 169C # OGHAM FEATHER MARK +169C; 169B # OGHAM REVERSED FEATHER MARK +2039; 203A # SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A; 2039 # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2045; 2046 # LEFT SQUARE BRACKET WITH QUILL +2046; 2045 # RIGHT SQUARE BRACKET WITH QUILL +207D; 207E # SUPERSCRIPT LEFT PARENTHESIS +207E; 207D # SUPERSCRIPT RIGHT PARENTHESIS +208D; 208E # SUBSCRIPT LEFT PARENTHESIS +208E; 208D # SUBSCRIPT RIGHT PARENTHESIS +2208; 220B # ELEMENT OF +2209; 220C # [BEST FIT] NOT AN ELEMENT OF +220A; 220D # SMALL ELEMENT OF +220B; 2208 # CONTAINS AS MEMBER +220C; 2209 # [BEST FIT] DOES NOT CONTAIN AS MEMBER +220D; 220A # SMALL CONTAINS AS MEMBER +2215; 29F5 # DIVISION SLASH +221F; 2BFE # RIGHT ANGLE +2220; 29A3 # ANGLE +2221; 299B # MEASURED ANGLE +2222; 29A0 # SPHERICAL ANGLE +2224; 2AEE # DOES NOT DIVIDE +223C; 223D # TILDE OPERATOR +223D; 223C # REVERSED TILDE +2243; 22CD # ASYMPTOTICALLY EQUAL TO +2245; 224C # APPROXIMATELY EQUAL TO +224C; 2245 # ALL EQUAL TO +2252; 2253 # APPROXIMATELY EQUAL TO OR THE IMAGE OF +2253; 2252 # IMAGE OF OR APPROXIMATELY EQUAL TO +2254; 2255 # COLON EQUALS +2255; 2254 # EQUALS COLON +2264; 2265 # LESS-THAN OR EQUAL TO +2265; 2264 # GREATER-THAN OR EQUAL TO +2266; 2267 # LESS-THAN OVER EQUAL TO +2267; 2266 # GREATER-THAN OVER EQUAL TO +2268; 2269 # [BEST FIT] LESS-THAN BUT NOT EQUAL TO +2269; 2268 # [BEST FIT] GREATER-THAN BUT NOT EQUAL TO +226A; 226B # MUCH LESS-THAN +226B; 226A # MUCH GREATER-THAN +226E; 226F # [BEST FIT] NOT LESS-THAN +226F; 226E # [BEST FIT] NOT GREATER-THAN +2270; 2271 # [BEST FIT] NEITHER LESS-THAN NOR EQUAL TO +2271; 2270 # [BEST FIT] NEITHER GREATER-THAN NOR EQUAL TO +2272; 2273 # [BEST FIT] LESS-THAN OR EQUIVALENT TO +2273; 2272 # [BEST FIT] GREATER-THAN OR EQUIVALENT TO +2274; 2275 # [BEST FIT] NEITHER LESS-THAN NOR EQUIVALENT TO +2275; 2274 # [BEST FIT] NEITHER GREATER-THAN NOR EQUIVALENT TO +2276; 2277 # LESS-THAN OR GREATER-THAN +2277; 2276 # GREATER-THAN OR LESS-THAN +2278; 2279 # [BEST FIT] NEITHER LESS-THAN NOR GREATER-THAN +2279; 2278 # [BEST FIT] NEITHER GREATER-THAN NOR LESS-THAN +227A; 227B # PRECEDES +227B; 227A # SUCCEEDS +227C; 227D # PRECEDES OR EQUAL TO +227D; 227C # SUCCEEDS OR EQUAL TO +227E; 227F # [BEST FIT] PRECEDES OR EQUIVALENT TO +227F; 227E # [BEST FIT] SUCCEEDS OR EQUIVALENT TO +2280; 2281 # [BEST FIT] DOES NOT PRECEDE +2281; 2280 # [BEST FIT] DOES NOT SUCCEED +2282; 2283 # SUBSET OF +2283; 2282 # SUPERSET OF +2284; 2285 # [BEST FIT] NOT A SUBSET OF +2285; 2284 # [BEST FIT] NOT A SUPERSET OF +2286; 2287 # SUBSET OF OR EQUAL TO +2287; 2286 # SUPERSET OF OR EQUAL TO +2288; 2289 # [BEST FIT] NEITHER A SUBSET OF NOR EQUAL TO +2289; 2288 # [BEST FIT] NEITHER A SUPERSET OF NOR EQUAL TO +228A; 228B # [BEST FIT] SUBSET OF WITH NOT EQUAL TO +228B; 228A # [BEST FIT] SUPERSET OF WITH NOT EQUAL TO +228F; 2290 # SQUARE IMAGE OF +2290; 228F # SQUARE ORIGINAL OF +2291; 2292 # SQUARE IMAGE OF OR EQUAL TO +2292; 2291 # SQUARE ORIGINAL OF OR EQUAL TO +2298; 29B8 # CIRCLED DIVISION SLASH +22A2; 22A3 # RIGHT TACK +22A3; 22A2 # LEFT TACK +22A6; 2ADE # ASSERTION +22A8; 2AE4 # TRUE +22A9; 2AE3 # FORCES +22AB; 2AE5 # DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +22B0; 22B1 # PRECEDES UNDER RELATION +22B1; 22B0 # SUCCEEDS UNDER RELATION +22B2; 22B3 # NORMAL SUBGROUP OF +22B3; 22B2 # CONTAINS AS NORMAL SUBGROUP +22B4; 22B5 # NORMAL SUBGROUP OF OR EQUAL TO +22B5; 22B4 # CONTAINS AS NORMAL SUBGROUP OR EQUAL TO +22B6; 22B7 # ORIGINAL OF +22B7; 22B6 # IMAGE OF +22B8; 27DC # MULTIMAP +22C9; 22CA # LEFT NORMAL FACTOR SEMIDIRECT PRODUCT +22CA; 22C9 # RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT +22CB; 22CC # LEFT SEMIDIRECT PRODUCT +22CC; 22CB # RIGHT SEMIDIRECT PRODUCT +22CD; 2243 # REVERSED TILDE EQUALS +22D0; 22D1 # DOUBLE SUBSET +22D1; 22D0 # DOUBLE SUPERSET +22D6; 22D7 # LESS-THAN WITH DOT +22D7; 22D6 # GREATER-THAN WITH DOT +22D8; 22D9 # VERY MUCH LESS-THAN +22D9; 22D8 # VERY MUCH GREATER-THAN +22DA; 22DB # LESS-THAN EQUAL TO OR GREATER-THAN +22DB; 22DA # GREATER-THAN EQUAL TO OR LESS-THAN +22DC; 22DD # EQUAL TO OR LESS-THAN +22DD; 22DC # EQUAL TO OR GREATER-THAN +22DE; 22DF # EQUAL TO OR PRECEDES +22DF; 22DE # EQUAL TO OR SUCCEEDS +22E0; 22E1 # [BEST FIT] DOES NOT PRECEDE OR EQUAL +22E1; 22E0 # [BEST FIT] DOES NOT SUCCEED OR EQUAL +22E2; 22E3 # [BEST FIT] NOT SQUARE IMAGE OF OR EQUAL TO +22E3; 22E2 # [BEST FIT] NOT SQUARE ORIGINAL OF OR EQUAL TO +22E4; 22E5 # [BEST FIT] SQUARE IMAGE OF OR NOT EQUAL TO +22E5; 22E4 # [BEST FIT] SQUARE ORIGINAL OF OR NOT EQUAL TO +22E6; 22E7 # [BEST FIT] LESS-THAN BUT NOT EQUIVALENT TO +22E7; 22E6 # [BEST FIT] GREATER-THAN BUT NOT EQUIVALENT TO +22E8; 22E9 # [BEST FIT] PRECEDES BUT NOT EQUIVALENT TO +22E9; 22E8 # [BEST FIT] SUCCEEDS BUT NOT EQUIVALENT TO +22EA; 22EB # [BEST FIT] NOT NORMAL SUBGROUP OF +22EB; 22EA # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP +22EC; 22ED # [BEST FIT] NOT NORMAL SUBGROUP OF OR EQUAL TO +22ED; 22EC # [BEST FIT] DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL +22F0; 22F1 # UP RIGHT DIAGONAL ELLIPSIS +22F1; 22F0 # DOWN RIGHT DIAGONAL ELLIPSIS +22F2; 22FA # ELEMENT OF WITH LONG HORIZONTAL STROKE +22F3; 22FB # ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22F4; 22FC # SMALL ELEMENT OF WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22F6; 22FD # ELEMENT OF WITH OVERBAR +22F7; 22FE # SMALL ELEMENT OF WITH OVERBAR +22FA; 22F2 # CONTAINS WITH LONG HORIZONTAL STROKE +22FB; 22F3 # CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22FC; 22F4 # SMALL CONTAINS WITH VERTICAL BAR AT END OF HORIZONTAL STROKE +22FD; 22F6 # CONTAINS WITH OVERBAR +22FE; 22F7 # SMALL CONTAINS WITH OVERBAR +2308; 2309 # LEFT CEILING +2309; 2308 # RIGHT CEILING +230A; 230B # LEFT FLOOR +230B; 230A # RIGHT FLOOR +2329; 232A # LEFT-POINTING ANGLE BRACKET +232A; 2329 # RIGHT-POINTING ANGLE BRACKET +2768; 2769 # MEDIUM LEFT PARENTHESIS ORNAMENT +2769; 2768 # MEDIUM RIGHT PARENTHESIS ORNAMENT +276A; 276B # MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B; 276A # MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C; 276D # MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D; 276C # MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E; 276F # HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F; 276E # HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770; 2771 # HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771; 2770 # HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772; 2773 # LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773; 2772 # LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774; 2775 # MEDIUM LEFT CURLY BRACKET ORNAMENT +2775; 2774 # MEDIUM RIGHT CURLY BRACKET ORNAMENT +27C3; 27C4 # OPEN SUBSET +27C4; 27C3 # OPEN SUPERSET +27C5; 27C6 # LEFT S-SHAPED BAG DELIMITER +27C6; 27C5 # RIGHT S-SHAPED BAG DELIMITER +27C8; 27C9 # REVERSE SOLIDUS PRECEDING SUBSET +27C9; 27C8 # SUPERSET PRECEDING SOLIDUS +27CB; 27CD # MATHEMATICAL RISING DIAGONAL +27CD; 27CB # MATHEMATICAL FALLING DIAGONAL +27D5; 27D6 # LEFT OUTER JOIN +27D6; 27D5 # RIGHT OUTER JOIN +27DC; 22B8 # LEFT MULTIMAP +27DD; 27DE # LONG RIGHT TACK +27DE; 27DD # LONG LEFT TACK +27E2; 27E3 # WHITE CONCAVE-SIDED DIAMOND WITH LEFTWARDS TICK +27E3; 27E2 # WHITE CONCAVE-SIDED DIAMOND WITH RIGHTWARDS TICK +27E4; 27E5 # WHITE SQUARE WITH LEFTWARDS TICK +27E5; 27E4 # WHITE SQUARE WITH RIGHTWARDS TICK +27E6; 27E7 # MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7; 27E6 # MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8; 27E9 # MATHEMATICAL LEFT ANGLE BRACKET +27E9; 27E8 # MATHEMATICAL RIGHT ANGLE BRACKET +27EA; 27EB # MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB; 27EA # MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC; 27ED # MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED; 27EC # MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE; 27EF # MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF; 27EE # MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983; 2984 # LEFT WHITE CURLY BRACKET +2984; 2983 # RIGHT WHITE CURLY BRACKET +2985; 2986 # LEFT WHITE PARENTHESIS +2986; 2985 # RIGHT WHITE PARENTHESIS +2987; 2988 # Z NOTATION LEFT IMAGE BRACKET +2988; 2987 # Z NOTATION RIGHT IMAGE BRACKET +2989; 298A # Z NOTATION LEFT BINDING BRACKET +298A; 2989 # Z NOTATION RIGHT BINDING BRACKET +298B; 298C # LEFT SQUARE BRACKET WITH UNDERBAR +298C; 298B # RIGHT SQUARE BRACKET WITH UNDERBAR +298D; 2990 # LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E; 298F # RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F; 298E # LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990; 298D # RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991; 2992 # LEFT ANGLE BRACKET WITH DOT +2992; 2991 # RIGHT ANGLE BRACKET WITH DOT +2993; 2994 # LEFT ARC LESS-THAN BRACKET +2994; 2993 # RIGHT ARC GREATER-THAN BRACKET +2995; 2996 # DOUBLE LEFT ARC GREATER-THAN BRACKET +2996; 2995 # DOUBLE RIGHT ARC LESS-THAN BRACKET +2997; 2998 # LEFT BLACK TORTOISE SHELL BRACKET +2998; 2997 # RIGHT BLACK TORTOISE SHELL BRACKET +299B; 2221 # MEASURED ANGLE OPENING LEFT +29A0; 2222 # SPHERICAL ANGLE OPENING LEFT +29A3; 2220 # REVERSED ANGLE +29A4; 29A5 # ANGLE WITH UNDERBAR +29A5; 29A4 # REVERSED ANGLE WITH UNDERBAR +29A8; 29A9 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND RIGHT +29A9; 29A8 # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING UP AND LEFT +29AA; 29AB # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND RIGHT +29AB; 29AA # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING DOWN AND LEFT +29AC; 29AD # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND UP +29AD; 29AC # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND UP +29AE; 29AF # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING RIGHT AND DOWN +29AF; 29AE # MEASURED ANGLE WITH OPEN ARM ENDING IN ARROW POINTING LEFT AND DOWN +29B8; 2298 # CIRCLED REVERSE SOLIDUS +29C0; 29C1 # CIRCLED LESS-THAN +29C1; 29C0 # CIRCLED GREATER-THAN +29C4; 29C5 # SQUARED RISING DIAGONAL SLASH +29C5; 29C4 # SQUARED FALLING DIAGONAL SLASH +29CF; 29D0 # LEFT TRIANGLE BESIDE VERTICAL BAR +29D0; 29CF # VERTICAL BAR BESIDE RIGHT TRIANGLE +29D1; 29D2 # BOWTIE WITH LEFT HALF BLACK +29D2; 29D1 # BOWTIE WITH RIGHT HALF BLACK +29D4; 29D5 # TIMES WITH LEFT HALF BLACK +29D5; 29D4 # TIMES WITH RIGHT HALF BLACK +29D8; 29D9 # LEFT WIGGLY FENCE +29D9; 29D8 # RIGHT WIGGLY FENCE +29DA; 29DB # LEFT DOUBLE WIGGLY FENCE +29DB; 29DA # RIGHT DOUBLE WIGGLY FENCE +29E8; 29E9 # DOWN-POINTING TRIANGLE WITH LEFT HALF BLACK +29E9; 29E8 # DOWN-POINTING TRIANGLE WITH RIGHT HALF BLACK +29F5; 2215 # REVERSE SOLIDUS OPERATOR +29F8; 29F9 # BIG SOLIDUS +29F9; 29F8 # BIG REVERSE SOLIDUS +29FC; 29FD # LEFT-POINTING CURVED ANGLE BRACKET +29FD; 29FC # RIGHT-POINTING CURVED ANGLE BRACKET +2A2B; 2A2C # MINUS SIGN WITH FALLING DOTS +2A2C; 2A2B # MINUS SIGN WITH RISING DOTS +2A2D; 2A2E # PLUS SIGN IN LEFT HALF CIRCLE +2A2E; 2A2D # PLUS SIGN IN RIGHT HALF CIRCLE +2A34; 2A35 # MULTIPLICATION SIGN IN LEFT HALF CIRCLE +2A35; 2A34 # MULTIPLICATION SIGN IN RIGHT HALF CIRCLE +2A3C; 2A3D # INTERIOR PRODUCT +2A3D; 2A3C # RIGHTHAND INTERIOR PRODUCT +2A64; 2A65 # Z NOTATION DOMAIN ANTIRESTRICTION +2A65; 2A64 # Z NOTATION RANGE ANTIRESTRICTION +2A79; 2A7A # LESS-THAN WITH CIRCLE INSIDE +2A7A; 2A79 # GREATER-THAN WITH CIRCLE INSIDE +2A7B; 2A7C # [BEST FIT] LESS-THAN WITH QUESTION MARK ABOVE +2A7C; 2A7B # [BEST FIT] GREATER-THAN WITH QUESTION MARK ABOVE +2A7D; 2A7E # LESS-THAN OR SLANTED EQUAL TO +2A7E; 2A7D # GREATER-THAN OR SLANTED EQUAL TO +2A7F; 2A80 # LESS-THAN OR SLANTED EQUAL TO WITH DOT INSIDE +2A80; 2A7F # GREATER-THAN OR SLANTED EQUAL TO WITH DOT INSIDE +2A81; 2A82 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE +2A82; 2A81 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE +2A83; 2A84 # LESS-THAN OR SLANTED EQUAL TO WITH DOT ABOVE RIGHT +2A84; 2A83 # GREATER-THAN OR SLANTED EQUAL TO WITH DOT ABOVE LEFT +2A85; 2A86 # [BEST FIT] LESS-THAN OR APPROXIMATE +2A86; 2A85 # [BEST FIT] GREATER-THAN OR APPROXIMATE +2A87; 2A88 # [BEST FIT] LESS-THAN AND SINGLE-LINE NOT EQUAL TO +2A88; 2A87 # [BEST FIT] GREATER-THAN AND SINGLE-LINE NOT EQUAL TO +2A89; 2A8A # [BEST FIT] LESS-THAN AND NOT APPROXIMATE +2A8A; 2A89 # [BEST FIT] GREATER-THAN AND NOT APPROXIMATE +2A8B; 2A8C # LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN +2A8C; 2A8B # GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN +2A8D; 2A8E # [BEST FIT] LESS-THAN ABOVE SIMILAR OR EQUAL +2A8E; 2A8D # [BEST FIT] GREATER-THAN ABOVE SIMILAR OR EQUAL +2A8F; 2A90 # [BEST FIT] LESS-THAN ABOVE SIMILAR ABOVE GREATER-THAN +2A90; 2A8F # [BEST FIT] GREATER-THAN ABOVE SIMILAR ABOVE LESS-THAN +2A91; 2A92 # LESS-THAN ABOVE GREATER-THAN ABOVE DOUBLE-LINE EQUAL +2A92; 2A91 # GREATER-THAN ABOVE LESS-THAN ABOVE DOUBLE-LINE EQUAL +2A93; 2A94 # LESS-THAN ABOVE SLANTED EQUAL ABOVE GREATER-THAN ABOVE SLANTED EQUAL +2A94; 2A93 # GREATER-THAN ABOVE SLANTED EQUAL ABOVE LESS-THAN ABOVE SLANTED EQUAL +2A95; 2A96 # SLANTED EQUAL TO OR LESS-THAN +2A96; 2A95 # SLANTED EQUAL TO OR GREATER-THAN +2A97; 2A98 # SLANTED EQUAL TO OR LESS-THAN WITH DOT INSIDE +2A98; 2A97 # SLANTED EQUAL TO OR GREATER-THAN WITH DOT INSIDE +2A99; 2A9A # DOUBLE-LINE EQUAL TO OR LESS-THAN +2A9A; 2A99 # DOUBLE-LINE EQUAL TO OR GREATER-THAN +2A9B; 2A9C # DOUBLE-LINE SLANTED EQUAL TO OR LESS-THAN +2A9C; 2A9B # DOUBLE-LINE SLANTED EQUAL TO OR GREATER-THAN +2A9D; 2A9E # [BEST FIT] SIMILAR OR LESS-THAN +2A9E; 2A9D # [BEST FIT] SIMILAR OR GREATER-THAN +2A9F; 2AA0 # [BEST FIT] SIMILAR ABOVE LESS-THAN ABOVE EQUALS SIGN +2AA0; 2A9F # [BEST FIT] SIMILAR ABOVE GREATER-THAN ABOVE EQUALS SIGN +2AA1; 2AA2 # DOUBLE NESTED LESS-THAN +2AA2; 2AA1 # DOUBLE NESTED GREATER-THAN +2AA6; 2AA7 # LESS-THAN CLOSED BY CURVE +2AA7; 2AA6 # GREATER-THAN CLOSED BY CURVE +2AA8; 2AA9 # LESS-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL +2AA9; 2AA8 # GREATER-THAN CLOSED BY CURVE ABOVE SLANTED EQUAL +2AAA; 2AAB # SMALLER THAN +2AAB; 2AAA # LARGER THAN +2AAC; 2AAD # SMALLER THAN OR EQUAL TO +2AAD; 2AAC # LARGER THAN OR EQUAL TO +2AAF; 2AB0 # PRECEDES ABOVE SINGLE-LINE EQUALS SIGN +2AB0; 2AAF # SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN +2AB1; 2AB2 # [BEST FIT] PRECEDES ABOVE SINGLE-LINE NOT EQUAL TO +2AB2; 2AB1 # [BEST FIT] SUCCEEDS ABOVE SINGLE-LINE NOT EQUAL TO +2AB3; 2AB4 # PRECEDES ABOVE EQUALS SIGN +2AB4; 2AB3 # SUCCEEDS ABOVE EQUALS SIGN +2AB5; 2AB6 # [BEST FIT] PRECEDES ABOVE NOT EQUAL TO +2AB6; 2AB5 # [BEST FIT] SUCCEEDS ABOVE NOT EQUAL TO +2AB7; 2AB8 # [BEST FIT] PRECEDES ABOVE ALMOST EQUAL TO +2AB8; 2AB7 # [BEST FIT] SUCCEEDS ABOVE ALMOST EQUAL TO +2AB9; 2ABA # [BEST FIT] PRECEDES ABOVE NOT ALMOST EQUAL TO +2ABA; 2AB9 # [BEST FIT] SUCCEEDS ABOVE NOT ALMOST EQUAL TO +2ABB; 2ABC # DOUBLE PRECEDES +2ABC; 2ABB # DOUBLE SUCCEEDS +2ABD; 2ABE # SUBSET WITH DOT +2ABE; 2ABD # SUPERSET WITH DOT +2ABF; 2AC0 # SUBSET WITH PLUS SIGN BELOW +2AC0; 2ABF # SUPERSET WITH PLUS SIGN BELOW +2AC1; 2AC2 # SUBSET WITH MULTIPLICATION SIGN BELOW +2AC2; 2AC1 # SUPERSET WITH MULTIPLICATION SIGN BELOW +2AC3; 2AC4 # SUBSET OF OR EQUAL TO WITH DOT ABOVE +2AC4; 2AC3 # SUPERSET OF OR EQUAL TO WITH DOT ABOVE +2AC5; 2AC6 # SUBSET OF ABOVE EQUALS SIGN +2AC6; 2AC5 # SUPERSET OF ABOVE EQUALS SIGN +2AC7; 2AC8 # [BEST FIT] SUBSET OF ABOVE TILDE OPERATOR +2AC8; 2AC7 # [BEST FIT] SUPERSET OF ABOVE TILDE OPERATOR +2AC9; 2ACA # [BEST FIT] SUBSET OF ABOVE ALMOST EQUAL TO +2ACA; 2AC9 # [BEST FIT] SUPERSET OF ABOVE ALMOST EQUAL TO +2ACB; 2ACC # [BEST FIT] SUBSET OF ABOVE NOT EQUAL TO +2ACC; 2ACB # [BEST FIT] SUPERSET OF ABOVE NOT EQUAL TO +2ACD; 2ACE # SQUARE LEFT OPEN BOX OPERATOR +2ACE; 2ACD # SQUARE RIGHT OPEN BOX OPERATOR +2ACF; 2AD0 # CLOSED SUBSET +2AD0; 2ACF # CLOSED SUPERSET +2AD1; 2AD2 # CLOSED SUBSET OR EQUAL TO +2AD2; 2AD1 # CLOSED SUPERSET OR EQUAL TO +2AD3; 2AD4 # SUBSET ABOVE SUPERSET +2AD4; 2AD3 # SUPERSET ABOVE SUBSET +2AD5; 2AD6 # SUBSET ABOVE SUBSET +2AD6; 2AD5 # SUPERSET ABOVE SUPERSET +2ADE; 22A6 # SHORT LEFT TACK +2AE3; 22A9 # DOUBLE VERTICAL BAR LEFT TURNSTILE +2AE4; 22A8 # VERTICAL BAR DOUBLE LEFT TURNSTILE +2AE5; 22AB # DOUBLE VERTICAL BAR DOUBLE LEFT TURNSTILE +2AEC; 2AED # DOUBLE STROKE NOT SIGN +2AED; 2AEC # REVERSED DOUBLE STROKE NOT SIGN +2AEE; 2224 # DOES NOT DIVIDE WITH REVERSED NEGATION SLASH +2AF7; 2AF8 # TRIPLE NESTED LESS-THAN +2AF8; 2AF7 # TRIPLE NESTED GREATER-THAN +2AF9; 2AFA # DOUBLE-LINE SLANTED LESS-THAN OR EQUAL TO +2AFA; 2AF9 # DOUBLE-LINE SLANTED GREATER-THAN OR EQUAL TO +2BFE; 221F # REVERSED RIGHT ANGLE +2E02; 2E03 # LEFT SUBSTITUTION BRACKET +2E03; 2E02 # RIGHT SUBSTITUTION BRACKET +2E04; 2E05 # LEFT DOTTED SUBSTITUTION BRACKET +2E05; 2E04 # RIGHT DOTTED SUBSTITUTION BRACKET +2E09; 2E0A # LEFT TRANSPOSITION BRACKET +2E0A; 2E09 # RIGHT TRANSPOSITION BRACKET +2E0C; 2E0D # LEFT RAISED OMISSION BRACKET +2E0D; 2E0C # RIGHT RAISED OMISSION BRACKET +2E1C; 2E1D # LEFT LOW PARAPHRASE BRACKET +2E1D; 2E1C # RIGHT LOW PARAPHRASE BRACKET +2E20; 2E21 # LEFT VERTICAL BAR WITH QUILL +2E21; 2E20 # RIGHT VERTICAL BAR WITH QUILL +2E22; 2E23 # TOP LEFT HALF BRACKET +2E23; 2E22 # TOP RIGHT HALF BRACKET +2E24; 2E25 # BOTTOM LEFT HALF BRACKET +2E25; 2E24 # BOTTOM RIGHT HALF BRACKET +2E26; 2E27 # LEFT SIDEWAYS U BRACKET +2E27; 2E26 # RIGHT SIDEWAYS U BRACKET +2E28; 2E29 # LEFT DOUBLE PARENTHESIS +2E29; 2E28 # RIGHT DOUBLE PARENTHESIS +2E55; 2E56 # LEFT SQUARE BRACKET WITH STROKE +2E56; 2E55 # RIGHT SQUARE BRACKET WITH STROKE +2E57; 2E58 # LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58; 2E57 # RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59; 2E5A # TOP HALF LEFT PARENTHESIS +2E5A; 2E59 # TOP HALF RIGHT PARENTHESIS +2E5B; 2E5C # BOTTOM HALF LEFT PARENTHESIS +2E5C; 2E5B # BOTTOM HALF RIGHT PARENTHESIS +3008; 3009 # LEFT ANGLE BRACKET +3009; 3008 # RIGHT ANGLE BRACKET +300A; 300B # LEFT DOUBLE ANGLE BRACKET +300B; 300A # RIGHT DOUBLE ANGLE BRACKET +300C; 300D # [BEST FIT] LEFT CORNER BRACKET +300D; 300C # [BEST FIT] RIGHT CORNER BRACKET +300E; 300F # [BEST FIT] LEFT WHITE CORNER BRACKET +300F; 300E # [BEST FIT] RIGHT WHITE CORNER BRACKET +3010; 3011 # LEFT BLACK LENTICULAR BRACKET +3011; 3010 # RIGHT BLACK LENTICULAR BRACKET +3014; 3015 # LEFT TORTOISE SHELL BRACKET +3015; 3014 # RIGHT TORTOISE SHELL BRACKET +3016; 3017 # LEFT WHITE LENTICULAR BRACKET +3017; 3016 # RIGHT WHITE LENTICULAR BRACKET +3018; 3019 # LEFT WHITE TORTOISE SHELL BRACKET +3019; 3018 # RIGHT WHITE TORTOISE SHELL BRACKET +301A; 301B # LEFT WHITE SQUARE BRACKET +301B; 301A # RIGHT WHITE SQUARE BRACKET +FE59; FE5A # SMALL LEFT PARENTHESIS +FE5A; FE59 # SMALL RIGHT PARENTHESIS +FE5B; FE5C # SMALL LEFT CURLY BRACKET +FE5C; FE5B # SMALL RIGHT CURLY BRACKET +FE5D; FE5E # SMALL LEFT TORTOISE SHELL BRACKET +FE5E; FE5D # SMALL RIGHT TORTOISE SHELL BRACKET +FE64; FE65 # SMALL LESS-THAN SIGN +FE65; FE64 # SMALL GREATER-THAN SIGN +FF08; FF09 # FULLWIDTH LEFT PARENTHESIS +FF09; FF08 # FULLWIDTH RIGHT PARENTHESIS +FF1C; FF1E # FULLWIDTH LESS-THAN SIGN +FF1E; FF1C # FULLWIDTH GREATER-THAN SIGN +FF3B; FF3D # FULLWIDTH LEFT SQUARE BRACKET +FF3D; FF3B # FULLWIDTH RIGHT SQUARE BRACKET +FF5B; FF5D # FULLWIDTH LEFT CURLY BRACKET +FF5D; FF5B # FULLWIDTH RIGHT CURLY BRACKET +FF5F; FF60 # FULLWIDTH LEFT WHITE PARENTHESIS +FF60; FF5F # FULLWIDTH RIGHT WHITE PARENTHESIS +FF62; FF63 # [BEST FIT] HALFWIDTH LEFT CORNER BRACKET +FF63; FF62 # [BEST FIT] HALFWIDTH RIGHT CORNER BRACKET + +# The following characters have no appropriate mirroring character. +# For these characters it is up to the rendering system +# to provide mirrored glyphs. + +# 2140; DOUBLE-STRUCK N-ARY SUMMATION +# 2201; COMPLEMENT +# 2202; PARTIAL DIFFERENTIAL +# 2203; THERE EXISTS +# 2204; THERE DOES NOT EXIST +# 2211; N-ARY SUMMATION +# 2216; SET MINUS +# 221A; SQUARE ROOT +# 221B; CUBE ROOT +# 221C; FOURTH ROOT +# 221D; PROPORTIONAL TO +# 2226; NOT PARALLEL TO +# 222B; INTEGRAL +# 222C; DOUBLE INTEGRAL +# 222D; TRIPLE INTEGRAL +# 222E; CONTOUR INTEGRAL +# 222F; SURFACE INTEGRAL +# 2230; VOLUME INTEGRAL +# 2231; CLOCKWISE INTEGRAL +# 2232; CLOCKWISE CONTOUR INTEGRAL +# 2233; ANTICLOCKWISE CONTOUR INTEGRAL +# 2239; EXCESS +# 223B; HOMOTHETIC +# 223E; INVERTED LAZY S +# 223F; SINE WAVE +# 2240; WREATH PRODUCT +# 2241; NOT TILDE +# 2242; MINUS TILDE +# 2244; NOT ASYMPTOTICALLY EQUAL TO +# 2246; APPROXIMATELY BUT NOT ACTUALLY EQUAL TO +# 2247; NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO +# 2248; ALMOST EQUAL TO +# 2249; NOT ALMOST EQUAL TO +# 224A; ALMOST EQUAL OR EQUAL TO +# 224B; TRIPLE TILDE +# 225F; QUESTIONED EQUAL TO +# 2260; NOT EQUAL TO +# 2262; NOT IDENTICAL TO +# 228C; MULTISET +# 22A7; MODELS +# 22AA; TRIPLE VERTICAL BAR RIGHT TURNSTILE +# 22AC; DOES NOT PROVE +# 22AD; NOT TRUE +# 22AE; DOES NOT FORCE +# 22AF; NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE +# 22BE; RIGHT ANGLE WITH ARC +# 22BF; RIGHT TRIANGLE +# 22F5; ELEMENT OF WITH DOT ABOVE +# 22F8; ELEMENT OF WITH UNDERBAR +# 22F9; ELEMENT OF WITH TWO HORIZONTAL STROKES +# 22FF; Z NOTATION BAG MEMBERSHIP +# 2320; TOP HALF INTEGRAL +# 2321; BOTTOM HALF INTEGRAL +# 27C0; THREE DIMENSIONAL ANGLE +# 27CC; LONG DIVISION +# 27D3; LOWER RIGHT CORNER WITH DOT +# 27D4; UPPER LEFT CORNER WITH DOT +# 299C; RIGHT ANGLE VARIANT WITH SQUARE +# 299D; MEASURED RIGHT ANGLE WITH DOT +# 299E; ANGLE WITH S INSIDE +# 299F; ACUTE ANGLE +# 29A2; TURNED ANGLE +# 29A6; OBLIQUE ANGLE OPENING UP +# 29A7; OBLIQUE ANGLE OPENING DOWN +# 29C2; CIRCLE WITH SMALL CIRCLE TO THE RIGHT +# 29C3; CIRCLE WITH TWO HORIZONTAL STROKES TO THE RIGHT +# 29C9; TWO JOINED SQUARES +# 29CE; RIGHT TRIANGLE ABOVE LEFT TRIANGLE +# 29DC; INCOMPLETE INFINITY +# 29E1; INCREASES AS +# 29E3; EQUALS SIGN AND SLANTED PARALLEL +# 29E4; EQUALS SIGN AND SLANTED PARALLEL WITH TILDE ABOVE +# 29E5; IDENTICAL TO AND SLANTED PARALLEL +# 29F4; RULE-DELAYED +# 29F6; SOLIDUS WITH OVERBAR +# 29F7; REVERSE SOLIDUS WITH HORIZONTAL STROKE +# 2A0A; MODULO TWO SUM +# 2A0B; SUMMATION WITH INTEGRAL +# 2A0C; QUADRUPLE INTEGRAL OPERATOR +# 2A0D; FINITE PART INTEGRAL +# 2A0E; INTEGRAL WITH DOUBLE STROKE +# 2A0F; INTEGRAL AVERAGE WITH SLASH +# 2A10; CIRCULATION FUNCTION +# 2A11; ANTICLOCKWISE INTEGRATION +# 2A12; LINE INTEGRATION WITH RECTANGULAR PATH AROUND POLE +# 2A13; LINE INTEGRATION WITH SEMICIRCULAR PATH AROUND POLE +# 2A14; LINE INTEGRATION NOT INCLUDING THE POLE +# 2A15; INTEGRAL AROUND A POINT OPERATOR +# 2A16; QUATERNION INTEGRAL OPERATOR +# 2A17; INTEGRAL WITH LEFTWARDS ARROW WITH HOOK +# 2A18; INTEGRAL WITH TIMES SIGN +# 2A19; INTEGRAL WITH INTERSECTION +# 2A1A; INTEGRAL WITH UNION +# 2A1B; INTEGRAL WITH OVERBAR +# 2A1C; INTEGRAL WITH UNDERBAR +# 2A1E; LARGE LEFT TRIANGLE OPERATOR +# 2A1F; Z NOTATION SCHEMA COMPOSITION +# 2A20; Z NOTATION SCHEMA PIPING +# 2A21; Z NOTATION SCHEMA PROJECTION +# 2A24; PLUS SIGN WITH TILDE ABOVE +# 2A26; PLUS SIGN WITH TILDE BELOW +# 2A29; MINUS SIGN WITH COMMA ABOVE +# 2A3E; Z NOTATION RELATIONAL COMPOSITION +# 2A57; SLOPING LARGE OR +# 2A58; SLOPING LARGE AND +# 2A6A; TILDE OPERATOR WITH DOT ABOVE +# 2A6B; TILDE OPERATOR WITH RISING DOTS +# 2A6C; SIMILAR MINUS SIMILAR +# 2A6D; CONGRUENT WITH DOT ABOVE +# 2A6F; ALMOST EQUAL TO WITH CIRCUMFLEX ACCENT +# 2A70; APPROXIMATELY EQUAL OR EQUAL TO +# 2A73; EQUALS SIGN ABOVE TILDE OPERATOR +# 2A74; DOUBLE COLON EQUAL +# 2AA3; DOUBLE NESTED LESS-THAN WITH UNDERBAR +# 2ADC; FORKING +# 2AE2; VERTICAL BAR TRIPLE RIGHT TURNSTILE +# 2AE6; LONG DASH FROM LEFT MEMBER OF DOUBLE VERTICAL +# 2AF3; PARALLEL WITH TILDE OPERATOR +# 2AFB; TRIPLE SOLIDUS BINARY RELATION +# 2AFD; DOUBLE SOLIDUS OPERATOR +# 1D6DB; MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +# 1D715; MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +# 1D74F; MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +# 1D789; MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +# 1D7C3; MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL + +# EOF diff --git a/pcre2/maint/Unicode.tables/DerivedBidiClass.txt b/pcre2/maint/Unicode.tables/DerivedBidiClass.txt new file mode 100644 index 0000000000000000000000000000000000000000..4012dc25da1ae38de025aa8baf5a864f555b8fa5 --- /dev/null +++ b/pcre2/maint/Unicode.tables/DerivedBidiClass.txt @@ -0,0 +1,2524 @@ +# DerivedBidiClass-14.0.0.txt +# Date: 2021-07-10, 00:35:02 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Bidi Class (listing UnicodeData.txt, field 4: see UAX #44: https://www.unicode.org/reports/tr44/) +# Unlike other properties, unassigned code points in blocks +# reserved for right-to-left scripts are given either types R or AL. +# +# The unassigned code points that default to AL are in the ranges: +# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF +# \U00010D00-\U00010D3F \U00010F30-\U00010F6F +# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF] +# +# This includes code points in the Arabic, Syriac, and Thaana blocks, among others. +# +# The unassigned code points that default to R are in the ranges: +# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F +# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF +# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF] +# +# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others. +# +# The unassigned code points that default to ET are in the range: +# [\u20A0-\u20CF] +# +# This consists of code points in the Currency Symbols block. +# +# The unassigned code points that default to BN have one of the following properties: +# Default_Ignorable_Code_Point +# Noncharacter_Code_Point +# +# For all other cases: + +# All code points not explicitly listed for Bidi_Class +# have the value Left_To_Right (L). + +# @missing: 0000..10FFFF; Left_To_Right + +# ================================================ + +# Bidi_Class=Left_To_Right + +0041..005A ; L # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; L # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; L # Lo FEMININE ORDINAL INDICATOR +00B5 ; L # L& MICRO SIGN +00BA ; L # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; L # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; L # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; L # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; L # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; L # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; L # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; L # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; L # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; L # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; L # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02BB..02C1 ; L # Lm [7] MODIFIER LETTER TURNED COMMA..MODIFIER LETTER REVERSED GLOTTAL STOP +02D0..02D1 ; L # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; L # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EE ; L # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; L # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; L # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; L # Lm GREEK YPOGEGRAMMENI +037B..037D ; L # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; L # L& GREEK CAPITAL LETTER YOT +0386 ; L # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; L # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; L # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; L # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; L # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; L # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0482 ; L # So CYRILLIC THOUSANDS SIGN +048A..052F ; L # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; L # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; L # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; L # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; L # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; L # Po ARMENIAN FULL STOP +0903 ; L # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; L # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093B ; L # Mc DEVANAGARI VOWEL SIGN OOE +093D ; L # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; L # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; L # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; L # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; L # Lo DEVANAGARI OM +0958..0961 ; L # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0964..0965 ; L # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; L # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; L # Po DEVANAGARI ABBREVIATION SIGN +0971 ; L # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; L # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0982..0983 ; L # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; L # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; L # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; L # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; L # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; L # Lo BENGALI LETTER LA +09B6..09B9 ; L # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; L # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; L # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C7..09C8 ; L # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; L # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; L # Lo BENGALI LETTER KHANDA TA +09D7 ; L # Mc BENGALI AU LENGTH MARK +09DC..09DD ; L # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; L # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E6..09EF ; L # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; L # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F4..09F9 ; L # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; L # So BENGALI ISSHAR +09FC ; L # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; L # Po BENGALI ABBREVIATION SIGN +0A03 ; L # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; L # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; L # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; L # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; L # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; L # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; L # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; L # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; L # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; L # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; L # Lo GURMUKHI LETTER FA +0A66..0A6F ; L # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A72..0A74 ; L # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A76 ; L # Po GURMUKHI ABBREVIATION SIGN +0A83 ; L # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; L # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; L # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; L # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; L # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; L # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; L # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; L # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; L # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; L # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; L # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; L # Lo GUJARATI OM +0AE0..0AE1 ; L # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE6..0AEF ; L # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; L # Po GUJARATI ABBREVIATION SIGN +0AF9 ; L # Lo GUJARATI LETTER ZHA +0B02..0B03 ; L # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; L # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; L # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; L # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; L # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; L # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; L # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; L # Lo ORIYA SIGN AVAGRAHA +0B3E ; L # Mc ORIYA VOWEL SIGN AA +0B40 ; L # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; L # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; L # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B57 ; L # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; L # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; L # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B66..0B6F ; L # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; L # So ORIYA ISSHAR +0B71 ; L # Lo ORIYA LETTER WA +0B72..0B77 ; L # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B83 ; L # Lo TAMIL SIGN VISARGA +0B85..0B8A ; L # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; L # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; L # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; L # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; L # Lo TAMIL LETTER JA +0B9E..0B9F ; L # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; L # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; L # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; L # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; L # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC1..0BC2 ; L # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; L # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; L # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; L # Lo TAMIL OM +0BD7 ; L # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; L # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; L # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0C01..0C03 ; L # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; L # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; L # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; L # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; L # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; L # Lo TELUGU SIGN AVAGRAHA +0C41..0C44 ; L # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C58..0C5A ; L # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; L # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; L # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C66..0C6F ; L # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; L # Po TELUGU SIGN SIDDHAM +0C7F ; L # So TELUGU SIGN TUUMU +0C80 ; L # Lo KANNADA SIGN SPACING CANDRABINDU +0C82..0C83 ; L # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; L # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; L # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; L # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; L # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; L # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; L # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; L # Lo KANNADA SIGN AVAGRAHA +0CBE ; L # Mc KANNADA VOWEL SIGN AA +0CBF ; L # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; L # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; L # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; L # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; L # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CD5..0CD6 ; L # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; L # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; L # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE6..0CEF ; L # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; L # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D02..0D03 ; L # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; L # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; L # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; L # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; L # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; L # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D46..0D48 ; L # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; L # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; L # Lo MALAYALAM LETTER DOT REPH +0D4F ; L # So MALAYALAM SIGN PARA +0D54..0D56 ; L # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; L # Mc MALAYALAM AU LENGTH MARK +0D58..0D5E ; L # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; L # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D66..0D6F ; L # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; L # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; L # So MALAYALAM DATE MARK +0D7A..0D7F ; L # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D82..0D83 ; L # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; L # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; L # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; L # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; L # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; L # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCF..0DD1 ; L # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDF ; L # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; L # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; L # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; L # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; L # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; L # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; L # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; L # Lm THAI CHARACTER MAIYAMOK +0E4F ; L # Po THAI CHARACTER FONGMAN +0E50..0E59 ; L # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; L # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; L # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; L # Lo LAO LETTER KHO TAM +0E86..0E8A ; L # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; L # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; L # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; L # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; L # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; L # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; L # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; L # Lm LAO KO LA +0ED0..0ED9 ; L # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; L # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; L # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; L # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; L # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; L # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; L # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; L # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; L # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; L # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; L # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; L # So TIBETAN MARK BSDUS RTAGS +0F36 ; L # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; L # So TIBETAN MARK CHE MGO +0F3E..0F3F ; L # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; L # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; L # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F7F ; L # Mc TIBETAN SIGN RNAM BCAD +0F85 ; L # Po TIBETAN MARK PALUTA +0F88..0F8C ; L # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0FBE..0FC5 ; L # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; L # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; L # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; L # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; L # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; L # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; L # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; L # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; L # Mc MYANMAR VOWEL SIGN E +1038 ; L # Mc MYANMAR SIGN VISARGA +103B..103C ; L # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103F ; L # Lo MYANMAR LETTER GREAT SA +1040..1049 ; L # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; L # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; L # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; L # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +105A..105D ; L # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; L # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; L # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; L # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; L # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; L # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; L # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1083..1084 ; L # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; L # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108E ; L # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; L # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; L # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; L # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109E..109F ; L # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5 ; L # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; L # L& GEORGIAN CAPITAL LETTER YN +10CD ; L # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; L # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; L # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; L # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; L # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; L # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; L # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; L # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; L # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; L # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; L # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; L # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; L # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; L # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; L # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; L # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; L # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; L # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; L # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; L # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; L # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1360..1368 ; L # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; L # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; L # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; L # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; L # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; L # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; L # So CANADIAN SYLLABICS CHI SIGN +166E ; L # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; L # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; L # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; L # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; L # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; L # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; L # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; L # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1715 ; L # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; L # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1734 ; L # Mc HANUNOO SIGN PAMUDPOD +1735..1736 ; L # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; L # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; L # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; L # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; L # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; L # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; L # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; L # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D4..17D6 ; L # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; L # Lm KHMER SIGN LEK TOO +17D8..17DA ; L # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DC ; L # Lo KHMER SIGN AVAKRAHASANYA +17E0..17E9 ; L # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +1810..1819 ; L # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; L # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; L # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; L # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; L # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; L # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; L # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; L # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; L # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1923..1926 ; L # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; L # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; L # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; L # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1946..194F ; L # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; L # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; L # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; L # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; L # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; L # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; L # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; L # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1A ; L # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1E..1A1F ; L # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; L # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; L # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; L # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; L # Mc TAI THAM VOWEL SIGN A +1A63..1A64 ; L # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A6D..1A72 ; L # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A80..1A89 ; L # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; L # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; L # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; L # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; L # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B04 ; L # Mc BALINESE SIGN BISAH +1B05..1B33 ; L # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B35 ; L # Mc BALINESE VOWEL SIGN TEDUNG +1B3B ; L # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; L # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; L # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; L # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; L # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; L # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; L # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; L # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E ; L # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B82 ; L # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; L # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; L # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; L # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; L # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; L # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; L # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; L # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; L # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; L # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; L # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; L # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; L # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; L # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; L # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; L # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C3B..1C3F ; L # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; L # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; L # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; L # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; L # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; L # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; L # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C88 ; L # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; L # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; L # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; L # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; L # Po VEDIC SIGN NIHSHVASA +1CE1 ; L # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; L # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; L # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; L # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; L # Mc VEDIC SIGN ATIKRAMA +1CFA ; L # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; L # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; L # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; L # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; L # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; L # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; L # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; L # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; L # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; L # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; L # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; L # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; L # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; L # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; L # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; L # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; L # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; L # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; L # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; L # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; L # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; L # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; L # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; L # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +200E ; L # Cf LEFT-TO-RIGHT MARK +2071 ; L # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; L # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; L # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; L # L& DOUBLE-STRUCK CAPITAL C +2107 ; L # L& EULER CONSTANT +210A..2113 ; L # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; L # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; L # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; L # L& DOUBLE-STRUCK CAPITAL Z +2126 ; L # L& OHM SIGN +2128 ; L # L& BLACK-LETTER CAPITAL Z +212A..212D ; L # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; L # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; L # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; L # L& INFORMATION SOURCE +213C..213F ; L # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; L # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; L # L& TURNED SMALL F +214F ; L # So SYMBOL FOR SAMARITAN SOURCE +2160..2182 ; L # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; L # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; L # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2336..237A ; L # So [69] APL FUNCTIONAL SYMBOL I-BEAM..APL FUNCTIONAL SYMBOL ALPHA +2395 ; L # So APL FUNCTIONAL SYMBOL QUAD +249C..24E9 ; L # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +26AC ; L # So MEDIUM SMALL WHITE CIRCLE +2800..28FF ; L # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2C00..2C7B ; L # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; L # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; L # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; L # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; L # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; L # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; L # L& GEORGIAN SMALL LETTER YN +2D2D ; L # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; L # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; L # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; L # Po TIFINAGH SEPARATOR MARK +2D80..2D96 ; L # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; L # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; L # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; L # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; L # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; L # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; L # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; L # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; L # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; L # Lm IDEOGRAPHIC ITERATION MARK +3006 ; L # Lo IDEOGRAPHIC CLOSING MARK +3007 ; L # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; L # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302E..302F ; L # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; L # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; L # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; L # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; L # Lo MASU MARK +3041..3096 ; L # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; L # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; L # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; L # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; L # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; L # Lo KATAKANA DIGRAPH KOTO +3105..312F ; L # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; L # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; L # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; L # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; L # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; L # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; L # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321C ; L # So [29] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED HANGUL CIEUC U +3220..3229 ; L # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; L # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; L # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3260..327B ; L # So [28] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL HIEUH A +327F ; L # So KOREAN STANDARD SYMBOL +3280..3289 ; L # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; L # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..32CB ; L # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +32D0..3376 ; L # So [167] CIRCLED KATAKANA A..SQUARE PC +337B..33DD ; L # So [99] SQUARE ERA NAME HEISEI..SQUARE WB +33E0..33FE ; L # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +3400..4DBF ; L # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; L # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; L # Lm YI SYLLABLE WU +A016..A48C ; L # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; L # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; L # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; L # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; L # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; L # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; L # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; L # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; L # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; L # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; L # Lo CYRILLIC LETTER MULTIOCULAR O +A680..A69B ; L # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; L # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; L # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; L # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F2..A6F7 ; L # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A722..A76F ; L # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; L # Lm MODIFIER LETTER US +A771..A787 ; L # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A789..A78A ; L # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; L # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; L # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; L # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; L # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; L # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; L # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; L # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; L # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; L # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; L # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; L # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; L # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; L # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; L # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; L # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; L # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; L # Mc SYLOTI NAGRI VOWEL SIGN OO +A830..A835 ; L # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; L # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A840..A873 ; L # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; L # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; L # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; L # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8CE..A8CF ; L # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; L # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8F2..A8F7 ; L # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; L # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; L # Lo DEVANAGARI HEADSTROKE +A8FC ; L # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; L # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A900..A909 ; L # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; L # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A92E..A92F ; L # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; L # Lo [23] REJANG LETTER KA..REJANG LETTER A +A952..A953 ; L # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; L # Po REJANG SECTION MARK +A960..A97C ; L # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A983 ; L # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; L # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; L # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; L # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; L # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; L # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; L # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; L # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; L # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; L # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; L # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; L # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; L # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; L # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; L # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA2F..AA30 ; L # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; L # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA40..AA42 ; L # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; L # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4D ; L # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; L # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; L # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; L # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; L # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; L # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; L # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; L # Lo MYANMAR LETTER AITON RA +AA7B ; L # Mc MYANMAR SIGN PAO KAREN TONE +AA7D ; L # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; L # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; L # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; L # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; L # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; L # Lo TAI VIET TONE MAI NUENG +AAC2 ; L # Lo TAI VIET TONE MAI SONG +AADB..AADC ; L # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; L # Lm TAI VIET SYMBOL SAM +AADE..AADF ; L # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; L # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; L # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; L # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; L # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; L # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; L # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; L # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; L # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; L # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; L # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; L # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; L # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; L # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; L # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; L # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; L # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; L # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; L # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; L # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; L # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; L # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; L # Po MEETEI MAYEK CHEIKHEI +ABEC ; L # Mc MEETEI MAYEK LUM IYEK +ABF0..ABF9 ; L # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; L # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; L # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; L # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +E000..F8FF ; L # Co [6400] .. +F900..FA6D ; L # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; L # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; L # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; L # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; L # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; L # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; L # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; L # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; L # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; L # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; L # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; L # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; L # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; L # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; L # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; L # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; L # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; L # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; L # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; L # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; L # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100 ; L # Po AEGEAN WORD SEPARATOR LINE +10102 ; L # Po AEGEAN CHECK MARK +10107..10133 ; L # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; L # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +1018D..1018E ; L # So [2] GREEK INDICTION SIGN..NOMISMA SIGN +101D0..101FC ; L # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10280..1029C ; L # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; L # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; L # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; L # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..10340 ; L # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; L # Nl GOTHIC LETTER NINETY +10342..10349 ; L # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; L # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; L # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; L # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; L # Po UGARITIC WORD DIVIDER +103A0..103C3 ; L # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; L # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; L # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; L # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; L # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; L # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; L # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; L # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; L # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; L # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; L # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; L # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; L # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; L # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; L # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; L # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; L # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; L # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; L # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; L # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; L # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; L # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; L # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; L # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; L # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; L # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +11000 ; L # Mc BRAHMI SIGN CANDRABINDU +11002 ; L # Mc BRAHMI SIGN VISARGA +11003..11037 ; L # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11047..1104D ; L # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11066..1106F ; L # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11071..11072 ; L # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; L # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; L # Mc KAITHI SIGN VISARGA +11083..110AF ; L # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; L # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; L # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110BB..110BC ; L # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; L # Cf KAITHI NUMBER SIGN +110BE..110C1 ; L # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110CD ; L # Cf KAITHI NUMBER SIGN ABOVE +110D0..110E8 ; L # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; L # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; L # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; L # Mc CHAKMA VOWEL SIGN E +11136..1113F ; L # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; L # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; L # Lo CHAKMA LETTER LHAA +11145..11146 ; L # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; L # Lo CHAKMA LETTER VAA +11150..11172 ; L # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11174..11175 ; L # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; L # Lo MAHAJANI LIGATURE SHRI +11182 ; L # Mc SHARADA SIGN VISARGA +11183..111B2 ; L # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; L # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; L # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; L # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; L # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; L # Po SHARADA SUTRA MARK +111CE ; L # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111D0..111D9 ; L # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; L # Lo SHARADA EKAM +111DB ; L # Po SHARADA SIGN SIDDHAM +111DC ; L # Lo SHARADA HEADSTROKE +111DD..111DF ; L # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; L # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; L # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; L # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; L # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; L # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; L # Mc KHOJKI SIGN VIRAMA +11238..1123D ; L # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +11280..11286 ; L # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; L # Lo MULTANI LETTER GHA +1128A..1128D ; L # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; L # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; L # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; L # Po MULTANI SECTION MARK +112B0..112DE ; L # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112E0..112E2 ; L # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112F0..112F9 ; L # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11302..11303 ; L # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; L # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; L # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; L # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; L # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; L # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; L # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; L # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; L # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11341..11344 ; L # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; L # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; L # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; L # Lo GRANTHA OM +11357 ; L # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; L # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; L # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11400..11434 ; L # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; L # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; L # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; L # Mc NEWA SIGN VISARGA +11447..1144A ; L # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; L # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; L # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; L # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; L # Po NEWA INSERTION SIGN +1145F..11461 ; L # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; L # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; L # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B9 ; L # Mc TIRHUTA VOWEL SIGN E +114BB..114BE ; L # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114C1 ; L # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; L # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; L # Po TIRHUTA ABBREVIATION SIGN +114C7 ; L # Lo TIRHUTA OM +114D0..114D9 ; L # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; L # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; L # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B8..115BB ; L # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; L # Mc SIDDHAM SIGN VISARGA +115C1..115D7 ; L # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; L # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; L # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; L # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; L # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; L # Mc MODI SIGN VISARGA +11641..11643 ; L # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; L # Lo MODI SIGN HUVA +11650..11659 ; L # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; L # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; L # Mc TAKRI SIGN VISARGA +116AE..116AF ; L # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; L # Mc TAKRI SIGN VIRAMA +116B8 ; L # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; L # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; L # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; L # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11720..11721 ; L # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; L # Mc AHOM VOWEL SIGN E +11730..11739 ; L # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; L # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; L # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; L # So AHOM SYMBOL VI +11740..11746 ; L # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; L # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; L # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; L # Mc DOGRA SIGN VISARGA +1183B ; L # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; L # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; L # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; L # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF..11906 ; L # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; L # Lo DIVES AKURU LETTER O +1190C..11913 ; L # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; L # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; L # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; L # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; L # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193D ; L # Mc DIVES AKURU SIGN HALANTA +1193F ; L # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; L # Mc DIVES AKURU MEDIAL YA +11941 ; L # Lo DIVES AKURU INITIAL RA +11942 ; L # Mc DIVES AKURU MEDIAL RA +11944..11946 ; L # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; L # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; L # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; L # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; L # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; L # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; L # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; L # Po NANDINAGARI SIGN SIDDHAM +119E3 ; L # Lo NANDINAGARI HEADSTROKE +119E4 ; L # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; L # Lo ZANABAZAR SQUARE LETTER A +11A07..11A08 ; L # Mn [2] ZANABAZAR SQUARE VOWEL SIGN AI..ZANABAZAR SQUARE VOWEL SIGN AU +11A0B..11A32 ; L # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A39 ; L # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; L # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3F..11A46 ; L # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A50 ; L # Lo SOYOMBO LETTER A +11A57..11A58 ; L # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A5C..11A89 ; L # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A97 ; L # Mc SOYOMBO SIGN VISARGA +11A9A..11A9C ; L # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; L # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; L # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11AF8 ; L # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; L # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; L # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; L # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; L # Mc BHAIKSUKI SIGN VISARGA +11C3F ; L # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; L # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; L # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; L # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; L # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; L # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; L # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11CA9 ; L # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; L # Mc MARCHEN VOWEL SIGN I +11CB4 ; L # Mc MARCHEN VOWEL SIGN O +11D00..11D06 ; L # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; L # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; L # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; L # Lo MASARAM GONDI REPHA +11D50..11D59 ; L # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; L # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; L # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; L # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; L # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; L # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; L # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; L # Lo GUNJALA GONDI OM +11DA0..11DA9 ; L # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; L # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF5..11EF6 ; L # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; L # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11FB0 ; L # Lo LISU LETTER YHA +11FC0..11FD4 ; L # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FFF ; L # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; L # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; L # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; L # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; L # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; L # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; L # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342E ; L # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13430..13438 ; L # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +14400..14646 ; L # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; L # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; L # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; L # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; L # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; L # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; L # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; L # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF5 ; L # Po BASSA VAH FULL STOP +16B00..16B2F ; L # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B37..16B3B ; L # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; L # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; L # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; L # Po PAHAWH HMONG SIGN XAUS +16B45 ; L # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; L # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; L # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; L # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; L # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; L # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; L # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; L # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; L # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; L # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; L # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F93..16F9F ; L # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; L # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; L # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; L # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; L # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; L # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; L # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; L # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; L # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; L # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; L # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; L # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; L # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; L # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; L # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; L # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; L # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; L # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; L # So DUPLOYAN SIGN O WITH CROSS +1BC9F ; L # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CF50..1CFC3 ; L # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; L # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; L # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; L # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D165..1D166 ; L # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; L # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; L # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN +1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; L # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; L # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; L # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; L # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; L # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; L # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; L # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; L # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; L # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; L # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; L # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; L # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; L # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; L # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; L # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; L # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; L # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; L # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; L # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; L # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; L # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; L # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; L # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; L # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; L # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; L # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; L # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; L # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; L # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; L # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; L # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; L # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; L # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D800..1D9FF ; L # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; L # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; L # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; L # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; L # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; L # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DF00..1DF09 ; L # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; L # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; L # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E100..1E12C ; L # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; L # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; L # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; L # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; L # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; L # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; L # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2F0..1E2F9 ; L # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E7E0..1E7E6 ; L # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; L # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; L # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; L # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1F110..1F12E ; L # So [31] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ +1F130..1F169 ; L # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F1AC ; L # So [61] NEGATIVE SQUARED LATIN CAPITAL LETTER A..SQUARED VOD +1F1E6..1F202 ; L # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; L # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; L # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; L # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +20000..2A6DF ; L # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; L # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; L # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; L # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; L # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; L # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; L # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +F0000..FFFFD ; L # Co [65534] .. +100000..10FFFD; L # Co [65534] .. + +# The above property value applies to 825575 code points not listed here. +# Total code points: 1096333 + +# ================================================ + +# Bidi_Class=Right_To_Left + +0590 ; R # Cn +05BE ; R # Pd HEBREW PUNCTUATION MAQAF +05C0 ; R # Po HEBREW PUNCTUATION PASEQ +05C3 ; R # Po HEBREW PUNCTUATION SOF PASUQ +05C6 ; R # Po HEBREW PUNCTUATION NUN HAFUKHA +05C8..05CF ; R # Cn [8] .. +05D0..05EA ; R # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EB..05EE ; R # Cn [4] .. +05EF..05F2 ; R # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; R # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +05F5..05FF ; R # Cn [11] .. +07C0..07C9 ; R # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; R # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; R # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; R # Lm NKO LAJANYALAN +07FB..07FC ; R # Cn [2] .. +07FE..07FF ; R # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; R # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; R # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; R # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; R # Lm SAMARITAN MODIFIER LETTER I +082E..082F ; R # Cn [2] .. +0830..083E ; R # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +083F ; R # Cn +0840..0858 ; R # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +085C..085D ; R # Cn [2] .. +085E ; R # Po MANDAIC PUNCTUATION +085F ; R # Cn +200F ; R # Cf RIGHT-TO-LEFT MARK +FB1D ; R # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; R # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; R # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB37 ; R # Cn +FB38..FB3C ; R # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3D ; R # Cn +FB3E ; R # Lo HEBREW LETTER MEM WITH DAGESH +FB3F ; R # Cn +FB40..FB41 ; R # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB42 ; R # Cn +FB43..FB44 ; R # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB45 ; R # Cn +FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED +10800..10805 ; R # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10806..10807 ; R # Cn [2] .. +10808 ; R # Lo CYPRIOT SYLLABLE JO +10809 ; R # Cn +1080A..10835 ; R # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10836 ; R # Cn +10837..10838 ; R # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +10839..1083B ; R # Cn [3] .. +1083C ; R # Lo CYPRIOT SYLLABLE ZA +1083D..1083E ; R # Cn [2] .. +1083F..10855 ; R # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10856 ; R # Cn +10857 ; R # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; R # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; R # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; R # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; R # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; R # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +1089F..108A6 ; R # Cn [8] .. +108A7..108AF ; R # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108B0..108DF ; R # Cn [48] .. +108E0..108F2 ; R # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F3 ; R # Cn +108F4..108F5 ; R # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108F6..108FA ; R # Cn [5] .. +108FB..108FF ; R # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; R # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; R # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091C..1091E ; R # Cn [3] .. +10920..10939 ; R # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093A..1093E ; R # Cn [5] .. +1093F ; R # Po LYDIAN TRIANGULAR MARK +10940..1097F ; R # Cn [64] .. +10980..109B7 ; R # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109B8..109BB ; R # Cn [4] .. +109BC..109BD ; R # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; R # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; R # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D0..109D1 ; R # Cn [2] .. +109D2..109FF ; R # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; R # Lo KHAROSHTHI LETTER A +10A04 ; R # Cn +10A07..10A0B ; R # Cn [5] .. +10A10..10A13 ; R # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A14 ; R # Cn +10A15..10A17 ; R # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A18 ; R # Cn +10A19..10A35 ; R # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A36..10A37 ; R # Cn [2] .. +10A3B..10A3E ; R # Cn [4] .. +10A40..10A48 ; R # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A49..10A4F ; R # Cn [7] .. +10A50..10A58 ; R # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A59..10A5F ; R # Cn [7] .. +10A60..10A7C ; R # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; R # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; R # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; R # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; R # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AA0..10ABF ; R # Cn [32] .. +10AC0..10AC7 ; R # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; R # So MANICHAEAN SIGN UD +10AC9..10AE4 ; R # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE7..10AEA ; R # Cn [4] .. +10AEB..10AEF ; R # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; R # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10AF7..10AFF ; R # Cn [9] .. +10B00..10B35 ; R # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B36..10B38 ; R # Cn [3] .. +10B40..10B55 ; R # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B56..10B57 ; R # Cn [2] .. +10B58..10B5F ; R # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; R # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B73..10B77 ; R # Cn [5] .. +10B78..10B7F ; R # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; R # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B92..10B98 ; R # Cn [7] .. +10B99..10B9C ; R # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10B9D..10BA8 ; R # Cn [12] .. +10BA9..10BAF ; R # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10BB0..10BFF ; R # Cn [80] .. +10C00..10C48 ; R # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C49..10C7F ; R # Cn [55] .. +10C80..10CB2 ; R # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CB3..10CBF ; R # Cn [13] .. +10CC0..10CF2 ; R # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CF3..10CF9 ; R # Cn [7] .. +10CFA..10CFF ; R # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D40..10E5F ; R # Cn [288] .. +10E7F ; R # Cn +10E80..10EA9 ; R # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAA ; R # Cn +10EAD ; R # Pd YEZIDI HYPHENATION MARK +10EAE..10EAF ; R # Cn [2] .. +10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10EB2..10EFF ; R # Cn [78] .. +10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F28..10F2F ; R # Cn [8] .. +10F70..10F81 ; R # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F86..10F89 ; R # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10F8A..10FAF ; R # Cn [38] .. +10FB0..10FC4 ; R # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; R # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FCC..10FDF ; R # Cn [20] .. +10FE0..10FF6 ; R # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +10FF7..10FFF ; R # Cn [9] .. +1E800..1E8C4 ; R # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C5..1E8C6 ; R # Cn [2] .. +1E8C7..1E8CF ; R # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D7..1E8FF ; R # Cn [41] .. +1E900..1E943 ; R # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; R # Lm ADLAM NASALIZATION MARK +1E94C..1E94F ; R # Cn [4] .. +1E950..1E959 ; R # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95A..1E95D ; R # Cn [4] .. +1E95E..1E95F ; R # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1E960..1EC6F ; R # Cn [784] .. +1ECC0..1ECFF ; R # Cn [64] .. +1ED50..1EDFF ; R # Cn [176] .. +1EF00..1EFFF ; R # Cn [256] .. + +# Total code points: 3711 + +# ================================================ + +# Bidi_Class=European_Number + +0030..0039 ; EN # Nd [10] DIGIT ZERO..DIGIT NINE +00B2..00B3 ; EN # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B9 ; EN # No SUPERSCRIPT ONE +06F0..06F9 ; EN # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +2070 ; EN # No SUPERSCRIPT ZERO +2074..2079 ; EN # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +2080..2089 ; EN # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +2488..249B ; EN # No [20] DIGIT ONE FULL STOP..NUMBER TWENTY FULL STOP +FF10..FF19 ; EN # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +102E1..102FB ; EN # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +1D7CE..1D7FF ; EN # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1F100..1F10A ; EN # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA +1FBF0..1FBF9 ; EN # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE + +# Total code points: 168 + +# ================================================ + +# Bidi_Class=European_Separator + +002B ; ES # Sm PLUS SIGN +002D ; ES # Pd HYPHEN-MINUS +207A..207B ; ES # Sm [2] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT MINUS +208A..208B ; ES # Sm [2] SUBSCRIPT PLUS SIGN..SUBSCRIPT MINUS +2212 ; ES # Sm MINUS SIGN +FB29 ; ES # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FE62 ; ES # Sm SMALL PLUS SIGN +FE63 ; ES # Pd SMALL HYPHEN-MINUS +FF0B ; ES # Sm FULLWIDTH PLUS SIGN +FF0D ; ES # Pd FULLWIDTH HYPHEN-MINUS + +# Total code points: 12 + +# ================================================ + +# Bidi_Class=European_Terminator + +0023 ; ET # Po NUMBER SIGN +0024 ; ET # Sc DOLLAR SIGN +0025 ; ET # Po PERCENT SIGN +00A2..00A5 ; ET # Sc [4] CENT SIGN..YEN SIGN +00B0 ; ET # So DEGREE SIGN +00B1 ; ET # Sm PLUS-MINUS SIGN +058F ; ET # Sc ARMENIAN DRAM SIGN +0609..060A ; ET # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +066A ; ET # Po ARABIC PERCENT SIGN +09F2..09F3 ; ET # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09FB ; ET # Sc BENGALI GANDA MARK +0AF1 ; ET # Sc GUJARATI RUPEE SIGN +0BF9 ; ET # Sc TAMIL RUPEE SIGN +0E3F ; ET # Sc THAI CURRENCY SYMBOL BAHT +17DB ; ET # Sc KHMER CURRENCY SYMBOL RIEL +2030..2034 ; ET # Po [5] PER MILLE SIGN..TRIPLE PRIME +20A0..20C0 ; ET # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +20C1..20CF ; ET # Cn [15] .. +212E ; ET # So ESTIMATED SYMBOL +2213 ; ET # Sm MINUS-OR-PLUS SIGN +A838 ; ET # Sc NORTH INDIC RUPEE MARK +A839 ; ET # So NORTH INDIC QUANTITY MARK +FE5F ; ET # Po SMALL NUMBER SIGN +FE69 ; ET # Sc SMALL DOLLAR SIGN +FE6A ; ET # Po SMALL PERCENT SIGN +FF03 ; ET # Po FULLWIDTH NUMBER SIGN +FF04 ; ET # Sc FULLWIDTH DOLLAR SIGN +FF05 ; ET # Po FULLWIDTH PERCENT SIGN +FFE0..FFE1 ; ET # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE5..FFE6 ; ET # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +11FDD..11FE0 ; ET # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +1E2FF ; ET # Sc WANCHO NGUN SIGN + +# Total code points: 92 + +# ================================================ + +# Bidi_Class=Arabic_Number + +0600..0605 ; AN # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0660..0669 ; AN # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066B..066C ; AN # Po [2] ARABIC DECIMAL SEPARATOR..ARABIC THOUSANDS SEPARATOR +06DD ; AN # Cf ARABIC END OF AYAH +0890..0891 ; AN # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; AN # Cf ARABIC DISPUTED END OF AYAH +10D30..10D39 ; AN # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E ; AN # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS + +# Total code points: 63 + +# ================================================ + +# Bidi_Class=Common_Separator + +002C ; CS # Po COMMA +002E..002F ; CS # Po [2] FULL STOP..SOLIDUS +003A ; CS # Po COLON +00A0 ; CS # Zs NO-BREAK SPACE +060C ; CS # Po ARABIC COMMA +202F ; CS # Zs NARROW NO-BREAK SPACE +2044 ; CS # Sm FRACTION SLASH +FE50 ; CS # Po SMALL COMMA +FE52 ; CS # Po SMALL FULL STOP +FE55 ; CS # Po SMALL COLON +FF0C ; CS # Po FULLWIDTH COMMA +FF0E..FF0F ; CS # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF1A ; CS # Po FULLWIDTH COLON + +# Total code points: 15 + +# ================================================ + +# Bidi_Class=Paragraph_Separator + +000A ; B # Cc +000D ; B # Cc +001C..001E ; B # Cc [3] .. +0085 ; B # Cc +2029 ; B # Zp PARAGRAPH SEPARATOR + +# Total code points: 7 + +# ================================================ + +# Bidi_Class=Segment_Separator + +0009 ; S # Cc +000B ; S # Cc +001F ; S # Cc + +# Total code points: 3 + +# ================================================ + +# Bidi_Class=White_Space + +000C ; WS # Cc +0020 ; WS # Zs SPACE +1680 ; WS # Zs OGHAM SPACE MARK +2000..200A ; WS # Zs [11] EN QUAD..HAIR SPACE +2028 ; WS # Zl LINE SEPARATOR +205F ; WS # Zs MEDIUM MATHEMATICAL SPACE +3000 ; WS # Zs IDEOGRAPHIC SPACE + +# Total code points: 17 + +# ================================================ + +# Bidi_Class=Other_Neutral + +0021..0022 ; ON # Po [2] EXCLAMATION MARK..QUOTATION MARK +0026..0027 ; ON # Po [2] AMPERSAND..APOSTROPHE +0028 ; ON # Ps LEFT PARENTHESIS +0029 ; ON # Pe RIGHT PARENTHESIS +002A ; ON # Po ASTERISK +003B ; ON # Po SEMICOLON +003C..003E ; ON # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; ON # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; ON # Ps LEFT SQUARE BRACKET +005C ; ON # Po REVERSE SOLIDUS +005D ; ON # Pe RIGHT SQUARE BRACKET +005E ; ON # Sk CIRCUMFLEX ACCENT +005F ; ON # Pc LOW LINE +0060 ; ON # Sk GRAVE ACCENT +007B ; ON # Ps LEFT CURLY BRACKET +007C ; ON # Sm VERTICAL LINE +007D ; ON # Pe RIGHT CURLY BRACKET +007E ; ON # Sm TILDE +00A1 ; ON # Po INVERTED EXCLAMATION MARK +00A6 ; ON # So BROKEN BAR +00A7 ; ON # Po SECTION SIGN +00A8 ; ON # Sk DIAERESIS +00A9 ; ON # So COPYRIGHT SIGN +00AB ; ON # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; ON # Sm NOT SIGN +00AE ; ON # So REGISTERED SIGN +00AF ; ON # Sk MACRON +00B4 ; ON # Sk ACUTE ACCENT +00B6..00B7 ; ON # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; ON # Sk CEDILLA +00BB ; ON # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; ON # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; ON # Po INVERTED QUESTION MARK +00D7 ; ON # Sm MULTIPLICATION SIGN +00F7 ; ON # Sm DIVISION SIGN +02B9..02BA ; ON # Lm [2] MODIFIER LETTER PRIME..MODIFIER LETTER DOUBLE PRIME +02C2..02C5 ; ON # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02CF ; ON # Lm [10] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER LOW ACUTE ACCENT +02D2..02DF ; ON # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02EB ; ON # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; ON # Lm MODIFIER LETTER VOICING +02ED ; ON # Sk MODIFIER LETTER UNASPIRATED +02EF..02FF ; ON # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; ON # Lm GREEK NUMERAL SIGN +0375 ; ON # Sk GREEK LOWER NUMERAL SIGN +037E ; ON # Po GREEK QUESTION MARK +0384..0385 ; ON # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0387 ; ON # Po GREEK ANO TELEIA +03F6 ; ON # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +058A ; ON # Pd ARMENIAN HYPHEN +058D..058E ; ON # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +0606..0607 ; ON # Sm [2] ARABIC-INDIC CUBE ROOT..ARABIC-INDIC FOURTH ROOT +060E..060F ; ON # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +06DE ; ON # So ARABIC START OF RUB EL HIZB +06E9 ; ON # So ARABIC PLACE OF SAJDAH +07F6 ; ON # So NKO SYMBOL OO DENNEN +07F7..07F9 ; ON # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +0BF3..0BF8 ; ON # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BFA ; ON # So TAMIL NUMBER SIGN +0C78..0C7E ; ON # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0F3A ; ON # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; ON # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; ON # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; ON # Pe TIBETAN MARK ANG KHANG GYAS +1390..1399 ; ON # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +1400 ; ON # Pd CANADIAN SYLLABICS HYPHEN +169B ; ON # Ps OGHAM FEATHER MARK +169C ; ON # Pe OGHAM REVERSED FEATHER MARK +17F0..17F9 ; ON # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805 ; ON # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; ON # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; ON # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1940 ; ON # So LIMBU SIGN LOO +1944..1945 ; ON # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +19DE..19FF ; ON # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1FBD ; ON # Sk GREEK KORONIS +1FBF..1FC1 ; ON # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; ON # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; ON # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; ON # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; ON # Sk [2] GREEK OXIA..GREEK DASIA +2010..2015 ; ON # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; ON # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; ON # Pi LEFT SINGLE QUOTATION MARK +2019 ; ON # Pf RIGHT SINGLE QUOTATION MARK +201A ; ON # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; ON # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; ON # Pf RIGHT DOUBLE QUOTATION MARK +201E ; ON # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; ON # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; ON # Po [8] DAGGER..HYPHENATION POINT +2035..2038 ; ON # Po [4] REVERSED PRIME..CARET +2039 ; ON # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; ON # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; ON # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; ON # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; ON # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2045 ; ON # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; ON # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; ON # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; ON # Sm COMMERCIAL MINUS SIGN +2053 ; ON # Po SWUNG DASH +2054 ; ON # Pc INVERTED UNDERTIE +2055..205E ; ON # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +207C ; ON # Sm SUPERSCRIPT EQUALS SIGN +207D ; ON # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; ON # Pe SUPERSCRIPT RIGHT PARENTHESIS +208C ; ON # Sm SUBSCRIPT EQUALS SIGN +208D ; ON # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; ON # Pe SUBSCRIPT RIGHT PARENTHESIS +2100..2101 ; ON # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2103..2106 ; ON # So [4] DEGREE CELSIUS..CADA UNA +2108..2109 ; ON # So [2] SCRUPLE..DEGREE FAHRENHEIT +2114 ; ON # So L B BAR SYMBOL +2116..2117 ; ON # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; ON # Sm SCRIPT CAPITAL P +211E..2123 ; ON # So [6] PRESCRIPTION TAKE..VERSICLE +2125 ; ON # So OUNCE SIGN +2127 ; ON # So INVERTED OHM SIGN +2129 ; ON # So TURNED GREEK SMALL LETTER IOTA +213A..213B ; ON # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +2140..2144 ; ON # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +214A ; ON # So PROPERTY LINE +214B ; ON # Sm TURNED AMPERSAND +214C..214D ; ON # So [2] PER SIGN..AKTIESELSKAB +2150..215F ; ON # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; ON # No VULGAR FRACTION ZERO THIRDS +218A..218B ; ON # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; ON # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; ON # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; ON # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; ON # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; ON # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; ON # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; ON # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; ON # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; ON # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; ON # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; ON # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; ON # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; ON # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; ON # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; ON # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; ON # So DOWNWARDS DOUBLE ARROW +21D4 ; ON # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; ON # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..2211 ; ON # Sm [30] RIGHT ARROW WITH SMALL CIRCLE..N-ARY SUMMATION +2214..22FF ; ON # Sm [236] DOT PLUS..Z NOTATION BAG MEMBERSHIP +2300..2307 ; ON # So [8] DIAMETER SIGN..WAVY LINE +2308 ; ON # Ps LEFT CEILING +2309 ; ON # Pe RIGHT CEILING +230A ; ON # Ps LEFT FLOOR +230B ; ON # Pe RIGHT FLOOR +230C..231F ; ON # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; ON # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; ON # So [7] FROWN..KEYBOARD +2329 ; ON # Ps LEFT-POINTING ANGLE BRACKET +232A ; ON # Pe RIGHT-POINTING ANGLE BRACKET +232B..2335 ; ON # So [11] ERASE TO THE LEFT..COUNTERSINK +237B ; ON # So NOT CHECK MARK +237C ; ON # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..2394 ; ON # So [24] SHOULDERED OPEN BOX..SOFTWARE-FUNCTION SYMBOL +2396..239A ; ON # So [5] DECIMAL SEPARATOR KEY SYMBOL..CLEAR SCREEN SYMBOL +239B..23B3 ; ON # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; ON # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; ON # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; ON # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; ON # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..2487 ; ON # No [40] CIRCLED DIGIT ONE..PARENTHESIZED NUMBER TWENTY +24EA..24FF ; ON # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; ON # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; ON # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; ON # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; ON # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; ON # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; ON # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; ON # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; ON # Sm MUSIC SHARP SIGN +2670..26AB ; ON # So [60] WEST SYRIAC CROSS..MEDIUM BLACK CIRCLE +26AD..2767 ; ON # So [187] MARRIAGE SYMBOL..ROTATED FLORAL HEART BULLET +2768 ; ON # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; ON # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; ON # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; ON # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; ON # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; ON # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; ON # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; ON # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; ON # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; ON # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; ON # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; ON # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; ON # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; ON # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; ON # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; ON # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; ON # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; ON # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; ON # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; ON # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; ON # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; ON # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; ON # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; ON # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; ON # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; ON # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; ON # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; ON # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; ON # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; ON # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; ON # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; ON # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; ON # Ps LEFT WHITE CURLY BRACKET +2984 ; ON # Pe RIGHT WHITE CURLY BRACKET +2985 ; ON # Ps LEFT WHITE PARENTHESIS +2986 ; ON # Pe RIGHT WHITE PARENTHESIS +2987 ; ON # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; ON # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; ON # Ps Z NOTATION LEFT BINDING BRACKET +298A ; ON # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; ON # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; ON # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; ON # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; ON # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; ON # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; ON # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; ON # Ps LEFT ARC LESS-THAN BRACKET +2994 ; ON # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; ON # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; ON # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; ON # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; ON # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; ON # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; ON # Ps LEFT WIGGLY FENCE +29D9 ; ON # Pe RIGHT WIGGLY FENCE +29DA ; ON # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; ON # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; ON # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; ON # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; ON # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; ON # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; ON # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; ON # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; ON # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; ON # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; ON # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; ON # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; ON # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2CE5..2CEA ; ON # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CF9..2CFC ; ON # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; ON # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; ON # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2E00..2E01 ; ON # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; ON # Pi LEFT SUBSTITUTION BRACKET +2E03 ; ON # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; ON # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; ON # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; ON # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; ON # Pi LEFT TRANSPOSITION BRACKET +2E0A ; ON # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; ON # Po RAISED SQUARE +2E0C ; ON # Pi LEFT RAISED OMISSION BRACKET +2E0D ; ON # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; ON # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; ON # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; ON # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; ON # Pd HYPHEN WITH DIAERESIS +2E1B ; ON # Po TILDE WITH RING ABOVE +2E1C ; ON # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; ON # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; ON # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; ON # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; ON # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; ON # Ps TOP LEFT HALF BRACKET +2E23 ; ON # Pe TOP RIGHT HALF BRACKET +2E24 ; ON # Ps BOTTOM LEFT HALF BRACKET +2E25 ; ON # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; ON # Ps LEFT SIDEWAYS U BRACKET +2E27 ; ON # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; ON # Ps LEFT DOUBLE PARENTHESIS +2E29 ; ON # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; ON # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; ON # Lm VERTICAL TILDE +2E30..2E39 ; ON # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; ON # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; ON # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; ON # Pd DOUBLE HYPHEN +2E41 ; ON # Po REVERSED COMMA +2E42 ; ON # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; ON # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; ON # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; ON # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; ON # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; ON # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; ON # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; ON # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; ON # Ps TOP HALF LEFT PARENTHESIS +2E5A ; ON # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; ON # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; ON # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; ON # Pd OBLIQUE HYPHEN +2E80..2E99 ; ON # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; ON # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; ON # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFB ; ON # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3001..3003 ; ON # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; ON # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3008 ; ON # Ps LEFT ANGLE BRACKET +3009 ; ON # Pe RIGHT ANGLE BRACKET +300A ; ON # Ps LEFT DOUBLE ANGLE BRACKET +300B ; ON # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; ON # Ps LEFT CORNER BRACKET +300D ; ON # Pe RIGHT CORNER BRACKET +300E ; ON # Ps LEFT WHITE CORNER BRACKET +300F ; ON # Pe RIGHT WHITE CORNER BRACKET +3010 ; ON # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; ON # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; ON # So [2] POSTAL MARK..GETA MARK +3014 ; ON # Ps LEFT TORTOISE SHELL BRACKET +3015 ; ON # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; ON # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; ON # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; ON # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; ON # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; ON # Ps LEFT WHITE SQUARE BRACKET +301B ; ON # Pe RIGHT WHITE SQUARE BRACKET +301C ; ON # Pd WAVE DASH +301D ; ON # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; ON # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; ON # So POSTAL MARK FACE +3030 ; ON # Pd WAVY DASH +3036..3037 ; ON # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303D ; ON # Po PART ALTERNATION MARK +303E..303F ; ON # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; ON # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; ON # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; ON # Po KATAKANA MIDDLE DOT +31C0..31E3 ; ON # So [36] CJK STROKE T..CJK STROKE Q +321D..321E ; ON # So [2] PARENTHESIZED KOREAN CHARACTER OJEON..PARENTHESIZED KOREAN CHARACTER O HU +3250 ; ON # So PARTNERSHIP SIGN +3251..325F ; ON # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327C..327E ; ON # So [3] CIRCLED KOREAN CHARACTER CHAMKO..CIRCLED HANGUL IEUNG U +32B1..32BF ; ON # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32CC..32CF ; ON # So [4] SQUARE HG..LIMITED LIABILITY SIGN +3377..337A ; ON # So [4] SQUARE DM..SQUARE IU +33DE..33DF ; ON # So [2] SQUARE V OVER M..SQUARE A OVER M +33FF ; ON # So SQUARE GAL +4DC0..4DFF ; ON # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A490..A4C6 ; ON # So [55] YI RADICAL QOT..YI RADICAL KE +A60D..A60F ; ON # Po [3] VAI COMMA..VAI QUESTION MARK +A673 ; ON # Po SLAVONIC ASTERISK +A67E ; ON # Po CYRILLIC KAVYKA +A67F ; ON # Lm CYRILLIC PAYEROK +A700..A716 ; ON # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; ON # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; ON # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; ON # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A828..A82B ; ON # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A874..A877 ; ON # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +AB6A..AB6B ; ON # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FD3E ; ON # Pe ORNATE LEFT PARENTHESIS +FD3F ; ON # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; ON # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FDCF ; ON # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDFD..FDFF ; ON # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE10..FE16 ; ON # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; ON # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; ON # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; ON # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; ON # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; ON # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; ON # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; ON # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; ON # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; ON # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE51 ; ON # Po SMALL IDEOGRAPHIC COMMA +FE54 ; ON # Po SMALL SEMICOLON +FE56..FE57 ; ON # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FE58 ; ON # Pd SMALL EM DASH +FE59 ; ON # Ps SMALL LEFT PARENTHESIS +FE5A ; ON # Pe SMALL RIGHT PARENTHESIS +FE5B ; ON # Ps SMALL LEFT CURLY BRACKET +FE5C ; ON # Pe SMALL RIGHT CURLY BRACKET +FE5D ; ON # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; ON # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE60..FE61 ; ON # Po [2] SMALL AMPERSAND..SMALL ASTERISK +FE64..FE66 ; ON # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; ON # Po SMALL REVERSE SOLIDUS +FE6B ; ON # Po SMALL COMMERCIAL AT +FF01..FF02 ; ON # Po [2] FULLWIDTH EXCLAMATION MARK..FULLWIDTH QUOTATION MARK +FF06..FF07 ; ON # Po [2] FULLWIDTH AMPERSAND..FULLWIDTH APOSTROPHE +FF08 ; ON # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; ON # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; ON # Po FULLWIDTH ASTERISK +FF1B ; ON # Po FULLWIDTH SEMICOLON +FF1C..FF1E ; ON # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; ON # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; ON # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; ON # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; ON # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; ON # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; ON # Pc FULLWIDTH LOW LINE +FF40 ; ON # Sk FULLWIDTH GRAVE ACCENT +FF5B ; ON # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; ON # Sm FULLWIDTH VERTICAL LINE +FF5D ; ON # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; ON # Sm FULLWIDTH TILDE +FF5F ; ON # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; ON # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; ON # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; ON # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; ON # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; ON # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FFE2 ; ON # Sm FULLWIDTH NOT SIGN +FFE3 ; ON # Sk FULLWIDTH MACRON +FFE4 ; ON # So FULLWIDTH BROKEN BAR +FFE8 ; ON # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; ON # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; ON # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; ON # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; ON # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10101 ; ON # Po AEGEAN WORD SEPARATOR DOT +10140..10174 ; ON # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; ON # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; ON # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; ON # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C ; ON # So GREEK SINUSOID SIGN +10190..1019C ; ON # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; ON # So GREEK SYMBOL TAU RHO +1091F ; ON # Po PHOENICIAN WORD SEPARATOR +10B39..10B3F ; ON # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +11052..11065 ; ON # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11660..1166C ; ON # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11FD5..11FDC ; ON # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FE1..11FF1 ; ON # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +16FE2 ; ON # Po OLD CHINESE HOOK MARK +1D1E9..1D1EA ; ON # So [2] MUSICAL SYMBOL SORI..MUSICAL SYMBOL KORON +1D200..1D241 ; ON # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; ON # So GREEK MUSICAL LEIMMA +1D300..1D356 ; ON # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D6DB ; ON # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D715 ; ON # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D74F ; ON # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D789 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D7C3 ; ON # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1EEF0..1EEF1 ; ON # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F02B ; ON # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; ON # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; ON # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; ON # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; ON # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; ON # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F10B..1F10C ; ON # No [2] DINGBAT CIRCLED SANS-SERIF DIGIT ZERO..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F10F ; ON # So [3] CIRCLED ZERO WITH SLASH..CIRCLED DOLLAR SIGN WITH OVERLAID BACKSLASH +1F12F ; ON # So COPYLEFT SYMBOL +1F16A..1F16F ; ON # So [6] RAISED MC SIGN..CIRCLED HUMAN FIGURE +1F1AD ; ON # So MASK WORK SYMBOL +1F260..1F265 ; ON # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; ON # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; ON # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; ON # So [728] RAT..ELEVATOR +1F6DD..1F6EC ; ON # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6F0..1F6FC ; ON # So [13] SATELLITE..ROLLER SKATE +1F700..1F773 ; ON # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F780..1F7D8 ; ON # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F7E0..1F7EB ; ON # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; ON # So HEAVY EQUALS SIGN +1F800..1F80B ; ON # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; ON # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; ON # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; ON # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; ON # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; ON # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1FA53 ; ON # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; ON # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA74 ; ON # So [5] BALLET SHOES..THONG SANDAL +1FA78..1FA7C ; ON # So [5] DROP OF BLOOD..CRUTCH +1FA80..1FA86 ; ON # So [7] YO-YO..NESTING DOLLS +1FA90..1FAAC ; ON # So [29] RINGED PLANET..HAMSA +1FAB0..1FABA ; ON # So [11] FLY..NEST WITH EGGS +1FAC0..1FAC5 ; ON # So [6] ANATOMICAL HEART..PERSON WITH CROWN +1FAD0..1FAD9 ; ON # So [10] BLUEBERRIES..JAR +1FAE0..1FAE7 ; ON # So [8] MELTING FACE..BUBBLES +1FAF0..1FAF6 ; ON # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FB00..1FB92 ; ON # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; ON # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON + +# Total code points: 6000 + +# ================================================ + +# Bidi_Class=Boundary_Neutral + +0000..0008 ; BN # Cc [9] .. +000E..001B ; BN # Cc [14] .. +007F..0084 ; BN # Cc [6] .. +0086..009F ; BN # Cc [26] .. +00AD ; BN # Cf SOFT HYPHEN +180E ; BN # Cf MONGOLIAN VOWEL SEPARATOR +200B..200D ; BN # Cf [3] ZERO WIDTH SPACE..ZERO WIDTH JOINER +2060..2064 ; BN # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; BN # Cn +206A..206F ; BN # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +FDD0..FDEF ; BN # Cn [32] .. +FEFF ; BN # Cf ZERO WIDTH NO-BREAK SPACE +FFF0..FFF8 ; BN # Cn [9] .. +FFFE..FFFF ; BN # Cn [2] .. +1BCA0..1BCA3 ; BN # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; BN # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1FFFE..1FFFF ; BN # Cn [2] .. +2FFFE..2FFFF ; BN # Cn [2] .. +3FFFE..3FFFF ; BN # Cn [2] .. +4FFFE..4FFFF ; BN # Cn [2] .. +5FFFE..5FFFF ; BN # Cn [2] .. +6FFFE..6FFFF ; BN # Cn [2] .. +7FFFE..7FFFF ; BN # Cn [2] .. +8FFFE..8FFFF ; BN # Cn [2] .. +9FFFE..9FFFF ; BN # Cn [2] .. +AFFFE..AFFFF ; BN # Cn [2] .. +BFFFE..BFFFF ; BN # Cn [2] .. +CFFFE..CFFFF ; BN # Cn [2] .. +DFFFE..E0000 ; BN # Cn [3] .. +E0001 ; BN # Cf LANGUAGE TAG +E0002..E001F ; BN # Cn [30] .. +E0020..E007F ; BN # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; BN # Cn [128] .. +E01F0..E0FFF ; BN # Cn [3600] .. +EFFFE..EFFFF ; BN # Cn [2] .. +FFFFE..FFFFF ; BN # Cn [2] .. +10FFFE..10FFFF; BN # Cn [2] .. + +# Total code points: 4016 + +# ================================================ + +# Bidi_Class=Nonspacing_Mark + +0300..036F ; NSM # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; NSM # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; NSM # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; NSM # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; NSM # Mn HEBREW POINT RAFE +05C1..05C2 ; NSM # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; NSM # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; NSM # Mn HEBREW POINT QAMATS QATAN +0610..061A ; NSM # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; NSM # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; NSM # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; NSM # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; NSM # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; NSM # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; NSM # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; NSM # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; NSM # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; NSM # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; NSM # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; NSM # Mn NKO DANTAYALAN +0816..0819 ; NSM # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; NSM # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; NSM # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; NSM # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; NSM # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; NSM # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; NSM # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; NSM # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; NSM # Mn DEVANAGARI VOWEL SIGN OE +093C ; NSM # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; NSM # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; NSM # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; NSM # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; NSM # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; NSM # Mn BENGALI SIGN CANDRABINDU +09BC ; NSM # Mn BENGALI SIGN NUKTA +09C1..09C4 ; NSM # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; NSM # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; NSM # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; NSM # Mn BENGALI SANDHI MARK +0A01..0A02 ; NSM # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; NSM # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; NSM # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; NSM # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; NSM # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; NSM # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; NSM # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; NSM # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; NSM # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; NSM # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; NSM # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; NSM # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; NSM # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; NSM # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; NSM # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; NSM # Mn ORIYA SIGN CANDRABINDU +0B3C ; NSM # Mn ORIYA SIGN NUKTA +0B3F ; NSM # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; NSM # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; NSM # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; NSM # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; NSM # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; NSM # Mn TAMIL SIGN ANUSVARA +0BC0 ; NSM # Mn TAMIL VOWEL SIGN II +0BCD ; NSM # Mn TAMIL SIGN VIRAMA +0C00 ; NSM # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; NSM # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; NSM # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; NSM # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; NSM # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; NSM # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; NSM # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; NSM # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; NSM # Mn KANNADA SIGN CANDRABINDU +0CBC ; NSM # Mn KANNADA SIGN NUKTA +0CCC..0CCD ; NSM # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; NSM # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; NSM # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; NSM # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; NSM # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; NSM # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; NSM # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; NSM # Mn SINHALA SIGN CANDRABINDU +0DCA ; NSM # Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; NSM # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; NSM # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; NSM # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; NSM # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; NSM # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; NSM # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; NSM # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECD ; NSM # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0F18..0F19 ; NSM # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; NSM # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; NSM # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; NSM # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; NSM # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; NSM # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; NSM # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; NSM # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; NSM # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; NSM # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; NSM # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; NSM # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; NSM # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; NSM # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; NSM # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; NSM # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; NSM # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; NSM # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; NSM # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; NSM # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; NSM # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; NSM # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; NSM # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; NSM # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; NSM # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; NSM # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; NSM # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; NSM # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; NSM # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; NSM # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; NSM # Mn KHMER SIGN ATTHACAN +180B..180D ; NSM # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; NSM # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; NSM # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; NSM # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; NSM # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; NSM # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; NSM # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; NSM # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; NSM # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; NSM # Mn BUGINESE VOWEL SIGN AE +1A56 ; NSM # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; NSM # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; NSM # Mn TAI THAM SIGN SAKOT +1A62 ; NSM # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; NSM # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; NSM # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; NSM # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; NSM # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; NSM # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; NSM # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; NSM # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; NSM # Mn BALINESE SIGN REREKAN +1B36..1B3A ; NSM # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; NSM # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; NSM # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; NSM # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; NSM # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; NSM # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; NSM # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; NSM # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; NSM # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; NSM # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; NSM # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; NSM # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; NSM # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; NSM # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; NSM # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; NSM # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; NSM # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; NSM # Mn VEDIC SIGN TIRYAK +1CF4 ; NSM # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; NSM # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; NSM # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +20D0..20DC ; NSM # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; NSM # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; NSM # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; NSM # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; NSM # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; NSM # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; NSM # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; NSM # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; NSM # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; NSM # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; NSM # Mn COMBINING CYRILLIC VZMET +A670..A672 ; NSM # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; NSM # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; NSM # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; NSM # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; NSM # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; NSM # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; NSM # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; NSM # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; NSM # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; NSM # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; NSM # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; NSM # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; NSM # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; NSM # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; NSM # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; NSM # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; NSM # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; NSM # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; NSM # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; NSM # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; NSM # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; NSM # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; NSM # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; NSM # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; NSM # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; NSM # Mn TAI VIET MAI KANG +AAB2..AAB4 ; NSM # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; NSM # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; NSM # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; NSM # Mn TAI VIET TONE MAI THO +AAEC..AAED ; NSM # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; NSM # Mn MEETEI MAYEK VIRAMA +ABE5 ; NSM # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; NSM # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; NSM # Mn MEETEI MAYEK APUN IYEK +FB1E ; NSM # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; NSM # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; NSM # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +101FD ; NSM # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; NSM # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; NSM # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; NSM # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; NSM # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; NSM # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; NSM # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; NSM # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; NSM # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; NSM # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; NSM # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10F46..10F50 ; NSM # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; NSM # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; NSM # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; NSM # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; NSM # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; NSM # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; NSM # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; NSM # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; NSM # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; NSM # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; NSM # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; NSM # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; NSM # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; NSM # Mn MAHAJANI SIGN NUKTA +11180..11181 ; NSM # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; NSM # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; NSM # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; NSM # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; NSM # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; NSM # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; NSM # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; NSM # Mn KHOJKI SIGN SUKUN +112DF ; NSM # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; NSM # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; NSM # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; NSM # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; NSM # Mn GRANTHA VOWEL SIGN II +11366..1136C ; NSM # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; NSM # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; NSM # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; NSM # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; NSM # Mn NEWA SIGN NUKTA +1145E ; NSM # Mn NEWA SANDHI MARK +114B3..114B8 ; NSM # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; NSM # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; NSM # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; NSM # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; NSM # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; NSM # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; NSM # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; NSM # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; NSM # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; NSM # Mn MODI SIGN ANUSVARA +1163F..11640 ; NSM # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; NSM # Mn TAKRI SIGN ANUSVARA +116AD ; NSM # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; NSM # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; NSM # Mn TAKRI SIGN NUKTA +1171D..1171F ; NSM # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; NSM # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; NSM # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; NSM # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; NSM # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; NSM # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; NSM # Mn DIVES AKURU VIRAMA +11943 ; NSM # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; NSM # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; NSM # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; NSM # Mn NANDINAGARI SIGN VIRAMA +11A01..11A06 ; NSM # Mn [6] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL SIGN O +11A09..11A0A ; NSM # Mn [2] ZANABAZAR SQUARE VOWEL SIGN REVERSED I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; NSM # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; NSM # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; NSM # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; NSM # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; NSM # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; NSM # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; NSM # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; NSM # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; NSM # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C92..11CA7 ; NSM # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; NSM # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; NSM # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; NSM # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; NSM # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; NSM # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; NSM # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; NSM # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; NSM # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; NSM # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; NSM # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; NSM # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; NSM # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +16AF0..16AF4 ; NSM # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; NSM # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; NSM # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; NSM # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; NSM # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; NSM # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; NSM # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; NSM # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; NSM # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; NSM # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; NSM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; NSM # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; NSM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; NSM # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; NSM # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; NSM # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; NSM # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; NSM # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; NSM # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; NSM # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; NSM # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; NSM # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; NSM # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E130..1E136 ; NSM # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; NSM # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; NSM # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; NSM # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; NSM # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0100..E01EF ; NSM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 1958 + +# ================================================ + +# Bidi_Class=Arabic_Letter + +0608 ; AL # Sm ARABIC RAY +060B ; AL # Sc AFGHANI SIGN +060D ; AL # Po ARABIC DATE SEPARATOR +061B ; AL # Po ARABIC SEMICOLON +061C ; AL # Cf ARABIC LETTER MARK +061D..061F ; AL # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; AL # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; AL # Lm ARABIC TATWEEL +0641..064A ; AL # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066D ; AL # Po ARABIC FIVE POINTED STAR +066E..066F ; AL # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; AL # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; AL # Po ARABIC FULL STOP +06D5 ; AL # Lo ARABIC LETTER AE +06E5..06E6 ; AL # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; AL # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; AL # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; AL # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; AL # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; AL # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070E ; AL # Cn +070F ; AL # Cf SYRIAC ABBREVIATION MARK +0710 ; AL # Lo SYRIAC LETTER ALAPH +0712..072F ; AL # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074B..074C ; AL # Cn [2] .. +074D..07A5 ; AL # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; AL # Lo THAANA LETTER NAA +07B2..07BF ; AL # Cn [14] .. +0860..086A ; AL # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +086B..086F ; AL # Cn [5] .. +0870..0887 ; AL # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; AL # Sk ARABIC RAISED ROUND DOT +0889..088E ; AL # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +088F ; AL # Cn +0892..0897 ; AL # Cn [6] .. +08A0..08C8 ; AL # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; AL # Lm ARABIC SMALL FARSI YEH +FB50..FBB1 ; AL # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; AL # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBC3..FBD2 ; AL # Cn [16] .. +FBD3..FD3D ; AL # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; AL # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD90..FD91 ; AL # Cn [2] .. +FD92..FDC7 ; AL # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDC8..FDCE ; AL # Cn [7] .. +FDF0..FDFB ; AL # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; AL # Sc RIAL SIGN +FE70..FE74 ; AL # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE75 ; AL # Cn +FE76..FEFC ; AL # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FEFD..FEFE ; AL # Cn [2] .. +10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D28..10D2F ; AL # Cn [8] .. +10D3A..10D3F ; AL # Cn [6] .. +10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F5A..10F6F ; AL # Cn [22] .. +1EC70 ; AL # Cn +1EC71..1ECAB ; AL # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; AL # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; AL # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; AL # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; AL # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ECB5..1ECBF ; AL # Cn [11] .. +1ED00 ; AL # Cn +1ED01..1ED2D ; AL # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; AL # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; AL # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1ED3E..1ED4F ; AL # Cn [18] .. +1EE00..1EE03 ; AL # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE04 ; AL # Cn +1EE05..1EE1F ; AL # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE20 ; AL # Cn +1EE21..1EE22 ; AL # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE23 ; AL # Cn +1EE24 ; AL # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE25..1EE26 ; AL # Cn [2] .. +1EE27 ; AL # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE28 ; AL # Cn +1EE29..1EE32 ; AL # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE33 ; AL # Cn +1EE34..1EE37 ; AL # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE38 ; AL # Cn +1EE39 ; AL # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3A ; AL # Cn +1EE3B ; AL # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE3C..1EE41 ; AL # Cn [6] .. +1EE42 ; AL # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE43..1EE46 ; AL # Cn [4] .. +1EE47 ; AL # Lo ARABIC MATHEMATICAL TAILED HAH +1EE48 ; AL # Cn +1EE49 ; AL # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4A ; AL # Cn +1EE4B ; AL # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4C ; AL # Cn +1EE4D..1EE4F ; AL # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE50 ; AL # Cn +1EE51..1EE52 ; AL # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE53 ; AL # Cn +1EE54 ; AL # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE55..1EE56 ; AL # Cn [2] .. +1EE57 ; AL # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE58 ; AL # Cn +1EE59 ; AL # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5A ; AL # Cn +1EE5B ; AL # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5C ; AL # Cn +1EE5D ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5E ; AL # Cn +1EE5F ; AL # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE60 ; AL # Cn +1EE61..1EE62 ; AL # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE63 ; AL # Cn +1EE64 ; AL # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE65..1EE66 ; AL # Cn [2] .. +1EE67..1EE6A ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6B ; AL # Cn +1EE6C..1EE72 ; AL # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE73 ; AL # Cn +1EE74..1EE77 ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE78 ; AL # Cn +1EE79..1EE7C ; AL # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7D ; AL # Cn +1EE7E ; AL # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE7F ; AL # Cn +1EE80..1EE89 ; AL # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8A ; AL # Cn +1EE8B..1EE9B ; AL # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EE9C..1EEA0 ; AL # Cn [5] .. +1EEA1..1EEA3 ; AL # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA4 ; AL # Cn +1EEA5..1EEA9 ; AL # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAA ; AL # Cn +1EEAB..1EEBB ; AL # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEBC..1EEEF ; AL # Cn [52] .. +1EEF2..1EEFF ; AL # Cn [14] .. + +# Total code points: 1708 + +# ================================================ + +# Bidi_Class=Left_To_Right_Override + +202D ; LRO # Cf LEFT-TO-RIGHT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Override + +202E ; RLO # Cf RIGHT-TO-LEFT OVERRIDE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Embedding + +202A ; LRE # Cf LEFT-TO-RIGHT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Embedding + +202B ; RLE # Cf RIGHT-TO-LEFT EMBEDDING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Format + +202C ; PDF # Cf POP DIRECTIONAL FORMATTING + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Left_To_Right_Isolate + +2066 ; LRI # Cf LEFT-TO-RIGHT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Right_To_Left_Isolate + +2067 ; RLI # Cf RIGHT-TO-LEFT ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=First_Strong_Isolate + +2068 ; FSI # Cf FIRST STRONG ISOLATE + +# Total code points: 1 + +# ================================================ + +# Bidi_Class=Pop_Directional_Isolate + +2069 ; PDI # Cf POP DIRECTIONAL ISOLATE + +# Total code points: 1 + +# EOF diff --git a/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt b/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt new file mode 100644 index 0000000000000000000000000000000000000000..afc2abd97ecab4b8b2697228c5e847887dc7bb67 --- /dev/null +++ b/pcre2/maint/Unicode.tables/DerivedCoreProperties.txt @@ -0,0 +1,12416 @@ +# DerivedCoreProperties-14.0.0.txt +# Date: 2021-08-12, 23:12:53 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +# Derived Property: Math +# Generated from: Sm + Other_Math + +002B ; Math # Sm PLUS SIGN +003C..003E ; Math # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +005E ; Math # Sk CIRCUMFLEX ACCENT +007C ; Math # Sm VERTICAL LINE +007E ; Math # Sm TILDE +00AC ; Math # Sm NOT SIGN +00B1 ; Math # Sm PLUS-MINUS SIGN +00D7 ; Math # Sm MULTIPLICATION SIGN +00F7 ; Math # Sm DIVISION SIGN +03D0..03D2 ; Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F6 ; Math # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +0606..0608 ; Math # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +2016 ; Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Math # Pc CHARACTER TIE +2044 ; Math # Sm FRACTION SLASH +2052 ; Math # Sm COMMERCIAL MINUS SIGN +2061..2064 ; Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207A..207C ; Math # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208A..208C ; Math # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Math # L& EULER CONSTANT +210A..2113 ; Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Math # L& DOUBLE-STRUCK CAPITAL N +2118 ; Math # Sm SCRIPT CAPITAL P +2119..211D ; Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Math # L& BLACK-LETTER CAPITAL Z +2129 ; Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Math # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214B ; Math # Sm TURNED AMPERSAND +2190..2194 ; Math # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Math # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Math # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Math # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Math # Sm RIGHTWARDS ARROW FROM BAR +21A7 ; Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21AE ; Math # Sm LEFT RIGHT ARROW WITH STROKE +21B0..21B1 ; Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Math # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Math # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Math # So DOWNWARDS DOUBLE ARROW +21D4 ; Math # Sm LEFT RIGHT DOUBLE ARROW +21D5..21DB ; Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +21F4..22FF ; Math # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2308 ; Math # Ps LEFT CEILING +2309 ; Math # Pe RIGHT CEILING +230A ; Math # Ps LEFT FLOOR +230B ; Math # Pe RIGHT FLOOR +2320..2321 ; Math # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +237C ; Math # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +239B..23B3 ; Math # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23B5 ; Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Math # So RADICAL SYMBOL BOTTOM +23D0 ; Math # So VERTICAL LINE EXTENSION +23DC..23E1 ; Math # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2 ; Math # So WHITE TRAPEZIUM +25A0..25A1 ; Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Math # Sm WHITE RIGHT-POINTING TRIANGLE +25BC..25C0 ; Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Math # Sm WHITE LEFT-POINTING TRIANGLE +25C6..25C7 ; Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +25F8..25FF ; Math # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2605..2606 ; Math # So [2] BLACK STAR..WHITE STAR +2640 ; Math # So FEMALE SIGN +2642 ; Math # So MALE SIGN +2660..2663 ; Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +266F ; Math # Sm MUSIC SHARP SIGN +27C0..27C4 ; Math # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Math # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Math # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Math # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Math # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Math # Ps LEFT WHITE PARENTHESIS +2986 ; Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Math # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Math # Ps LEFT WIGGLY FENCE +29D9 ; Math # Pe RIGHT WIGGLY FENCE +29DA ; Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Math # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Math # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Math # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B30..2B44 ; Math # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B47..2B4C ; Math # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +FB29 ; Math # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FE61 ; Math # Po SMALL ASTERISK +FE62 ; Math # Sm SMALL PLUS SIGN +FE63 ; Math # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Math # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Math # Po SMALL REVERSE SOLIDUS +FF0B ; Math # Sm FULLWIDTH PLUS SIGN +FF1C..FF1E ; Math # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF3C ; Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF5C ; Math # Sm FULLWIDTH VERTICAL LINE +FF5E ; Math # Sm FULLWIDTH TILDE +FFE2 ; Math # Sm FULLWIDTH NOT SIGN +FFE9..FFEC ; Math # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +1D400..1D454 ; Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Math # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Math # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Math # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Math # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Math # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Math # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Math # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Math # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Math # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + +# Total code points: 2310 + +# ================================================ + +# Derived Property: Alphabetic +# Generated from: Uppercase + Lowercase + Lt + Lm + Lo + Nl + Other_Alphabetic + +0041..005A ; Alphabetic # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Alphabetic # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Alphabetic # Lo FEMININE ORDINAL INDICATOR +00B5 ; Alphabetic # L& MICRO SIGN +00BA ; Alphabetic # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Alphabetic # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Alphabetic # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Alphabetic # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Alphabetic # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Alphabetic # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Alphabetic # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Alphabetic # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Alphabetic # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Alphabetic # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; Alphabetic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; Alphabetic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; Alphabetic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; Alphabetic # Lm MODIFIER LETTER VOICING +02EE ; Alphabetic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0345 ; Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +0370..0373 ; Alphabetic # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; Alphabetic # Lm GREEK NUMERAL SIGN +0376..0377 ; Alphabetic # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Alphabetic # Lm GREEK YPOGEGRAMMENI +037B..037D ; Alphabetic # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Alphabetic # L& GREEK CAPITAL LETTER YOT +0386 ; Alphabetic # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Alphabetic # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Alphabetic # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Alphabetic # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Alphabetic # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; Alphabetic # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Alphabetic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Alphabetic # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Alphabetic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; Alphabetic # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05B0..05BD ; Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Alphabetic # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; Alphabetic # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Alphabetic # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +0620..063F ; Alphabetic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; Alphabetic # Lm ARABIC TATWEEL +0641..064A ; Alphabetic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..0657 ; Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +066E..066F ; Alphabetic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; Alphabetic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; Alphabetic # Lo ARABIC LETTER AE +06D6..06DC ; Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Alphabetic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Alphabetic # Mn ARABIC SMALL LOW MEEM +06EE..06EF ; Alphabetic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; Alphabetic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; Alphabetic # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; Alphabetic # Lo SYRIAC LETTER ALAPH +0711 ; Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; Alphabetic # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..073F ; Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +074D..07A5 ; Alphabetic # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; Alphabetic # Lo THAANA LETTER NAA +07CA..07EA ; Alphabetic # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; Alphabetic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; Alphabetic # Lm NKO LAJANYALAN +0800..0815 ; Alphabetic # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0817 ; Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081A ; Alphabetic # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Alphabetic # Lm SAMARITAN MODIFIER LETTER I +0829..082C ; Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +0840..0858 ; Alphabetic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; Alphabetic # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; Alphabetic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; Alphabetic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; Alphabetic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; Alphabetic # Lm ARABIC SMALL FARSI YEH +08D4..08DF ; Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Alphabetic # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Alphabetic # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093D ; Alphabetic # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; Alphabetic # Lo DEVANAGARI OM +0955..0957 ; Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; Alphabetic # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0971 ; Alphabetic # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; Alphabetic # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Alphabetic # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Alphabetic # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Alphabetic # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Alphabetic # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Alphabetic # Lo BENGALI LETTER LA +09B6..09B9 ; Alphabetic # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; Alphabetic # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; Alphabetic # Lo BENGALI LETTER KHANDA TA +09D7 ; Alphabetic # Mc BENGALI AU LENGTH MARK +09DC..09DD ; Alphabetic # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Alphabetic # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09F0..09F1 ; Alphabetic # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; Alphabetic # Lo BENGALI LETTER VEDIC ANUSVARA +0A01..0A02 ; Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Alphabetic # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Alphabetic # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Alphabetic # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Alphabetic # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Alphabetic # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Alphabetic # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Alphabetic # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Alphabetic # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Alphabetic # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; Alphabetic # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Alphabetic # Lo GURMUKHI LETTER FA +0A70..0A71 ; Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; Alphabetic # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Alphabetic # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Alphabetic # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Alphabetic # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Alphabetic # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Alphabetic # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Alphabetic # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Alphabetic # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; Alphabetic # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; Alphabetic # Lo GUJARATI OM +0AE0..0AE1 ; Alphabetic # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AF9 ; Alphabetic # Lo GUJARATI LETTER ZHA +0AFA..0AFC ; Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Alphabetic # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Alphabetic # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Alphabetic # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Alphabetic # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Alphabetic # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Alphabetic # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; Alphabetic # Lo ORIYA SIGN AVAGRAHA +0B3E ; Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Alphabetic # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; Alphabetic # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Alphabetic # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B71 ; Alphabetic # Lo ORIYA LETTER WA +0B82 ; Alphabetic # Mn TAMIL SIGN ANUSVARA +0B83 ; Alphabetic # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Alphabetic # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Alphabetic # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Alphabetic # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Alphabetic # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Alphabetic # Lo TAMIL LETTER JA +0B9E..0B9F ; Alphabetic # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Alphabetic # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; Alphabetic # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Alphabetic # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; Alphabetic # Lo TAMIL OM +0BD7 ; Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; Alphabetic # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Alphabetic # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Alphabetic # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Alphabetic # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; Alphabetic # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; Alphabetic # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Alphabetic # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Alphabetic # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C80 ; Alphabetic # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; Alphabetic # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Alphabetic # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Alphabetic # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Alphabetic # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Alphabetic # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; Alphabetic # Lo KANNADA SIGN AVAGRAHA +0CBE ; Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; Alphabetic # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Alphabetic # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CF1..0CF2 ; Alphabetic # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D00..0D01 ; Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; Alphabetic # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Alphabetic # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Alphabetic # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; Alphabetic # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; Alphabetic # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; Alphabetic # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; Alphabetic # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D7A..0D7F ; Alphabetic # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; Alphabetic # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Alphabetic # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Alphabetic # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Alphabetic # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Alphabetic # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Alphabetic # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCF..0DD1 ; Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; Alphabetic # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; Alphabetic # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; Alphabetic # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Alphabetic # Lm THAI CHARACTER MAIYAMOK +0E4D ; Alphabetic # Mn THAI CHARACTER NIKHAHIT +0E81..0E82 ; Alphabetic # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Alphabetic # Lo LAO LETTER KHO TAM +0E86..0E8A ; Alphabetic # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Alphabetic # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Alphabetic # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; Alphabetic # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; Alphabetic # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EB9 ; Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0EBD ; Alphabetic # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Alphabetic # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Alphabetic # Lm LAO KO LA +0ECD ; Alphabetic # Mn LAO NIGGAHITA +0EDC..0EDF ; Alphabetic # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; Alphabetic # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; Alphabetic # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Alphabetic # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F88..0F8C ; Alphabetic # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +1000..102A ; Alphabetic # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; Alphabetic # Lo MYANMAR LETTER GREAT SA +1050..1055 ; Alphabetic # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; Alphabetic # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; Alphabetic # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Alphabetic # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Alphabetic # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; Alphabetic # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Alphabetic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; Alphabetic # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Alphabetic # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; Alphabetic # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Alphabetic # L& GEORGIAN CAPITAL LETTER YN +10CD ; Alphabetic # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Alphabetic # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Alphabetic # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Alphabetic # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; Alphabetic # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Alphabetic # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Alphabetic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Alphabetic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Alphabetic # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Alphabetic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Alphabetic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Alphabetic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; Alphabetic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; Alphabetic # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Alphabetic # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; Alphabetic # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; Alphabetic # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; Alphabetic # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; Alphabetic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; Alphabetic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; Alphabetic # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; Alphabetic # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1713 ; Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +171F..1731 ; Alphabetic # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1740..1751 ; Alphabetic # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; Alphabetic # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Alphabetic # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; Alphabetic # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D7 ; Alphabetic # Lm KHMER SIGN LEK TOO +17DC ; Alphabetic # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; Alphabetic # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Alphabetic # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Alphabetic # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Alphabetic # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; Alphabetic # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; Alphabetic # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; Alphabetic # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; Alphabetic # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1950..196D ; Alphabetic # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Alphabetic # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; Alphabetic # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; Alphabetic # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; Alphabetic # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; Alphabetic # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1AA7 ; Alphabetic # Lm TAI THAM SIGN MAI YAMOK +1ABF..1AC0 ; Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ACC..1ACE ; Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Alphabetic # Mc BALINESE SIGN BISAH +1B05..1B33 ; Alphabetic # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B35 ; Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B45..1B4C ; Alphabetic # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B80..1B81 ; Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Alphabetic # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; Alphabetic # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; Alphabetic # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C00..1C23 ; Alphabetic # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36 ; Alphabetic # Mn LEPCHA SIGN RAN +1C4D..1C4F ; Alphabetic # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; Alphabetic # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Alphabetic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; Alphabetic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; Alphabetic # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Alphabetic # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; Alphabetic # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Alphabetic # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Alphabetic # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; Alphabetic # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; Alphabetic # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Alphabetic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Alphabetic # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Alphabetic # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Alphabetic # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Alphabetic # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DE7..1DF4 ; Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +1E00..1F15 ; Alphabetic # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Alphabetic # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Alphabetic # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Alphabetic # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Alphabetic # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Alphabetic # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Alphabetic # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Alphabetic # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Alphabetic # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Alphabetic # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Alphabetic # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Alphabetic # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Alphabetic # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Alphabetic # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Alphabetic # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Alphabetic # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Alphabetic # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Alphabetic # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Alphabetic # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL C +2107 ; Alphabetic # L& EULER CONSTANT +210A..2113 ; Alphabetic # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Alphabetic # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Alphabetic # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Alphabetic # L& OHM SIGN +2128 ; Alphabetic # L& BLACK-LETTER CAPITAL Z +212A..212D ; Alphabetic # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; Alphabetic # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; Alphabetic # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; Alphabetic # L& INFORMATION SOURCE +213C..213F ; Alphabetic # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Alphabetic # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Alphabetic # L& TURNED SMALL F +2160..2182 ; Alphabetic # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Alphabetic # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Alphabetic # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +24B6..24E9 ; Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C7B ; Alphabetic # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Alphabetic # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Alphabetic # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; Alphabetic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Alphabetic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Alphabetic # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Alphabetic # L& GEORGIAN SMALL LETTER YN +2D2D ; Alphabetic # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Alphabetic # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Alphabetic # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; Alphabetic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E2F ; Alphabetic # Lm VERTICAL TILDE +3005 ; Alphabetic # Lm IDEOGRAPHIC ITERATION MARK +3006 ; Alphabetic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Alphabetic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Alphabetic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; Alphabetic # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; Alphabetic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Alphabetic # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; Alphabetic # Lo MASU MARK +3041..3096 ; Alphabetic # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; Alphabetic # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Alphabetic # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; Alphabetic # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; Alphabetic # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; Alphabetic # Lo KATAKANA DIGRAPH KOTO +3105..312F ; Alphabetic # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; Alphabetic # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; Alphabetic # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; Alphabetic # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; Alphabetic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; Alphabetic # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; Alphabetic # Lm YI SYLLABLE WU +A016..A48C ; Alphabetic # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; Alphabetic # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Alphabetic # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; Alphabetic # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Alphabetic # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; Alphabetic # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; Alphabetic # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; Alphabetic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Alphabetic # Lo CYRILLIC LETTER MULTIOCULAR O +A674..A67B ; Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A67F ; Alphabetic # Lm CYRILLIC PAYEROK +A680..A69B ; Alphabetic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Alphabetic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; Alphabetic # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Alphabetic # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; Alphabetic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; Alphabetic # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Alphabetic # Lm MODIFIER LETTER US +A771..A787 ; Alphabetic # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; Alphabetic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; Alphabetic # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; Alphabetic # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; Alphabetic # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; Alphabetic # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Alphabetic # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; Alphabetic # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Alphabetic # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Alphabetic # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; Alphabetic # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; Alphabetic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Alphabetic # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; Alphabetic # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; Alphabetic # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; Alphabetic # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; Alphabetic # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A840..A873 ; Alphabetic # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Alphabetic # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A8F2..A8F7 ; Alphabetic # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; Alphabetic # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; Alphabetic # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; Alphabetic # Mn DEVANAGARI VOWEL SIGN AY +A90A..A925 ; Alphabetic # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92A ; Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A930..A946 ; Alphabetic # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Alphabetic # Mc REJANG CONSONANT SIGN H +A960..A97C ; Alphabetic # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Alphabetic # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Alphabetic # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9BF ; Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9CF ; Alphabetic # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; Alphabetic # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; Alphabetic # Mn MYANMAR SIGN SHAN SAW +A9E6 ; Alphabetic # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; Alphabetic # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; Alphabetic # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; Alphabetic # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; Alphabetic # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; Alphabetic # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AA60..AA6F ; Alphabetic # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Alphabetic # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Alphabetic # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; Alphabetic # Lo MYANMAR LETTER AITON RA +AA7B ; Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; Alphabetic # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; Alphabetic # Mn TAI VIET MAI KANG +AAB1 ; Alphabetic # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; Alphabetic # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; Alphabetic # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE ; Alphabetic # Mn TAI VIET VOWEL AM +AAC0 ; Alphabetic # Lo TAI VIET TONE MAI NUENG +AAC2 ; Alphabetic # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Alphabetic # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Alphabetic # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; Alphabetic # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; Alphabetic # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Alphabetic # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Alphabetic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; Alphabetic # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Alphabetic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Alphabetic # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Alphabetic # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; Alphabetic # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; Alphabetic # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +AC00..D7A3 ; Alphabetic # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Alphabetic # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Alphabetic # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; Alphabetic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Alphabetic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; Alphabetic # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Alphabetic # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; Alphabetic # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; Alphabetic # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; Alphabetic # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Alphabetic # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Alphabetic # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Alphabetic # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Alphabetic # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; Alphabetic # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; Alphabetic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; Alphabetic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Alphabetic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; Alphabetic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; Alphabetic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Alphabetic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; Alphabetic # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Alphabetic # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; Alphabetic # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Alphabetic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Alphabetic # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; Alphabetic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; Alphabetic # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Alphabetic # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Alphabetic # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; Alphabetic # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Alphabetic # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Alphabetic # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Alphabetic # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Alphabetic # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Alphabetic # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Alphabetic # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; Alphabetic # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; Alphabetic # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; Alphabetic # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; Alphabetic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; Alphabetic # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; Alphabetic # Nl GOTHIC LETTER NINETY +10342..10349 ; Alphabetic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Alphabetic # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; Alphabetic # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; Alphabetic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; Alphabetic # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Alphabetic # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; Alphabetic # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; Alphabetic # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; Alphabetic # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; Alphabetic # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Alphabetic # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; Alphabetic # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; Alphabetic # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; Alphabetic # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Alphabetic # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Alphabetic # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Alphabetic # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Alphabetic # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Alphabetic # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Alphabetic # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Alphabetic # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; Alphabetic # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Alphabetic # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Alphabetic # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; Alphabetic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Alphabetic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Alphabetic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; Alphabetic # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Alphabetic # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Alphabetic # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Alphabetic # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Alphabetic # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; Alphabetic # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; Alphabetic # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; Alphabetic # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; Alphabetic # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Alphabetic # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; Alphabetic # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; Alphabetic # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; Alphabetic # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; Alphabetic # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; Alphabetic # Lo KHAROSHTHI LETTER A +10A01..10A03 ; Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; Alphabetic # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Alphabetic # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Alphabetic # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; Alphabetic # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; Alphabetic # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; Alphabetic # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; Alphabetic # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; Alphabetic # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; Alphabetic # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; Alphabetic # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; Alphabetic # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; Alphabetic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; Alphabetic # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Alphabetic # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; Alphabetic # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10E80..10EA9 ; Alphabetic # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; Alphabetic # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; Alphabetic # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; Alphabetic # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; Alphabetic # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; Alphabetic # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; Alphabetic # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; Alphabetic # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Alphabetic # Mc BRAHMI SIGN VISARGA +11003..11037 ; Alphabetic # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11045 ; Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11071..11072 ; Alphabetic # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; Alphabetic # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; Alphabetic # Mc KAITHI SIGN VISARGA +11083..110AF ; Alphabetic # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110C2 ; Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; Alphabetic # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11100..11102 ; Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Alphabetic # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11144 ; Alphabetic # Lo CHAKMA LETTER LHAA +11145..11146 ; Alphabetic # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; Alphabetic # Lo CHAKMA LETTER VAA +11150..11172 ; Alphabetic # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; Alphabetic # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Alphabetic # Mc SHARADA SIGN VISARGA +11183..111B2 ; Alphabetic # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Alphabetic # Mc SHARADA VOWEL SIGN AU +111C1..111C4 ; Alphabetic # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111CE ; Alphabetic # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; Alphabetic # Mn SHARADA SIGN INVERTED CANDRABINDU +111DA ; Alphabetic # Lo SHARADA EKAM +111DC ; Alphabetic # Lo SHARADA HEADSTROKE +11200..11211 ; Alphabetic # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Alphabetic # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Alphabetic # Mn KHOJKI SIGN SUKUN +11280..11286 ; Alphabetic # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Alphabetic # Lo MULTANI LETTER GHA +1128A..1128D ; Alphabetic # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Alphabetic # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; Alphabetic # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; Alphabetic # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; Alphabetic # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Alphabetic # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Alphabetic # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Alphabetic # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Alphabetic # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; Alphabetic # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; Alphabetic # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11350 ; Alphabetic # Lo GRANTHA OM +11357 ; Alphabetic # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; Alphabetic # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11400..11434 ; Alphabetic # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA +11445 ; Alphabetic # Mc NEWA SIGN VISARGA +11447..1144A ; Alphabetic # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; Alphabetic # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; Alphabetic # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Alphabetic # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; Alphabetic # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; Alphabetic # Lo TIRHUTA OM +11580..115AE ; Alphabetic # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Alphabetic # Mc SIDDHAM SIGN VISARGA +115D8..115DB ; Alphabetic # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; Alphabetic # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Alphabetic # Mc MODI SIGN VISARGA +11640 ; Alphabetic # Mn MODI SIGN ARDHACANDRA +11644 ; Alphabetic # Lo MODI SIGN HUVA +11680..116AA ; Alphabetic # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B8 ; Alphabetic # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; Alphabetic # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +11740..11746 ; Alphabetic # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; Alphabetic # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Alphabetic # Mc DOGRA SIGN VISARGA +118A0..118DF ; Alphabetic # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; Alphabetic # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; Alphabetic # Lo DIVES AKURU LETTER O +1190C..11913 ; Alphabetic # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Alphabetic # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Alphabetic # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; Alphabetic # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Alphabetic # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Alphabetic # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193F ; Alphabetic # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; Alphabetic # Mc DIVES AKURU MEDIAL YA +11941 ; Alphabetic # Lo DIVES AKURU INITIAL RA +11942 ; Alphabetic # Mc DIVES AKURU MEDIAL RA +119A0..119A7 ; Alphabetic # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Alphabetic # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; Alphabetic # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; Alphabetic # Lo NANDINAGARI HEADSTROKE +119E4 ; Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; Alphabetic # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; Alphabetic # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A35..11A38 ; Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; Alphabetic # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A50 ; Alphabetic # Lo SOYOMBO LETTER A +11A51..11A56 ; Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; Alphabetic # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Alphabetic # Mc SOYOMBO SIGN VISARGA +11A9D ; Alphabetic # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; Alphabetic # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; Alphabetic # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Alphabetic # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C40 ; Alphabetic # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; Alphabetic # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; Alphabetic # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Alphabetic # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Alphabetic # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D46 ; Alphabetic # Lo MASARAM GONDI REPHA +11D47 ; Alphabetic # Mn MASARAM GONDI RA-KARA +11D60..11D65 ; Alphabetic # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Alphabetic # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Alphabetic # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; Alphabetic # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Alphabetic # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Alphabetic # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Alphabetic # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; Alphabetic # Lo GUNJALA GONDI OM +11EE0..11EF2 ; Alphabetic # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11FB0 ; Alphabetic # Lo LISU LETTER YHA +12000..12399 ; Alphabetic # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; Alphabetic # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; Alphabetic # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; Alphabetic # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342E ; Alphabetic # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; Alphabetic # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; Alphabetic # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; Alphabetic # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; Alphabetic # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; Alphabetic # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; Alphabetic # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; Alphabetic # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; Alphabetic # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Alphabetic # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; Alphabetic # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; Alphabetic # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; Alphabetic # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Alphabetic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Alphabetic # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Alphabetic # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; Alphabetic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Alphabetic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; Alphabetic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; Alphabetic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Alphabetic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Alphabetic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; Alphabetic # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; Alphabetic # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; Alphabetic # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; Alphabetic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; Alphabetic # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Alphabetic # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Alphabetic # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Alphabetic # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9E ; Alphabetic # Mn DUPLOYAN DOUBLE MARK +1D400..1D454 ; Alphabetic # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Alphabetic # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Alphabetic # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Alphabetic # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Alphabetic # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Alphabetic # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Alphabetic # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Alphabetic # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Alphabetic # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Alphabetic # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Alphabetic # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Alphabetic # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Alphabetic # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Alphabetic # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Alphabetic # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Alphabetic # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Alphabetic # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Alphabetic # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Alphabetic # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Alphabetic # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Alphabetic # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Alphabetic # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Alphabetic # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Alphabetic # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Alphabetic # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Alphabetic # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Alphabetic # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Alphabetic # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Alphabetic # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Alphabetic # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; Alphabetic # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; Alphabetic # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E000..1E006 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E100..1E12C ; Alphabetic # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Alphabetic # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; Alphabetic # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; Alphabetic # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Alphabetic # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E7E0..1E7E6 ; Alphabetic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Alphabetic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Alphabetic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Alphabetic # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; Alphabetic # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; Alphabetic # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E947 ; Alphabetic # Mn ADLAM HAMZA +1E94B ; Alphabetic # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Alphabetic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Alphabetic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Alphabetic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Alphabetic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Alphabetic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Alphabetic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Alphabetic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Alphabetic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Alphabetic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Alphabetic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Alphabetic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Alphabetic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Alphabetic # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1F130..1F149 ; Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z +20000..2A6DF ; Alphabetic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Alphabetic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; Alphabetic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Alphabetic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Alphabetic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Alphabetic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Alphabetic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 133396 + +# ================================================ + +# Derived Property: Lowercase +# Generated from: Ll + Other_Lowercase + +0061..007A ; Lowercase # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Lowercase # Lo FEMININE ORDINAL INDICATOR +00B5 ; Lowercase # L& MICRO SIGN +00BA ; Lowercase # Lo MASCULINE ORDINAL INDICATOR +00DF..00F6 ; Lowercase # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Lowercase # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Lowercase # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Lowercase # L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Lowercase # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Lowercase # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Lowercase # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Lowercase # L& LATIN SMALL LETTER C WITH CARON +010F ; Lowercase # L& LATIN SMALL LETTER D WITH CARON +0111 ; Lowercase # L& LATIN SMALL LETTER D WITH STROKE +0113 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Lowercase # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Lowercase # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Lowercase # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Lowercase # L& LATIN SMALL LETTER E WITH CARON +011D ; Lowercase # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Lowercase # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Lowercase # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Lowercase # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Lowercase # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Lowercase # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Lowercase # L& LATIN SMALL LETTER I WITH TILDE +012B ; Lowercase # L& LATIN SMALL LETTER I WITH MACRON +012D ; Lowercase # L& LATIN SMALL LETTER I WITH BREVE +012F ; Lowercase # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Lowercase # L& LATIN SMALL LETTER DOTLESS I +0133 ; Lowercase # L& LATIN SMALL LIGATURE IJ +0135 ; Lowercase # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137..0138 ; Lowercase # L& [2] LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA +013A ; Lowercase # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Lowercase # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Lowercase # L& LATIN SMALL LETTER L WITH CARON +0140 ; Lowercase # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Lowercase # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Lowercase # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Lowercase # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Lowercase # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Lowercase # L& LATIN SMALL LETTER ENG +014D ; Lowercase # L& LATIN SMALL LETTER O WITH MACRON +014F ; Lowercase # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Lowercase # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Lowercase # L& LATIN SMALL LIGATURE OE +0155 ; Lowercase # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Lowercase # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Lowercase # L& LATIN SMALL LETTER R WITH CARON +015B ; Lowercase # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Lowercase # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Lowercase # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Lowercase # L& LATIN SMALL LETTER S WITH CARON +0163 ; Lowercase # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Lowercase # L& LATIN SMALL LETTER T WITH CARON +0167 ; Lowercase # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE +016B ; Lowercase # L& LATIN SMALL LETTER U WITH MACRON +016D ; Lowercase # L& LATIN SMALL LETTER U WITH BREVE +016F ; Lowercase # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Lowercase # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Lowercase # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Lowercase # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Lowercase # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Lowercase # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Lowercase # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Lowercase # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Lowercase # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Lowercase # L& LATIN SMALL LETTER TONE SIX +0188 ; Lowercase # L& LATIN SMALL LETTER C WITH HOOK +018C..018D ; Lowercase # L& [2] LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA +0192 ; Lowercase # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Lowercase # L& LATIN SMALL LETTER HV +0199..019B ; Lowercase # L& [3] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE +019E ; Lowercase # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Lowercase # L& LATIN SMALL LETTER OI +01A5 ; Lowercase # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Lowercase # L& LATIN SMALL LETTER TONE TWO +01AA..01AB ; Lowercase # L& [2] LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK +01AD ; Lowercase # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Lowercase # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Lowercase # L& LATIN SMALL LETTER Z WITH STROKE +01B9..01BA ; Lowercase # L& [2] LATIN SMALL LETTER EZH REVERSED..LATIN SMALL LETTER EZH WITH TAIL +01BD..01BF ; Lowercase # L& [3] LATIN SMALL LETTER TONE FIVE..LATIN LETTER WYNN +01C6 ; Lowercase # L& LATIN SMALL LETTER DZ WITH CARON +01C9 ; Lowercase # L& LATIN SMALL LETTER LJ +01CC ; Lowercase # L& LATIN SMALL LETTER NJ +01CE ; Lowercase # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Lowercase # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Lowercase # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Lowercase # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Lowercase # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Lowercase # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Lowercase # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Lowercase # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Lowercase # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Lowercase # L& LATIN SMALL LETTER K WITH CARON +01EB ; Lowercase # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Lowercase # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F0 ; Lowercase # L& [2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON +01F3 ; Lowercase # L& LATIN SMALL LETTER DZ +01F5 ; Lowercase # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Lowercase # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Lowercase # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Lowercase # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Lowercase # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Lowercase # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Lowercase # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Lowercase # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Lowercase # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Lowercase # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Lowercase # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Lowercase # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Lowercase # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Lowercase # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Lowercase # L& LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Lowercase # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Lowercase # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Lowercase # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Lowercase # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Lowercase # L& LATIN SMALL LETTER YOGH +021F ; Lowercase # L& LATIN SMALL LETTER H WITH CARON +0221 ; Lowercase # L& LATIN SMALL LETTER D WITH CURL +0223 ; Lowercase # L& LATIN SMALL LETTER OU +0225 ; Lowercase # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Lowercase # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Lowercase # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Lowercase # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Lowercase # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233..0239 ; Lowercase # L& [7] LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH +023C ; Lowercase # L& LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Lowercase # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Lowercase # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Lowercase # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Lowercase # L& LATIN SMALL LETTER J WITH STROKE +024B ; Lowercase # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Lowercase # L& LATIN SMALL LETTER R WITH STROKE +024F..0293 ; Lowercase # L& [69] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER EZH WITH CURL +0295..02AF ; Lowercase # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Lowercase # L& GREEK SMALL LETTER HETA +0373 ; Lowercase # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Lowercase # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Lowercase # Lm GREEK YPOGEGRAMMENI +037B..037D ; Lowercase # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Lowercase # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Lowercase # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Lowercase # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Lowercase # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Lowercase # L& GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Lowercase # L& GREEK SMALL LETTER STIGMA +03DD ; Lowercase # L& GREEK SMALL LETTER DIGAMMA +03DF ; Lowercase # L& GREEK SMALL LETTER KOPPA +03E1 ; Lowercase # L& GREEK SMALL LETTER SAMPI +03E3 ; Lowercase # L& COPTIC SMALL LETTER SHEI +03E5 ; Lowercase # L& COPTIC SMALL LETTER FEI +03E7 ; Lowercase # L& COPTIC SMALL LETTER KHEI +03E9 ; Lowercase # L& COPTIC SMALL LETTER HORI +03EB ; Lowercase # L& COPTIC SMALL LETTER GANGIA +03ED ; Lowercase # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Lowercase # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Lowercase # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Lowercase # L& GREEK SMALL LETTER SHO +03FB..03FC ; Lowercase # L& [2] GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL +0430..045F ; Lowercase # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Lowercase # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Lowercase # L& CYRILLIC SMALL LETTER YAT +0465 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Lowercase # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Lowercase # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Lowercase # L& CYRILLIC SMALL LETTER KSI +0471 ; Lowercase # L& CYRILLIC SMALL LETTER PSI +0473 ; Lowercase # L& CYRILLIC SMALL LETTER FITA +0475 ; Lowercase # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Lowercase # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Lowercase # L& CYRILLIC SMALL LETTER UK +047B ; Lowercase # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Lowercase # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Lowercase # L& CYRILLIC SMALL LETTER OT +0481 ; Lowercase # L& CYRILLIC SMALL LETTER KOPPA +048B ; Lowercase # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Lowercase # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Lowercase # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Lowercase # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH DESCENDER +049D ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Lowercase # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Lowercase # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Lowercase # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Lowercase # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Lowercase # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Lowercase # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Lowercase # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Lowercase # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Lowercase # L& CYRILLIC SMALL LETTER SHHA +04BD ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Lowercase # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Lowercase # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Lowercase # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Lowercase # L& CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Lowercase # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Lowercase # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Lowercase # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Lowercase # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Lowercase # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Lowercase # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Lowercase # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Lowercase # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Lowercase # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Lowercase # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Lowercase # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Lowercase # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Lowercase # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Lowercase # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Lowercase # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Lowercase # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Lowercase # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Lowercase # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Lowercase # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Lowercase # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Lowercase # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Lowercase # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Lowercase # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Lowercase # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Lowercase # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Lowercase # L& CYRILLIC SMALL LETTER LHA +0517 ; Lowercase # L& CYRILLIC SMALL LETTER RHA +0519 ; Lowercase # L& CYRILLIC SMALL LETTER YAE +051B ; Lowercase # L& CYRILLIC SMALL LETTER QA +051D ; Lowercase # L& CYRILLIC SMALL LETTER WE +051F ; Lowercase # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Lowercase # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Lowercase # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Lowercase # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Lowercase # L& CYRILLIC SMALL LETTER DZZHE +052D ; Lowercase # L& CYRILLIC SMALL LETTER DCHE +052F ; Lowercase # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0560..0588 ; Lowercase # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10D0..10FA ; Lowercase # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Lowercase # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13F8..13FD ; Lowercase # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Lowercase # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1D00..1D2B ; Lowercase # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Lowercase # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Lowercase # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E01 ; Lowercase # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Lowercase # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Lowercase # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Lowercase # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Lowercase # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Lowercase # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Lowercase # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Lowercase # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Lowercase # L& LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Lowercase # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Lowercase # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Lowercase # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Lowercase # L& LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Lowercase # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Lowercase # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Lowercase # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Lowercase # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Lowercase # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Lowercase # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Lowercase # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Lowercase # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Lowercase # L& LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Lowercase # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Lowercase # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Lowercase # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Lowercase # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Lowercase # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Lowercase # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Lowercase # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Lowercase # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Lowercase # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Lowercase # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Lowercase # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Lowercase # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Lowercase # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Lowercase # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Lowercase # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Lowercase # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Lowercase # L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Lowercase # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Lowercase # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Lowercase # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Lowercase # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Lowercase # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Lowercase # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Lowercase # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Lowercase # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Lowercase # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Lowercase # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Lowercase # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Lowercase # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Lowercase # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Lowercase # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Lowercase # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Lowercase # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Lowercase # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Lowercase # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Lowercase # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Lowercase # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Lowercase # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Lowercase # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Lowercase # L& LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Lowercase # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Lowercase # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Lowercase # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Lowercase # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Lowercase # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Lowercase # L& LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9D ; Lowercase # L& [9] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH HIGH STROKE +1E9F ; Lowercase # L& LATIN SMALL LETTER DELTA +1EA1 ; Lowercase # L& LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Lowercase # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Lowercase # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Lowercase # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Lowercase # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Lowercase # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Lowercase # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Lowercase # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Lowercase # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Lowercase # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Lowercase # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Lowercase # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Lowercase # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Lowercase # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Lowercase # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Lowercase # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Lowercase # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Lowercase # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Lowercase # L& LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Lowercase # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Lowercase # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Lowercase # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Lowercase # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Lowercase # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Lowercase # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Lowercase # L& [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Lowercase # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Lowercase # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Lowercase # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Lowercase # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1F87 ; Lowercase # L& [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F90..1F97 ; Lowercase # L& [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA0..1FA7 ; Lowercase # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FB0..1FB4 ; Lowercase # L& [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Lowercase # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBE ; Lowercase # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Lowercase # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Lowercase # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FD0..1FD3 ; Lowercase # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Lowercase # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Lowercase # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Lowercase # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Lowercase # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +2071 ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +210A ; Lowercase # L& SCRIPT SMALL G +210E..210F ; Lowercase # L& [2] PLANCK CONSTANT..PLANCK CONSTANT OVER TWO PI +2113 ; Lowercase # L& SCRIPT SMALL L +212F ; Lowercase # L& SCRIPT SMALL E +2134 ; Lowercase # L& SCRIPT SMALL O +2139 ; Lowercase # L& INFORMATION SOURCE +213C..213D ; Lowercase # L& [2] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK SMALL GAMMA +2146..2149 ; Lowercase # L& [4] DOUBLE-STRUCK ITALIC SMALL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Lowercase # L& TURNED SMALL F +2170..217F ; Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Lowercase # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Lowercase # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Lowercase # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Lowercase # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Lowercase # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Lowercase # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Lowercase # L& LATIN SMALL LETTER Z WITH DESCENDER +2C71 ; Lowercase # L& LATIN SMALL LETTER V WITH RIGHT HOOK +2C73..2C74 ; Lowercase # L& [2] LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL +2C76..2C7B ; Lowercase # L& [6] LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C81 ; Lowercase # L& COPTIC SMALL LETTER ALFA +2C83 ; Lowercase # L& COPTIC SMALL LETTER VIDA +2C85 ; Lowercase # L& COPTIC SMALL LETTER GAMMA +2C87 ; Lowercase # L& COPTIC SMALL LETTER DALDA +2C89 ; Lowercase # L& COPTIC SMALL LETTER EIE +2C8B ; Lowercase # L& COPTIC SMALL LETTER SOU +2C8D ; Lowercase # L& COPTIC SMALL LETTER ZATA +2C8F ; Lowercase # L& COPTIC SMALL LETTER HATE +2C91 ; Lowercase # L& COPTIC SMALL LETTER THETHE +2C93 ; Lowercase # L& COPTIC SMALL LETTER IAUDA +2C95 ; Lowercase # L& COPTIC SMALL LETTER KAPA +2C97 ; Lowercase # L& COPTIC SMALL LETTER LAULA +2C99 ; Lowercase # L& COPTIC SMALL LETTER MI +2C9B ; Lowercase # L& COPTIC SMALL LETTER NI +2C9D ; Lowercase # L& COPTIC SMALL LETTER KSI +2C9F ; Lowercase # L& COPTIC SMALL LETTER O +2CA1 ; Lowercase # L& COPTIC SMALL LETTER PI +2CA3 ; Lowercase # L& COPTIC SMALL LETTER RO +2CA5 ; Lowercase # L& COPTIC SMALL LETTER SIMA +2CA7 ; Lowercase # L& COPTIC SMALL LETTER TAU +2CA9 ; Lowercase # L& COPTIC SMALL LETTER UA +2CAB ; Lowercase # L& COPTIC SMALL LETTER FI +2CAD ; Lowercase # L& COPTIC SMALL LETTER KHI +2CAF ; Lowercase # L& COPTIC SMALL LETTER PSI +2CB1 ; Lowercase # L& COPTIC SMALL LETTER OOU +2CB3 ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Lowercase # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Lowercase # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Lowercase # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Lowercase # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Lowercase # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Lowercase # L& COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Lowercase # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3..2CE4 ; Lowercase # L& [2] COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI +2CEC ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Lowercase # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Lowercase # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Lowercase # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Lowercase # L& GEORGIAN SMALL LETTER YN +2D2D ; Lowercase # L& GEORGIAN SMALL LETTER AEN +A641 ; Lowercase # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Lowercase # L& CYRILLIC SMALL LETTER DZELO +A645 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Lowercase # L& CYRILLIC SMALL LETTER IOTA +A649 ; Lowercase # L& CYRILLIC SMALL LETTER DJERV +A64B ; Lowercase # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Lowercase # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Lowercase # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Lowercase # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Lowercase # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Lowercase # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Lowercase # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Lowercase # L& CYRILLIC SMALL LETTER YN +A661 ; Lowercase # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Lowercase # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Lowercase # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Lowercase # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Lowercase # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Lowercase # L& CYRILLIC SMALL LETTER DWE +A683 ; Lowercase # L& CYRILLIC SMALL LETTER DZWE +A685 ; Lowercase # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Lowercase # L& CYRILLIC SMALL LETTER CCHE +A689 ; Lowercase # L& CYRILLIC SMALL LETTER DZZE +A68B ; Lowercase # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Lowercase # L& CYRILLIC SMALL LETTER TWE +A68F ; Lowercase # L& CYRILLIC SMALL LETTER TSWE +A691 ; Lowercase # L& CYRILLIC SMALL LETTER TSSE +A693 ; Lowercase # L& CYRILLIC SMALL LETTER TCHE +A695 ; Lowercase # L& CYRILLIC SMALL LETTER HWE +A697 ; Lowercase # L& CYRILLIC SMALL LETTER SHWE +A699 ; Lowercase # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Lowercase # L& CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A723 ; Lowercase # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Lowercase # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Lowercase # L& LATIN SMALL LETTER HENG +A729 ; Lowercase # L& LATIN SMALL LETTER TZ +A72B ; Lowercase # L& LATIN SMALL LETTER TRESILLO +A72D ; Lowercase # L& LATIN SMALL LETTER CUATRILLO +A72F..A731 ; Lowercase # L& [3] LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S +A733 ; Lowercase # L& LATIN SMALL LETTER AA +A735 ; Lowercase # L& LATIN SMALL LETTER AO +A737 ; Lowercase # L& LATIN SMALL LETTER AU +A739 ; Lowercase # L& LATIN SMALL LETTER AV +A73B ; Lowercase # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Lowercase # L& LATIN SMALL LETTER AY +A73F ; Lowercase # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Lowercase # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Lowercase # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Lowercase # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Lowercase # L& LATIN SMALL LETTER BROKEN L +A749 ; Lowercase # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Lowercase # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Lowercase # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Lowercase # L& LATIN SMALL LETTER OO +A751 ; Lowercase # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Lowercase # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Lowercase # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Lowercase # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Lowercase # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Lowercase # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Lowercase # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Lowercase # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Lowercase # L& LATIN SMALL LETTER VY +A763 ; Lowercase # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Lowercase # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Lowercase # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Lowercase # L& LATIN SMALL LETTER VEND +A76B ; Lowercase # L& LATIN SMALL LETTER ET +A76D ; Lowercase # L& LATIN SMALL LETTER IS +A76F ; Lowercase # L& LATIN SMALL LETTER CON +A770 ; Lowercase # Lm MODIFIER LETTER US +A771..A778 ; Lowercase # L& [8] LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM +A77A ; Lowercase # L& LATIN SMALL LETTER INSULAR D +A77C ; Lowercase # L& LATIN SMALL LETTER INSULAR F +A77F ; Lowercase # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Lowercase # L& LATIN SMALL LETTER TURNED L +A783 ; Lowercase # L& LATIN SMALL LETTER INSULAR R +A785 ; Lowercase # L& LATIN SMALL LETTER INSULAR S +A787 ; Lowercase # L& LATIN SMALL LETTER INSULAR T +A78C ; Lowercase # L& LATIN SMALL LETTER SALTILLO +A78E ; Lowercase # L& LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A791 ; Lowercase # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A795 ; Lowercase # L& [3] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER H WITH PALATAL HOOK +A797 ; Lowercase # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Lowercase # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Lowercase # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Lowercase # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Lowercase # L& LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Lowercase # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Lowercase # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Lowercase # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Lowercase # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Lowercase # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7AF ; Lowercase # L& LATIN LETTER SMALL CAPITAL Q +A7B5 ; Lowercase # L& LATIN SMALL LETTER BETA +A7B7 ; Lowercase # L& LATIN SMALL LETTER OMEGA +A7B9 ; Lowercase # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Lowercase # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Lowercase # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Lowercase # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Lowercase # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Lowercase # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Lowercase # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Lowercase # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D1 ; Lowercase # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Lowercase # L& LATIN SMALL LETTER DOUBLE THORN +A7D5 ; Lowercase # L& LATIN SMALL LETTER DOUBLE WYNN +A7D7 ; Lowercase # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Lowercase # L& LATIN SMALL LETTER SIGMOID S +A7F6 ; Lowercase # L& LATIN SMALL LETTER REVERSED HALF H +A7F8..A7F9 ; Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Lowercase # L& LATIN LETTER SMALL CAPITAL TURNED M +AB30..AB5A ; Lowercase # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Lowercase # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB70..ABBF ; Lowercase # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Lowercase # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Lowercase # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Lowercase # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Lowercase # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Lowercase # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Lowercase # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Lowercase # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Lowercase # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Lowercase # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10780 ; Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10CC0..10CF2 ; Lowercase # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +118C0..118DF ; Lowercase # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Lowercase # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1D41A..1D433 ; Lowercase # L& [26] MATHEMATICAL BOLD SMALL A..MATHEMATICAL BOLD SMALL Z +1D44E..1D454 ; Lowercase # L& [7] MATHEMATICAL ITALIC SMALL A..MATHEMATICAL ITALIC SMALL G +1D456..1D467 ; Lowercase # L& [18] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL Z +1D482..1D49B ; Lowercase # L& [26] MATHEMATICAL BOLD ITALIC SMALL A..MATHEMATICAL BOLD ITALIC SMALL Z +1D4B6..1D4B9 ; Lowercase # L& [4] MATHEMATICAL SCRIPT SMALL A..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Lowercase # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Lowercase # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D4CF ; Lowercase # L& [11] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL SCRIPT SMALL Z +1D4EA..1D503 ; Lowercase # L& [26] MATHEMATICAL BOLD SCRIPT SMALL A..MATHEMATICAL BOLD SCRIPT SMALL Z +1D51E..1D537 ; Lowercase # L& [26] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL FRAKTUR SMALL Z +1D552..1D56B ; Lowercase # L& [26] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL DOUBLE-STRUCK SMALL Z +1D586..1D59F ; Lowercase # L& [26] MATHEMATICAL BOLD FRAKTUR SMALL A..MATHEMATICAL BOLD FRAKTUR SMALL Z +1D5BA..1D5D3 ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF SMALL A..MATHEMATICAL SANS-SERIF SMALL Z +1D5EE..1D607 ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF BOLD SMALL A..MATHEMATICAL SANS-SERIF BOLD SMALL Z +1D622..1D63B ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF ITALIC SMALL A..MATHEMATICAL SANS-SERIF ITALIC SMALL Z +1D656..1D66F ; Lowercase # L& [26] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL A..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL Z +1D68A..1D6A5 ; Lowercase # L& [28] MATHEMATICAL MONOSPACE SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6C2..1D6DA ; Lowercase # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6E1 ; Lowercase # L& [6] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL BOLD PI SYMBOL +1D6FC..1D714 ; Lowercase # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D71B ; Lowercase # L& [6] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL ITALIC PI SYMBOL +1D736..1D74E ; Lowercase # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D755 ; Lowercase # L& [6] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC PI SYMBOL +1D770..1D788 ; Lowercase # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D78F ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD PI SYMBOL +1D7AA..1D7C2 ; Lowercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7C9 ; Lowercase # L& [6] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC PI SYMBOL +1D7CB ; Lowercase # L& MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Lowercase # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Lowercase # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E922..1E943 ; Lowercase # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 2471 + +# ================================================ + +# Derived Property: Uppercase +# Generated from: Lu + Other_Uppercase + +0041..005A ; Uppercase # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00C0..00D6 ; Uppercase # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DE ; Uppercase # L& [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN +0100 ; Uppercase # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Uppercase # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Uppercase # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Uppercase # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Uppercase # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Uppercase # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Uppercase # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Uppercase # L& LATIN CAPITAL LETTER D WITH STROKE +0112 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Uppercase # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Uppercase # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Uppercase # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Uppercase # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Uppercase # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Uppercase # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Uppercase # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Uppercase # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Uppercase # L& LATIN CAPITAL LETTER H WITH STROKE +0128 ; Uppercase # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; Uppercase # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Uppercase # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Uppercase # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Uppercase # L& LATIN CAPITAL LIGATURE IJ +0134 ; Uppercase # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Uppercase # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Uppercase # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Uppercase # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Uppercase # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Uppercase # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Uppercase # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Uppercase # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Uppercase # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Uppercase # L& LATIN CAPITAL LETTER N WITH CARON +014A ; Uppercase # L& LATIN CAPITAL LETTER ENG +014C ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Uppercase # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Uppercase # L& LATIN CAPITAL LIGATURE OE +0154 ; Uppercase # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Uppercase # L& LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Uppercase # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Uppercase # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Uppercase # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Uppercase # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Uppercase # L& LATIN CAPITAL LETTER S WITH CARON +0162 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Uppercase # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Uppercase # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Uppercase # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Uppercase # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Uppercase # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Uppercase # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Uppercase # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Uppercase # L& LATIN CAPITAL LETTER Z WITH CARON +0181..0182 ; Uppercase # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Uppercase # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Uppercase # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Uppercase # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Uppercase # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Uppercase # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Uppercase # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Uppercase # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Uppercase # L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Uppercase # L& LATIN CAPITAL LETTER OI +01A4 ; Uppercase # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Uppercase # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Uppercase # L& LATIN CAPITAL LETTER ESH +01AC ; Uppercase # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Uppercase # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Uppercase # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Uppercase # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Uppercase # L& LATIN CAPITAL LETTER TONE FIVE +01C4 ; Uppercase # L& LATIN CAPITAL LETTER DZ WITH CARON +01C7 ; Uppercase # L& LATIN CAPITAL LETTER LJ +01CA ; Uppercase # L& LATIN CAPITAL LETTER NJ +01CD ; Uppercase # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Uppercase # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Uppercase # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Uppercase # L& LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Uppercase # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Uppercase # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Uppercase # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Uppercase # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Uppercase # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Uppercase # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Uppercase # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1 ; Uppercase # L& LATIN CAPITAL LETTER DZ +01F4 ; Uppercase # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Uppercase # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Uppercase # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Uppercase # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Uppercase # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Uppercase # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Uppercase # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Uppercase # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Uppercase # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Uppercase # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Uppercase # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Uppercase # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Uppercase # L& LATIN CAPITAL LETTER YOGH +021E ; Uppercase # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Uppercase # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Uppercase # L& LATIN CAPITAL LETTER OU +0224 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Uppercase # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Uppercase # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Uppercase # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Uppercase # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Uppercase # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Uppercase # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Uppercase # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Uppercase # L& LATIN CAPITAL LETTER Y WITH STROKE +0370 ; Uppercase # L& GREEK CAPITAL LETTER HETA +0372 ; Uppercase # L& GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Uppercase # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Uppercase # L& GREEK CAPITAL LETTER YOT +0386 ; Uppercase # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Uppercase # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Uppercase # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Uppercase # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Uppercase # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Uppercase # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03CF ; Uppercase # L& GREEK CAPITAL KAI SYMBOL +03D2..03D4 ; Uppercase # L& [3] GREEK UPSILON WITH HOOK SYMBOL..GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL +03D8 ; Uppercase # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Uppercase # L& GREEK LETTER STIGMA +03DC ; Uppercase # L& GREEK LETTER DIGAMMA +03DE ; Uppercase # L& GREEK LETTER KOPPA +03E0 ; Uppercase # L& GREEK LETTER SAMPI +03E2 ; Uppercase # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Uppercase # L& COPTIC CAPITAL LETTER FEI +03E6 ; Uppercase # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Uppercase # L& COPTIC CAPITAL LETTER HORI +03EA ; Uppercase # L& COPTIC CAPITAL LETTER GANGIA +03EC ; Uppercase # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Uppercase # L& COPTIC CAPITAL LETTER DEI +03F4 ; Uppercase # L& GREEK CAPITAL THETA SYMBOL +03F7 ; Uppercase # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Uppercase # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Uppercase # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Uppercase # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; Uppercase # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Uppercase # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Uppercase # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Uppercase # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Uppercase # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Uppercase # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Uppercase # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Uppercase # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Uppercase # L& CYRILLIC CAPITAL LETTER UK +047A ; Uppercase # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Uppercase # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Uppercase # L& CYRILLIC CAPITAL LETTER OT +0480 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Uppercase # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Uppercase # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Uppercase # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Uppercase # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Uppercase # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Uppercase # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Uppercase # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Uppercase # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Uppercase # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Uppercase # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Uppercase # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Uppercase # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Uppercase # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Uppercase # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Uppercase # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Uppercase # L& CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Uppercase # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Uppercase # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Uppercase # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Uppercase # L& CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Uppercase # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Uppercase # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Uppercase # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Uppercase # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Uppercase # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Uppercase # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Uppercase # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Uppercase # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Uppercase # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Uppercase # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Uppercase # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Uppercase # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Uppercase # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Uppercase # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Uppercase # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Uppercase # L& CYRILLIC CAPITAL LETTER YAE +051A ; Uppercase # L& CYRILLIC CAPITAL LETTER QA +051C ; Uppercase # L& CYRILLIC CAPITAL LETTER WE +051E ; Uppercase # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Uppercase # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Uppercase # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Uppercase # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Uppercase # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Uppercase # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Uppercase # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +10A0..10C5 ; Uppercase # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Uppercase # L& GEORGIAN CAPITAL LETTER YN +10CD ; Uppercase # L& GEORGIAN CAPITAL LETTER AEN +13A0..13F5 ; Uppercase # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C90..1CBA ; Uppercase # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Uppercase # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Uppercase # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Uppercase # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Uppercase # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Uppercase # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Uppercase # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Uppercase # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Uppercase # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Uppercase # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Uppercase # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Uppercase # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Uppercase # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Uppercase # L& LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E ; Uppercase # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Uppercase # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Uppercase # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Uppercase # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Uppercase # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Uppercase # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Uppercase # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Uppercase # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Uppercase # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Uppercase # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Uppercase # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Uppercase # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Uppercase # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Uppercase # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Uppercase # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Uppercase # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Uppercase # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Uppercase # L& LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Uppercase # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Uppercase # L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Uppercase # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Uppercase # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Uppercase # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Uppercase # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Uppercase # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Uppercase # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Uppercase # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Uppercase # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Uppercase # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Uppercase # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Uppercase # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Uppercase # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Uppercase # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Uppercase # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Uppercase # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Uppercase # L& LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Uppercase # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Uppercase # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Uppercase # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Uppercase # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9E ; Uppercase # L& LATIN CAPITAL LETTER SHARP S +1EA0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Uppercase # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Uppercase # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Uppercase # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Uppercase # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Uppercase # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Uppercase # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Uppercase # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Uppercase # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Uppercase # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Uppercase # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Uppercase # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Uppercase # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Uppercase # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Uppercase # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Uppercase # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Uppercase # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Uppercase # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Uppercase # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Uppercase # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Uppercase # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Uppercase # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Uppercase # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Uppercase # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Uppercase # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1FB8..1FBB ; Uppercase # L& [4] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH OXIA +1FC8..1FCB ; Uppercase # L& [4] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH OXIA +1FD8..1FDB ; Uppercase # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Uppercase # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF8..1FFB ; Uppercase # L& [4] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH OXIA +2102 ; Uppercase # L& DOUBLE-STRUCK CAPITAL C +2107 ; Uppercase # L& EULER CONSTANT +210B..210D ; Uppercase # L& [3] SCRIPT CAPITAL H..DOUBLE-STRUCK CAPITAL H +2110..2112 ; Uppercase # L& [3] SCRIPT CAPITAL I..SCRIPT CAPITAL L +2115 ; Uppercase # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Uppercase # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Uppercase # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Uppercase # L& OHM SIGN +2128 ; Uppercase # L& BLACK-LETTER CAPITAL Z +212A..212D ; Uppercase # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +2130..2133 ; Uppercase # L& [4] SCRIPT CAPITAL E..SCRIPT CAPITAL M +213E..213F ; Uppercase # L& [2] DOUBLE-STRUCK CAPITAL GAMMA..DOUBLE-STRUCK CAPITAL PI +2145 ; Uppercase # L& DOUBLE-STRUCK ITALIC CAPITAL D +2160..216F ; Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Uppercase # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Uppercase # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Uppercase # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Uppercase # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Uppercase # L& LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Uppercase # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Uppercase # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Uppercase # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Uppercase # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Uppercase # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Uppercase # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Uppercase # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Uppercase # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Uppercase # L& COPTIC CAPITAL LETTER EIE +2C8A ; Uppercase # L& COPTIC CAPITAL LETTER SOU +2C8C ; Uppercase # L& COPTIC CAPITAL LETTER ZATA +2C8E ; Uppercase # L& COPTIC CAPITAL LETTER HATE +2C90 ; Uppercase # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Uppercase # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Uppercase # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Uppercase # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Uppercase # L& COPTIC CAPITAL LETTER MI +2C9A ; Uppercase # L& COPTIC CAPITAL LETTER NI +2C9C ; Uppercase # L& COPTIC CAPITAL LETTER KSI +2C9E ; Uppercase # L& COPTIC CAPITAL LETTER O +2CA0 ; Uppercase # L& COPTIC CAPITAL LETTER PI +2CA2 ; Uppercase # L& COPTIC CAPITAL LETTER RO +2CA4 ; Uppercase # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Uppercase # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Uppercase # L& COPTIC CAPITAL LETTER UA +2CAA ; Uppercase # L& COPTIC CAPITAL LETTER FI +2CAC ; Uppercase # L& COPTIC CAPITAL LETTER KHI +2CAE ; Uppercase # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Uppercase # L& COPTIC CAPITAL LETTER OOU +2CB2 ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Uppercase # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Uppercase # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Uppercase # L& COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Uppercase # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Uppercase # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Uppercase # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Uppercase # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Uppercase # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Uppercase # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Uppercase # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Uppercase # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Uppercase # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Uppercase # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Uppercase # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Uppercase # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Uppercase # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Uppercase # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Uppercase # L& CYRILLIC CAPITAL LETTER YN +A660 ; Uppercase # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Uppercase # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Uppercase # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Uppercase # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Uppercase # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Uppercase # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Uppercase # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Uppercase # L& CYRILLIC CAPITAL LETTER CCHE +A688 ; Uppercase # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Uppercase # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Uppercase # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Uppercase # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Uppercase # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Uppercase # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Uppercase # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Uppercase # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Uppercase # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Uppercase # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Uppercase # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Uppercase # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Uppercase # L& LATIN CAPITAL LETTER HENG +A728 ; Uppercase # L& LATIN CAPITAL LETTER TZ +A72A ; Uppercase # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Uppercase # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Uppercase # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Uppercase # L& LATIN CAPITAL LETTER AA +A734 ; Uppercase # L& LATIN CAPITAL LETTER AO +A736 ; Uppercase # L& LATIN CAPITAL LETTER AU +A738 ; Uppercase # L& LATIN CAPITAL LETTER AV +A73A ; Uppercase # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Uppercase # L& LATIN CAPITAL LETTER AY +A73E ; Uppercase # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Uppercase # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Uppercase # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Uppercase # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Uppercase # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Uppercase # L& LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Uppercase # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Uppercase # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Uppercase # L& LATIN CAPITAL LETTER OO +A750 ; Uppercase # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Uppercase # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Uppercase # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Uppercase # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Uppercase # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Uppercase # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Uppercase # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Uppercase # L& LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Uppercase # L& LATIN CAPITAL LETTER VY +A762 ; Uppercase # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Uppercase # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Uppercase # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Uppercase # L& LATIN CAPITAL LETTER VEND +A76A ; Uppercase # L& LATIN CAPITAL LETTER ET +A76C ; Uppercase # L& LATIN CAPITAL LETTER IS +A76E ; Uppercase # L& LATIN CAPITAL LETTER CON +A779 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Uppercase # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Uppercase # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Uppercase # L& LATIN CAPITAL LETTER TURNED L +A782 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR S +A786 ; Uppercase # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Uppercase # L& LATIN CAPITAL LETTER SALTILLO +A78D ; Uppercase # L& LATIN CAPITAL LETTER TURNED H +A790 ; Uppercase # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Uppercase # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Uppercase # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Uppercase # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Uppercase # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Uppercase # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Uppercase # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Uppercase # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Uppercase # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Uppercase # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Uppercase # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Uppercase # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Uppercase # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Uppercase # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Uppercase # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Uppercase # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Uppercase # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Uppercase # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Uppercase # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7D0 ; Uppercase # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Uppercase # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Uppercase # L& LATIN CAPITAL LETTER SIGMOID S +A7F5 ; Uppercase # L& LATIN CAPITAL LETTER REVERSED HALF H +FF21..FF3A ; Uppercase # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Uppercase # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Uppercase # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Uppercase # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Uppercase # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Uppercase # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Uppercase # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Uppercase # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +118A0..118BF ; Uppercase # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Uppercase # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1D400..1D419 ; Uppercase # L& [26] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL BOLD CAPITAL Z +1D434..1D44D ; Uppercase # L& [26] MATHEMATICAL ITALIC CAPITAL A..MATHEMATICAL ITALIC CAPITAL Z +1D468..1D481 ; Uppercase # L& [26] MATHEMATICAL BOLD ITALIC CAPITAL A..MATHEMATICAL BOLD ITALIC CAPITAL Z +1D49C ; Uppercase # L& MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Uppercase # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Uppercase # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Uppercase # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Uppercase # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B5 ; Uppercase # L& [8] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT CAPITAL Z +1D4D0..1D4E9 ; Uppercase # L& [26] MATHEMATICAL BOLD SCRIPT CAPITAL A..MATHEMATICAL BOLD SCRIPT CAPITAL Z +1D504..1D505 ; Uppercase # L& [2] MATHEMATICAL FRAKTUR CAPITAL A..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Uppercase # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Uppercase # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Uppercase # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D538..1D539 ; Uppercase # L& [2] MATHEMATICAL DOUBLE-STRUCK CAPITAL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Uppercase # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Uppercase # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Uppercase # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Uppercase # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D56C..1D585 ; Uppercase # L& [26] MATHEMATICAL BOLD FRAKTUR CAPITAL A..MATHEMATICAL BOLD FRAKTUR CAPITAL Z +1D5A0..1D5B9 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF CAPITAL A..MATHEMATICAL SANS-SERIF CAPITAL Z +1D5D4..1D5ED ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF BOLD CAPITAL A..MATHEMATICAL SANS-SERIF BOLD CAPITAL Z +1D608..1D621 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF ITALIC CAPITAL Z +1D63C..1D655 ; Uppercase # L& [26] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL A..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL Z +1D670..1D689 ; Uppercase # L& [26] MATHEMATICAL MONOSPACE CAPITAL A..MATHEMATICAL MONOSPACE CAPITAL Z +1D6A8..1D6C0 ; Uppercase # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6E2..1D6FA ; Uppercase # L& [25] MATHEMATICAL ITALIC CAPITAL ALPHA..MATHEMATICAL ITALIC CAPITAL OMEGA +1D71C..1D734 ; Uppercase # L& [25] MATHEMATICAL BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D756..1D76E ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D790..1D7A8 ; Uppercase # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7CA ; Uppercase # L& MATHEMATICAL BOLD CAPITAL DIGAMMA +1E900..1E921 ; Uppercase # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA +1F130..1F149 ; Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1951 + +# ================================================ + +# Derived Property: Cased (Cased) +# As defined by Unicode Standard Definition D135 +# C has the Lowercase or Uppercase property or has a General_Category value of Titlecase_Letter. + +0041..005A ; Cased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Cased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Cased # Lo FEMININE ORDINAL INDICATOR +00B5 ; Cased # L& MICRO SIGN +00BA ; Cased # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Cased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Cased # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Cased # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BC..01BF ; Cased # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C4..0293 ; Cased # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0295..02AF ; Cased # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Cased # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Cased # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Cased # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Cased # Mn COMBINING GREEK YPOGEGRAMMENI +0370..0373 ; Cased # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; Cased # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Cased # Lm GREEK YPOGEGRAMMENI +037B..037D ; Cased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Cased # L& GREEK CAPITAL LETTER YOT +0386 ; Cased # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Cased # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Cased # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Cased # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Cased # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; Cased # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Cased # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Cased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0560..0588 ; Cased # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +10A0..10C5 ; Cased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Cased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Cased # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Cased # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Cased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13A0..13F5 ; Cased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Cased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Cased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; Cased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Cased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1D00..1D2B ; Cased # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Cased # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Cased # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Cased # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Cased # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Cased # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; Cased # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Cased # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Cased # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Cased # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Cased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Cased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Cased # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Cased # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Cased # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Cased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Cased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Cased # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Cased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Cased # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Cased # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Cased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Cased # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Cased # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Cased # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; Cased # L& DOUBLE-STRUCK CAPITAL C +2107 ; Cased # L& EULER CONSTANT +210A..2113 ; Cased # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Cased # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Cased # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Cased # L& DOUBLE-STRUCK CAPITAL Z +2126 ; Cased # L& OHM SIGN +2128 ; Cased # L& BLACK-LETTER CAPITAL Z +212A..212D ; Cased # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212F..2134 ; Cased # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2139 ; Cased # L& INFORMATION SOURCE +213C..213F ; Cased # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Cased # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; Cased # L& TURNED SMALL F +2160..217F ; Cased # Nl [32] ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2183..2184 ; Cased # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +24B6..24E9 ; Cased # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C7B ; Cased # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Cased # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Cased # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; Cased # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Cased # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Cased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Cased # L& GEORGIAN SMALL LETTER YN +2D2D ; Cased # L& GEORGIAN SMALL LETTER AEN +A640..A66D ; Cased # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A680..A69B ; Cased # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Cased # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A722..A76F ; Cased # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Cased # Lm MODIFIER LETTER US +A771..A787 ; Cased # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A78B..A78E ; Cased # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A790..A7CA ; Cased # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; Cased # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Cased # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; Cased # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F5..A7F6 ; Cased # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F8..A7F9 ; Cased # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Cased # L& LATIN LETTER SMALL CAPITAL TURNED M +AB30..AB5A ; Cased # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Cased # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Cased # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB70..ABBF ; Cased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Cased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Cased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Cased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Cased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10400..1044F ; Cased # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +104B0..104D3 ; Cased # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Cased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10570..1057A ; Cased # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Cased # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Cased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Cased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Cased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Cased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Cased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Cased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10780 ; Cased # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Cased # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Cased # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Cased # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10C80..10CB2 ; Cased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Cased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +118A0..118DF ; Cased # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E40..16E7F ; Cased # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1D400..1D454 ; Cased # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Cased # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Cased # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Cased # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Cased # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Cased # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Cased # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Cased # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Cased # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Cased # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Cased # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Cased # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Cased # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Cased # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Cased # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Cased # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Cased # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Cased # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Cased # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Cased # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Cased # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Cased # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Cased # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Cased # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Cased # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Cased # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Cased # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Cased # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; Cased # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0B..1DF1E ; Cased # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E900..1E943 ; Cased # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1F130..1F149 ; Cased # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Cased # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Cased # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 4453 + +# ================================================ + +# Derived Property: Case_Ignorable (CI) +# As defined by Unicode Standard Definition D136 +# C is defined to be case-ignorable if +# Word_Break(C) = MidLetter or MidNumLet or Single_Quote, or +# General_Category(C) = Nonspacing_Mark (Mn), Enclosing_Mark (Me), Format (Cf), Modifier_Letter (Lm), or Modifier_Symbol (Sk). + +0027 ; Case_Ignorable # Po APOSTROPHE +002E ; Case_Ignorable # Po FULL STOP +003A ; Case_Ignorable # Po COLON +005E ; Case_Ignorable # Sk CIRCUMFLEX ACCENT +0060 ; Case_Ignorable # Sk GRAVE ACCENT +00A8 ; Case_Ignorable # Sk DIAERESIS +00AD ; Case_Ignorable # Cf SOFT HYPHEN +00AF ; Case_Ignorable # Sk MACRON +00B4 ; Case_Ignorable # Sk ACUTE ACCENT +00B7 ; Case_Ignorable # Po MIDDLE DOT +00B8 ; Case_Ignorable # Sk CEDILLA +02B0..02C1 ; Case_Ignorable # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Case_Ignorable # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Case_Ignorable # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Case_Ignorable # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Case_Ignorable # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Case_Ignorable # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Case_Ignorable # Lm MODIFIER LETTER VOICING +02ED ; Case_Ignorable # Sk MODIFIER LETTER UNASPIRATED +02EE ; Case_Ignorable # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Case_Ignorable # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..036F ; Case_Ignorable # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0374 ; Case_Ignorable # Lm GREEK NUMERAL SIGN +0375 ; Case_Ignorable # Sk GREEK LOWER NUMERAL SIGN +037A ; Case_Ignorable # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Case_Ignorable # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0387 ; Case_Ignorable # Po GREEK ANO TELEIA +0483..0487 ; Case_Ignorable # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Case_Ignorable # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0559 ; Case_Ignorable # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055F ; Case_Ignorable # Po ARMENIAN ABBREVIATION MARK +0591..05BD ; Case_Ignorable # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Case_Ignorable # Mn HEBREW POINT RAFE +05C1..05C2 ; Case_Ignorable # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Case_Ignorable # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Case_Ignorable # Mn HEBREW POINT QAMATS QATAN +05F4 ; Case_Ignorable # Po HEBREW PUNCTUATION GERSHAYIM +0600..0605 ; Case_Ignorable # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +0610..061A ; Case_Ignorable # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061C ; Case_Ignorable # Cf ARABIC LETTER MARK +0640 ; Case_Ignorable # Lm ARABIC TATWEEL +064B..065F ; Case_Ignorable # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Case_Ignorable # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Case_Ignorable # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DD ; Case_Ignorable # Cf ARABIC END OF AYAH +06DF..06E4 ; Case_Ignorable # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Case_Ignorable # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Case_Ignorable # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Case_Ignorable # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +070F ; Case_Ignorable # Cf SYRIAC ABBREVIATION MARK +0711 ; Case_Ignorable # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Case_Ignorable # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Case_Ignorable # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Case_Ignorable # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Case_Ignorable # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; Case_Ignorable # Lm NKO LAJANYALAN +07FD ; Case_Ignorable # Mn NKO DANTAYALAN +0816..0819 ; Case_Ignorable # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Case_Ignorable # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Case_Ignorable # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Case_Ignorable # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; Case_Ignorable # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Case_Ignorable # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0888 ; Case_Ignorable # Sk ARABIC RAISED ROUND DOT +0890..0891 ; Case_Ignorable # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +0898..089F ; Case_Ignorable # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08C9 ; Case_Ignorable # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; Case_Ignorable # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E2 ; Case_Ignorable # Cf ARABIC DISPUTED END OF AYAH +08E3..0902 ; Case_Ignorable # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN OE +093C ; Case_Ignorable # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Case_Ignorable # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Case_Ignorable # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; Case_Ignorable # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Case_Ignorable # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0971 ; Case_Ignorable # Lm DEVANAGARI SIGN HIGH SPACING DOT +0981 ; Case_Ignorable # Mn BENGALI SIGN CANDRABINDU +09BC ; Case_Ignorable # Mn BENGALI SIGN NUKTA +09C1..09C4 ; Case_Ignorable # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Case_Ignorable # Mn BENGALI SIGN VIRAMA +09E2..09E3 ; Case_Ignorable # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Case_Ignorable # Mn BENGALI SANDHI MARK +0A01..0A02 ; Case_Ignorable # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Case_Ignorable # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Case_Ignorable # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Case_Ignorable # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Case_Ignorable # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Case_Ignorable # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Case_Ignorable # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Case_Ignorable # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Case_Ignorable # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Case_Ignorable # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Case_Ignorable # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Case_Ignorable # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Case_Ignorable # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Case_Ignorable # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Case_Ignorable # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Case_Ignorable # Mn ORIYA SIGN CANDRABINDU +0B3C ; Case_Ignorable # Mn ORIYA SIGN NUKTA +0B3F ; Case_Ignorable # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Case_Ignorable # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Case_Ignorable # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Case_Ignorable # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B62..0B63 ; Case_Ignorable # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Case_Ignorable # Mn TAMIL SIGN ANUSVARA +0BC0 ; Case_Ignorable # Mn TAMIL VOWEL SIGN II +0BCD ; Case_Ignorable # Mn TAMIL SIGN VIRAMA +0C00 ; Case_Ignorable # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Case_Ignorable # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Case_Ignorable # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Case_Ignorable # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Case_Ignorable # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Case_Ignorable # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Case_Ignorable # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Case_Ignorable # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Case_Ignorable # Mn KANNADA SIGN CANDRABINDU +0CBC ; Case_Ignorable # Mn KANNADA SIGN NUKTA +0CBF ; Case_Ignorable # Mn KANNADA VOWEL SIGN I +0CC6 ; Case_Ignorable # Mn KANNADA VOWEL SIGN E +0CCC..0CCD ; Case_Ignorable # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CE2..0CE3 ; Case_Ignorable # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Case_Ignorable # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Case_Ignorable # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D41..0D44 ; Case_Ignorable # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Case_Ignorable # Mn MALAYALAM SIGN VIRAMA +0D62..0D63 ; Case_Ignorable # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Case_Ignorable # Mn SINHALA SIGN CANDRABINDU +0DCA ; Case_Ignorable # Mn SINHALA SIGN AL-LAKUNA +0DD2..0DD4 ; Case_Ignorable # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Case_Ignorable # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0E31 ; Case_Ignorable # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Case_Ignorable # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E46 ; Case_Ignorable # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; Case_Ignorable # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Case_Ignorable # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Case_Ignorable # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC6 ; Case_Ignorable # Lm LAO KO LA +0EC8..0ECD ; Case_Ignorable # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0F18..0F19 ; Case_Ignorable # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Case_Ignorable # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Case_Ignorable # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Case_Ignorable # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Case_Ignorable # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Case_Ignorable # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Case_Ignorable # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Case_Ignorable # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Case_Ignorable # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Case_Ignorable # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Case_Ignorable # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Case_Ignorable # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Case_Ignorable # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Case_Ignorable # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Case_Ignorable # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Case_Ignorable # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Case_Ignorable # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Case_Ignorable # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Case_Ignorable # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Case_Ignorable # Mn MYANMAR VOWEL SIGN AITON AI +10FC ; Case_Ignorable # Lm MODIFIER LETTER GEORGIAN NAR +135D..135F ; Case_Ignorable # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Case_Ignorable # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; Case_Ignorable # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Case_Ignorable # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Case_Ignorable # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Case_Ignorable # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Case_Ignorable # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Case_Ignorable # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Case_Ignorable # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; Case_Ignorable # Lm KHMER SIGN LEK TOO +17DD ; Case_Ignorable # Mn KHMER SIGN ATTHACAN +180B..180D ; Case_Ignorable # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Case_Ignorable # Cf MONGOLIAN VOWEL SEPARATOR +180F ; Case_Ignorable # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1843 ; Case_Ignorable # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1885..1886 ; Case_Ignorable # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Case_Ignorable # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Case_Ignorable # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Case_Ignorable # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Case_Ignorable # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; Case_Ignorable # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Case_Ignorable # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Case_Ignorable # Mn BUGINESE VOWEL SIGN AE +1A56 ; Case_Ignorable # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Case_Ignorable # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Case_Ignorable # Mn TAI THAM SIGN SAKOT +1A62 ; Case_Ignorable # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Case_Ignorable # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Case_Ignorable # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Case_Ignorable # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AA7 ; Case_Ignorable # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; Case_Ignorable # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Case_Ignorable # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Case_Ignorable # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Case_Ignorable # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Case_Ignorable # Mn BALINESE SIGN REREKAN +1B36..1B3A ; Case_Ignorable # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; Case_Ignorable # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; Case_Ignorable # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; Case_Ignorable # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Case_Ignorable # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Case_Ignorable # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Case_Ignorable # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; Case_Ignorable # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Case_Ignorable # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Case_Ignorable # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Case_Ignorable # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Case_Ignorable # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Case_Ignorable # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Case_Ignorable # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Case_Ignorable # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Case_Ignorable # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Case_Ignorable # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Case_Ignorable # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Case_Ignorable # Mn VEDIC SIGN TIRYAK +1CF4 ; Case_Ignorable # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Case_Ignorable # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Case_Ignorable # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Case_Ignorable # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Case_Ignorable # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; Case_Ignorable # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Case_Ignorable # Sk GREEK KORONIS +1FBF..1FC1 ; Case_Ignorable # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Case_Ignorable # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Case_Ignorable # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Case_Ignorable # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Case_Ignorable # Sk [2] GREEK OXIA..GREEK DASIA +200B..200F ; Case_Ignorable # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +2018 ; Case_Ignorable # Pi LEFT SINGLE QUOTATION MARK +2019 ; Case_Ignorable # Pf RIGHT SINGLE QUOTATION MARK +2024 ; Case_Ignorable # Po ONE DOT LEADER +2027 ; Case_Ignorable # Po HYPHENATION POINT +202A..202E ; Case_Ignorable # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Case_Ignorable # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; Case_Ignorable # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +2071 ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Case_Ignorable # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Case_Ignorable # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20D0..20DC ; Case_Ignorable # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Case_Ignorable # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Case_Ignorable # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Case_Ignorable # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Case_Ignorable # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2C7C..2C7D ; Case_Ignorable # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2CEF..2CF1 ; Case_Ignorable # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D6F ; Case_Ignorable # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; Case_Ignorable # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Case_Ignorable # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +2E2F ; Case_Ignorable # Lm VERTICAL TILDE +3005 ; Case_Ignorable # Lm IDEOGRAPHIC ITERATION MARK +302A..302D ; Case_Ignorable # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3031..3035 ; Case_Ignorable # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +303B ; Case_Ignorable # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +3099..309A ; Case_Ignorable # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Case_Ignorable # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; Case_Ignorable # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Case_Ignorable # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Case_Ignorable # Lm YI SYLLABLE WU +A4F8..A4FD ; Case_Ignorable # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A60C ; Case_Ignorable # Lm VAI SYLLABLE LENGTHENER +A66F ; Case_Ignorable # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Case_Ignorable # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Case_Ignorable # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; Case_Ignorable # Lm CYRILLIC PAYEROK +A69C..A69D ; Case_Ignorable # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; Case_Ignorable # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Case_Ignorable # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A700..A716 ; Case_Ignorable # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Case_Ignorable # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Case_Ignorable # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A770 ; Case_Ignorable # Lm MODIFIER LETTER US +A788 ; Case_Ignorable # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Case_Ignorable # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F2..A7F4 ; Case_Ignorable # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F8..A7F9 ; Case_Ignorable # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A802 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Case_Ignorable # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Case_Ignorable # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Case_Ignorable # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Case_Ignorable # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Case_Ignorable # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Case_Ignorable # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Case_Ignorable # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Case_Ignorable # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Case_Ignorable # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Case_Ignorable # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Case_Ignorable # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Case_Ignorable # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9CF ; Case_Ignorable # Lm JAVANESE PANGRANGKEP +A9E5 ; Case_Ignorable # Mn MYANMAR SIGN SHAN SAW +A9E6 ; Case_Ignorable # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA29..AA2E ; Case_Ignorable # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Case_Ignorable # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Case_Ignorable # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Case_Ignorable # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Case_Ignorable # Mn CHAM CONSONANT SIGN FINAL M +AA70 ; Case_Ignorable # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA7C ; Case_Ignorable # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Case_Ignorable # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Case_Ignorable # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Case_Ignorable # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Case_Ignorable # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Case_Ignorable # Mn TAI VIET TONE MAI THO +AADD ; Case_Ignorable # Lm TAI VIET SYMBOL SAM +AAEC..AAED ; Case_Ignorable # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF3..AAF4 ; Case_Ignorable # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF6 ; Case_Ignorable # Mn MEETEI MAYEK VIRAMA +AB5B ; Case_Ignorable # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Case_Ignorable # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Case_Ignorable # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Case_Ignorable # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +ABE5 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Case_Ignorable # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Case_Ignorable # Mn MEETEI MAYEK APUN IYEK +FB1E ; Case_Ignorable # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FBB2..FBC2 ; Case_Ignorable # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FE00..FE0F ; Case_Ignorable # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE13 ; Case_Ignorable # Po PRESENTATION FORM FOR VERTICAL COLON +FE20..FE2F ; Case_Ignorable # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE52 ; Case_Ignorable # Po SMALL FULL STOP +FE55 ; Case_Ignorable # Po SMALL COLON +FEFF ; Case_Ignorable # Cf ZERO WIDTH NO-BREAK SPACE +FF07 ; Case_Ignorable # Po FULLWIDTH APOSTROPHE +FF0E ; Case_Ignorable # Po FULLWIDTH FULL STOP +FF1A ; Case_Ignorable # Po FULLWIDTH COLON +FF3E ; Case_Ignorable # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Case_Ignorable # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Case_Ignorable # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Case_Ignorable # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Case_Ignorable # Sk FULLWIDTH MACRON +FFF9..FFFB ; Case_Ignorable # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +101FD ; Case_Ignorable # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Case_Ignorable # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Case_Ignorable # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10780..10785 ; Case_Ignorable # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Case_Ignorable # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Case_Ignorable # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10A01..10A03 ; Case_Ignorable # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Case_Ignorable # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Case_Ignorable # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Case_Ignorable # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Case_Ignorable # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Case_Ignorable # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Case_Ignorable # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Case_Ignorable # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10F46..10F50 ; Case_Ignorable # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Case_Ignorable # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Case_Ignorable # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Case_Ignorable # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Case_Ignorable # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Case_Ignorable # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Case_Ignorable # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Case_Ignorable # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Case_Ignorable # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BD ; Case_Ignorable # Cf KAITHI NUMBER SIGN +110C2 ; Case_Ignorable # Mn KAITHI VOWEL SIGN VOCALIC R +110CD ; Case_Ignorable # Cf KAITHI NUMBER SIGN ABOVE +11100..11102 ; Case_Ignorable # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Case_Ignorable # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Case_Ignorable # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Case_Ignorable # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Case_Ignorable # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Case_Ignorable # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; Case_Ignorable # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Case_Ignorable # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Case_Ignorable # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Case_Ignorable # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; Case_Ignorable # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Case_Ignorable # Mn KHOJKI SIGN SUKUN +112DF ; Case_Ignorable # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Case_Ignorable # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Case_Ignorable # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Case_Ignorable # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11340 ; Case_Ignorable # Mn GRANTHA VOWEL SIGN II +11366..1136C ; Case_Ignorable # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Case_Ignorable # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; Case_Ignorable # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Case_Ignorable # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Case_Ignorable # Mn NEWA SIGN NUKTA +1145E ; Case_Ignorable # Mn NEWA SANDHI MARK +114B3..114B8 ; Case_Ignorable # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Case_Ignorable # Mn TIRHUTA VOWEL SIGN SHORT E +114BF..114C0 ; Case_Ignorable # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Case_Ignorable # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115B2..115B5 ; Case_Ignorable # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Case_Ignorable # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Case_Ignorable # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Case_Ignorable # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Case_Ignorable # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Case_Ignorable # Mn MODI SIGN ANUSVARA +1163F..11640 ; Case_Ignorable # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Case_Ignorable # Mn TAKRI SIGN ANUSVARA +116AD ; Case_Ignorable # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Case_Ignorable # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Case_Ignorable # Mn TAKRI SIGN NUKTA +1171D..1171F ; Case_Ignorable # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Case_Ignorable # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Case_Ignorable # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Case_Ignorable # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Case_Ignorable # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193B..1193C ; Case_Ignorable # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; Case_Ignorable # Mn DIVES AKURU VIRAMA +11943 ; Case_Ignorable # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Case_Ignorable # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Case_Ignorable # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Case_Ignorable # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Case_Ignorable # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Case_Ignorable # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Case_Ignorable # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Case_Ignorable # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Case_Ignorable # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Case_Ignorable # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Case_Ignorable # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Case_Ignorable # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Case_Ignorable # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Case_Ignorable # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Case_Ignorable # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Case_Ignorable # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Case_Ignorable # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Case_Ignorable # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Case_Ignorable # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Case_Ignorable # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Case_Ignorable # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Case_Ignorable # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Case_Ignorable # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Case_Ignorable # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Case_Ignorable # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Case_Ignorable # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Case_Ignorable # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Case_Ignorable # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +13430..13438 ; Case_Ignorable # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT +16AF0..16AF4 ; Case_Ignorable # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Case_Ignorable # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; Case_Ignorable # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16F4F ; Case_Ignorable # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Case_Ignorable # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Case_Ignorable # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Case_Ignorable # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Case_Ignorable # Lm OLD CHINESE ITERATION MARK +16FE4 ; Case_Ignorable # Mn KHITAN SMALL SCRIPT FILLER +1AFF0..1AFF3 ; Case_Ignorable # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Case_Ignorable # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Case_Ignorable # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1BC9D..1BC9E ; Case_Ignorable # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BCA0..1BCA3 ; Case_Ignorable # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF00..1CF2D ; Case_Ignorable # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Case_Ignorable # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Case_Ignorable # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D173..1D17A ; Case_Ignorable # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D17B..1D182 ; Case_Ignorable # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Case_Ignorable # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Case_Ignorable # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Case_Ignorable # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Case_Ignorable # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Case_Ignorable # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Case_Ignorable # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Case_Ignorable # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Case_Ignorable # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Case_Ignorable # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Case_Ignorable # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Case_Ignorable # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Case_Ignorable # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Case_Ignorable # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E130..1E136 ; Case_Ignorable # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; Case_Ignorable # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E2AE ; Case_Ignorable # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Case_Ignorable # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; Case_Ignorable # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Case_Ignorable # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; Case_Ignorable # Lm ADLAM NASALIZATION MARK +1F3FB..1F3FF ; Case_Ignorable # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +E0001 ; Case_Ignorable # Cf LANGUAGE TAG +E0020..E007F ; Case_Ignorable # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Case_Ignorable # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2602 + +# ================================================ + +# Derived Property: Changes_When_Lowercased (CWL) +# Characters whose normalized forms are not stable under a toLowercase mapping. +# For more information, see D139 in Section 3.13, "Default Case Algorithms". +# Changes_When_Lowercased(X) is true when toLowercase(toNFD(X)) != toNFD(X) + +0041..005A ; Changes_When_Lowercased # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00C0..00D6 ; Changes_When_Lowercased # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DE ; Changes_When_Lowercased # L& [7] LATIN CAPITAL LETTER O WITH STROKE..LATIN CAPITAL LETTER THORN +0100 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH STROKE +0112 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH STROKE +0128 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Changes_When_Lowercased # L& LATIN CAPITAL LIGATURE IJ +0134 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CARON +014A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ENG +014C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Changes_When_Lowercased # L& LATIN CAPITAL LIGATURE OE +0154 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CARON +0162 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH CARON +0181..0182 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OI +01A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Changes_When_Lowercased # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ESH +01AC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TONE FIVE +01C4..01C5 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7..01C8 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER LJ..LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA..01CB ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER NJ..LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1..01F2 ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER YOGH +021E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OU +0224 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH STROKE +0370 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER HETA +0372 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Lowercased # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Changes_When_Lowercased # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Changes_When_Lowercased # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Changes_When_Lowercased # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03CF ; Changes_When_Lowercased # L& GREEK CAPITAL KAI SYMBOL +03D8 ; Changes_When_Lowercased # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Changes_When_Lowercased # L& GREEK LETTER STIGMA +03DC ; Changes_When_Lowercased # L& GREEK LETTER DIGAMMA +03DE ; Changes_When_Lowercased # L& GREEK LETTER KOPPA +03E0 ; Changes_When_Lowercased # L& GREEK LETTER SAMPI +03E2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER FEI +03E6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER HORI +03EA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER GANGIA +03EC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DEI +03F4 ; Changes_When_Lowercased # L& GREEK CAPITAL THETA SYMBOL +03F7 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Changes_When_Lowercased # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Changes_When_Lowercased # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER UK +047A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER OT +0480 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Changes_When_Lowercased # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YAE +051A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER QA +051C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER WE +051E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Lowercased # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +10A0..10C5 ; Changes_When_Lowercased # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Lowercased # L& GEORGIAN CAPITAL LETTER AEN +13A0..13F5 ; Changes_When_Lowercased # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +1C90..1CBA ; Changes_When_Lowercased # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Changes_When_Lowercased # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SHARP S +1EA0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Changes_When_Lowercased # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Changes_When_Lowercased # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Changes_When_Lowercased # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F88..1F8F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1F98..1F9F ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FA8..1FAF ; Changes_When_Lowercased # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB8..1FBC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER ALPHA WITH VRACHY..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC8..1FCC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER EPSILON WITH VARIA..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD8..1FDB ; Changes_When_Lowercased # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF8..1FFC ; Changes_When_Lowercased # L& [5] GREEK CAPITAL LETTER OMICRON WITH VARIA..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Lowercased # L& OHM SIGN +212A..212B ; Changes_When_Lowercased # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Lowercased # L& TURNED CAPITAL F +2160..216F ; Changes_When_Lowercased # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Changes_When_Lowercased # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Changes_When_Lowercased # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Changes_When_Lowercased # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Changes_When_Lowercased # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER EIE +2C8A ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SOU +2C8C ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER ZATA +2C8E ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER HATE +2C90 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER MI +2C9A ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER NI +2C9C ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KSI +2C9E ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER O +2CA0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER PI +2CA2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER RO +2CA4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER UA +2CAA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER FI +2CAC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER KHI +2CAE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OOU +2CB2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Lowercased # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER YN +A660 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CCHE +A688 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Changes_When_Lowercased # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER HENG +A728 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TZ +A72A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AA +A734 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AO +A736 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AU +A738 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AV +A73A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER AY +A73E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OO +A750 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VY +A762 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VEND +A76A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ET +A76C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER IS +A76E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CON +A779 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Changes_When_Lowercased # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED L +A782 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR S +A786 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SALTILLO +A78D ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER TURNED H +A790 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Changes_When_Lowercased # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Changes_When_Lowercased # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Changes_When_Lowercased # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7D0 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER SIGMOID S +A7F5 ; Changes_When_Lowercased # L& LATIN CAPITAL LETTER REVERSED HALF H +FF21..FF3A ; Changes_When_Lowercased # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Changes_When_Lowercased # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Changes_When_Lowercased # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Changes_When_Lowercased # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Lowercased # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Lowercased # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Lowercased # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Changes_When_Lowercased # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +118A0..118BF ; Changes_When_Lowercased # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Changes_When_Lowercased # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1E900..1E921 ; Changes_When_Lowercased # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA + +# Total code points: 1433 + +# ================================================ + +# Derived Property: Changes_When_Uppercased (CWU) +# Characters whose normalized forms are not stable under a toUppercase mapping. +# For more information, see D140 in Section 3.13, "Default Case Algorithms". +# Changes_When_Uppercased(X) is true when toUppercase(toNFD(X)) != toNFD(X) + +0061..007A ; Changes_When_Uppercased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Uppercased # L& MICRO SIGN +00DF..00F6 ; Changes_When_Uppercased # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Changes_When_Uppercased # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CARON +010F ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CARON +0111 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH STROKE +0113 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CARON +011D ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH TILDE +012B ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH MACRON +012D ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH BREVE +012F ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Changes_When_Uppercased # L& LATIN SMALL LETTER DOTLESS I +0133 ; Changes_When_Uppercased # L& LATIN SMALL LIGATURE IJ +0135 ; Changes_When_Uppercased # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH CEDILLA +013A ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CARON +0140 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Changes_When_Uppercased # L& LATIN SMALL LETTER ENG +014D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON +014F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Changes_When_Uppercased # L& LATIN SMALL LIGATURE OE +0155 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH CARON +015B ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CARON +0163 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH CARON +0167 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE +016B ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH MACRON +016D ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH BREVE +016F ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Changes_When_Uppercased # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE SIX +0188 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH HOOK +018C ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH TOPBAR +0192 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HV +0199..019A ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +019E ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OI +01A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE TWO +01AD ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH STROKE +01B9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EZH REVERSED +01BD ; Changes_When_Uppercased # L& LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Uppercased # L& LATIN LETTER WYNN +01C5..01C6 ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON..LATIN SMALL LETTER DZ WITH CARON +01C8..01C9 ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER L WITH SMALL LETTER J..LATIN SMALL LETTER LJ +01CB..01CC ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER N WITH SMALL LETTER J..LATIN SMALL LETTER NJ +01CE ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH CARON +01EB ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F0 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON +01F2..01F3 ; Changes_When_Uppercased # L& [2] LATIN CAPITAL LETTER D WITH SMALL LETTER Z..LATIN SMALL LETTER DZ +01F5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Changes_When_Uppercased # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Changes_When_Uppercased # L& LATIN SMALL LETTER YOGH +021F ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CARON +0223 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OU +0225 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH MACRON +023C ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Changes_When_Uppercased # L& LATIN SMALL LETTER J WITH STROKE +024B ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH STROKE +024F..0254 ; Changes_When_Uppercased # L& [6] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263 ; Changes_When_Uppercased # L& LATIN SMALL LETTER GAMMA +0265..0266 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Uppercased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Uppercased # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Uppercased # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Uppercased # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Changes_When_Uppercased # L& GREEK SMALL LETTER HETA +0373 ; Changes_When_Uppercased # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Changes_When_Uppercased # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Uppercased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Changes_When_Uppercased # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Changes_When_Uppercased # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Changes_When_Uppercased # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Changes_When_Uppercased # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Changes_When_Uppercased # L& GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Changes_When_Uppercased # L& GREEK SMALL LETTER STIGMA +03DD ; Changes_When_Uppercased # L& GREEK SMALL LETTER DIGAMMA +03DF ; Changes_When_Uppercased # L& GREEK SMALL LETTER KOPPA +03E1 ; Changes_When_Uppercased # L& GREEK SMALL LETTER SAMPI +03E3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SHEI +03E5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER FEI +03E7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KHEI +03E9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER HORI +03EB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER GANGIA +03ED ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Changes_When_Uppercased # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Changes_When_Uppercased # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Changes_When_Uppercased # L& GREEK SMALL LETTER SHO +03FB ; Changes_When_Uppercased # L& GREEK SMALL LETTER SAN +0430..045F ; Changes_When_Uppercased # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YAT +0465 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KSI +0471 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PSI +0473 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER FITA +0475 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER UK +047B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER OT +0481 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOPPA +048B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH DESCENDER +049D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHHA +04BD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Changes_When_Uppercased # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER LHA +0517 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER RHA +0519 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YAE +051B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER QA +051D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER WE +051F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZZHE +052D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DCHE +052F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0561..0587 ; Changes_When_Uppercased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +10D0..10FA ; Changes_When_Uppercased # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Changes_When_Uppercased # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13F8..13FD ; Changes_When_Uppercased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Uppercased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1D79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E01 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Changes_When_Uppercased # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Changes_When_Uppercased # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Changes_When_Uppercased # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Changes_When_Uppercased # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Changes_When_Uppercased # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9B ; Changes_When_Uppercased # L& [7] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Changes_When_Uppercased # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Changes_When_Uppercased # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Changes_When_Uppercased # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Changes_When_Uppercased # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Changes_When_Uppercased # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Changes_When_Uppercased # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Changes_When_Uppercased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Uppercased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FCC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Changes_When_Uppercased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Changes_When_Uppercased # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Changes_When_Uppercased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Changes_When_Uppercased # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +1FFC ; Changes_When_Uppercased # L& GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +214E ; Changes_When_Uppercased # L& TURNED SMALL F +2170..217F ; Changes_When_Uppercased # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Changes_When_Uppercased # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Changes_When_Uppercased # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Changes_When_Uppercased # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Changes_When_Uppercased # L& LATIN SMALL LETTER Z WITH DESCENDER +2C73 ; Changes_When_Uppercased # L& LATIN SMALL LETTER W WITH HOOK +2C76 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HALF H +2C81 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER ALFA +2C83 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER VIDA +2C85 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER GAMMA +2C87 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DALDA +2C89 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER EIE +2C8B ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SOU +2C8D ; Changes_When_Uppercased # L& COPTIC SMALL LETTER ZATA +2C8F ; Changes_When_Uppercased # L& COPTIC SMALL LETTER HATE +2C91 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER THETHE +2C93 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER IAUDA +2C95 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KAPA +2C97 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER LAULA +2C99 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER MI +2C9B ; Changes_When_Uppercased # L& COPTIC SMALL LETTER NI +2C9D ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KSI +2C9F ; Changes_When_Uppercased # L& COPTIC SMALL LETTER O +2CA1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER PI +2CA3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER RO +2CA5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SIMA +2CA7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER TAU +2CA9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER UA +2CAB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER FI +2CAD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER KHI +2CAF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER PSI +2CB1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OOU +2CB3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER OLD NUBIAN WAU +2CEC ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Changes_When_Uppercased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Uppercased # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Uppercased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Uppercased # L& GEORGIAN SMALL LETTER AEN +A641 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZELO +A645 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTA +A649 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DJERV +A64B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER YN +A661 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DWE +A683 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZWE +A685 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CCHE +A689 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DZZE +A68B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TWE +A68F ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TSWE +A691 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TSSE +A693 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER TCHE +A695 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER HWE +A697 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER SHWE +A699 ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Changes_When_Uppercased # L& CYRILLIC SMALL LETTER CROSSED O +A723 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Changes_When_Uppercased # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Changes_When_Uppercased # L& LATIN SMALL LETTER HENG +A729 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TZ +A72B ; Changes_When_Uppercased # L& LATIN SMALL LETTER TRESILLO +A72D ; Changes_When_Uppercased # L& LATIN SMALL LETTER CUATRILLO +A72F ; Changes_When_Uppercased # L& LATIN SMALL LETTER CUATRILLO WITH COMMA +A733 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AA +A735 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AO +A737 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AU +A739 ; Changes_When_Uppercased # L& LATIN SMALL LETTER AV +A73B ; Changes_When_Uppercased # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Changes_When_Uppercased # L& LATIN SMALL LETTER AY +A73F ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BROKEN L +A749 ; Changes_When_Uppercased # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Changes_When_Uppercased # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Changes_When_Uppercased # L& LATIN SMALL LETTER OO +A751 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Changes_When_Uppercased # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Changes_When_Uppercased # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Changes_When_Uppercased # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Changes_When_Uppercased # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Changes_When_Uppercased # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VY +A763 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Changes_When_Uppercased # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Changes_When_Uppercased # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Changes_When_Uppercased # L& LATIN SMALL LETTER VEND +A76B ; Changes_When_Uppercased # L& LATIN SMALL LETTER ET +A76D ; Changes_When_Uppercased # L& LATIN SMALL LETTER IS +A76F ; Changes_When_Uppercased # L& LATIN SMALL LETTER CON +A77A ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR D +A77C ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR F +A77F ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Changes_When_Uppercased # L& LATIN SMALL LETTER TURNED L +A783 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR R +A785 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR S +A787 ; Changes_When_Uppercased # L& LATIN SMALL LETTER INSULAR T +A78C ; Changes_When_Uppercased # L& LATIN SMALL LETTER SALTILLO +A791 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A794 ; Changes_When_Uppercased # L& [2] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER C WITH PALATAL HOOK +A797 ; Changes_When_Uppercased # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Changes_When_Uppercased # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Changes_When_Uppercased # L& LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7B5 ; Changes_When_Uppercased # L& LATIN SMALL LETTER BETA +A7B7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OMEGA +A7B9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Changes_When_Uppercased # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Changes_When_Uppercased # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Changes_When_Uppercased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Changes_When_Uppercased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D1 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D7 ; Changes_When_Uppercased # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Changes_When_Uppercased # L& LATIN SMALL LETTER SIGMOID S +A7F6 ; Changes_When_Uppercased # L& LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Uppercased # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Uppercased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Uppercased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Uppercased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Changes_When_Uppercased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Changes_When_Uppercased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Changes_When_Uppercased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Changes_When_Uppercased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Uppercased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Uppercased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Changes_When_Uppercased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10CC0..10CF2 ; Changes_When_Uppercased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +118C0..118DF ; Changes_When_Uppercased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Changes_When_Uppercased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E922..1E943 ; Changes_When_Uppercased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 1525 + +# ================================================ + +# Derived Property: Changes_When_Titlecased (CWT) +# Characters whose normalized forms are not stable under a toTitlecase mapping. +# For more information, see D141 in Section 3.13, "Default Case Algorithms". +# Changes_When_Titlecased(X) is true when toTitlecase(toNFD(X)) != toNFD(X) + +0061..007A ; Changes_When_Titlecased # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Titlecased # L& MICRO SIGN +00DF..00F6 ; Changes_When_Titlecased # L& [24] LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS +00F8..00FF ; Changes_When_Titlecased # L& [8] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS +0101 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH MACRON +0103 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE +0105 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH OGONEK +0107 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH ACUTE +0109 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CIRCUMFLEX +010B ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH DOT ABOVE +010D ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CARON +010F ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CARON +0111 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH STROKE +0113 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON +0115 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH BREVE +0117 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOT ABOVE +0119 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH OGONEK +011B ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CARON +011D ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CIRCUMFLEX +011F ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH BREVE +0121 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH DOT ABOVE +0123 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CEDILLA +0125 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CIRCUMFLEX +0127 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH STROKE +0129 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH TILDE +012B ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH MACRON +012D ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH BREVE +012F ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH OGONEK +0131 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DOTLESS I +0133 ; Changes_When_Titlecased # L& LATIN SMALL LIGATURE IJ +0135 ; Changes_When_Titlecased # L& LATIN SMALL LETTER J WITH CIRCUMFLEX +0137 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH CEDILLA +013A ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH ACUTE +013C ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CEDILLA +013E ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CARON +0140 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH MIDDLE DOT +0142 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH STROKE +0144 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH ACUTE +0146 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH CEDILLA +0148..0149 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +014B ; Changes_When_Titlecased # L& LATIN SMALL LETTER ENG +014D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH MACRON +014F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH BREVE +0151 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOUBLE ACUTE +0153 ; Changes_When_Titlecased # L& LATIN SMALL LIGATURE OE +0155 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH ACUTE +0157 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH CEDILLA +0159 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH CARON +015B ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH ACUTE +015D ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CIRCUMFLEX +015F ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CEDILLA +0161 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CARON +0163 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CEDILLA +0165 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CARON +0167 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH STROKE +0169 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE +016B ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH MACRON +016D ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH BREVE +016F ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH RING ABOVE +0171 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOUBLE ACUTE +0173 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH OGONEK +0175 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH CIRCUMFLEX +0177 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH CIRCUMFLEX +017A ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH ACUTE +017C ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DOT ABOVE +017E..0180 ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER Z WITH CARON..LATIN SMALL LETTER B WITH STROKE +0183 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH TOPBAR +0185 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE SIX +0188 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH HOOK +018C ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH TOPBAR +0192 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH HOOK +0195 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HV +0199..019A ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER L WITH BAR +019E ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LONG RIGHT LEG +01A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN +01A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OI +01A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH HOOK +01A8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE TWO +01AD ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH HOOK +01B0 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN +01B4 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH HOOK +01B6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH STROKE +01B9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EZH REVERSED +01BD ; Changes_When_Titlecased # L& LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Titlecased # L& LATIN LETTER WYNN +01C4 ; Changes_When_Titlecased # L& LATIN CAPITAL LETTER DZ WITH CARON +01C6..01C7 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER DZ WITH CARON..LATIN CAPITAL LETTER LJ +01C9..01CA ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER LJ..LATIN CAPITAL LETTER NJ +01CC ; Changes_When_Titlecased # L& LATIN SMALL LETTER NJ +01CE ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CARON +01D0 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH CARON +01D2 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CARON +01D4 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH CARON +01D6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND MACRON +01D8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE +01DA ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS AND CARON +01DC..01DD ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E +01DF ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DIAERESIS AND MACRON +01E1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON +01E3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AE WITH MACRON +01E5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH STROKE +01E7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH CARON +01E9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH CARON +01EB ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH OGONEK +01ED ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH OGONEK AND MACRON +01EF..01F1 ; Changes_When_Titlecased # L& [3] LATIN SMALL LETTER EZH WITH CARON..LATIN CAPITAL LETTER DZ +01F3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER DZ +01F5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH ACUTE +01F9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH GRAVE +01FB ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE +01FD ; Changes_When_Titlecased # L& LATIN SMALL LETTER AE WITH ACUTE +01FF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH STROKE AND ACUTE +0201 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOUBLE GRAVE +0203 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH INVERTED BREVE +0205 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOUBLE GRAVE +0207 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH INVERTED BREVE +0209 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DOUBLE GRAVE +020B ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH INVERTED BREVE +020D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOUBLE GRAVE +020F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH INVERTED BREVE +0211 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOUBLE GRAVE +0213 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH INVERTED BREVE +0215 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOUBLE GRAVE +0217 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH INVERTED BREVE +0219 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH COMMA BELOW +021B ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH COMMA BELOW +021D ; Changes_When_Titlecased # L& LATIN SMALL LETTER YOGH +021F ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CARON +0223 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OU +0225 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH HOOK +0227 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT ABOVE +0229 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CEDILLA +022B ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DIAERESIS AND MACRON +022D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND MACRON +022F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT ABOVE +0231 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON +0233 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH MACRON +023C ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH STROKE +023F..0240 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL +0242 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL STOP +0247 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH STROKE +0249 ; Changes_When_Titlecased # L& LATIN SMALL LETTER J WITH STROKE +024B ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH HOOK TAIL +024D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH STROKE +024F..0254 ; Changes_When_Titlecased # L& [6] LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263 ; Changes_When_Titlecased # L& LATIN SMALL LETTER GAMMA +0265..0266 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Titlecased # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Titlecased # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Titlecased # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Titlecased # Mn COMBINING GREEK YPOGEGRAMMENI +0371 ; Changes_When_Titlecased # L& GREEK SMALL LETTER HETA +0373 ; Changes_When_Titlecased # L& GREEK SMALL LETTER ARCHAIC SAMPI +0377 ; Changes_When_Titlecased # L& GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Titlecased # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +0390 ; Changes_When_Titlecased # L& GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS +03AC..03CE ; Changes_When_Titlecased # L& [35] GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS +03D0..03D1 ; Changes_When_Titlecased # L& [2] GREEK BETA SYMBOL..GREEK THETA SYMBOL +03D5..03D7 ; Changes_When_Titlecased # L& [3] GREEK PHI SYMBOL..GREEK KAI SYMBOL +03D9 ; Changes_When_Titlecased # L& GREEK SMALL LETTER ARCHAIC KOPPA +03DB ; Changes_When_Titlecased # L& GREEK SMALL LETTER STIGMA +03DD ; Changes_When_Titlecased # L& GREEK SMALL LETTER DIGAMMA +03DF ; Changes_When_Titlecased # L& GREEK SMALL LETTER KOPPA +03E1 ; Changes_When_Titlecased # L& GREEK SMALL LETTER SAMPI +03E3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SHEI +03E5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER FEI +03E7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KHEI +03E9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER HORI +03EB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER GANGIA +03ED ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SHIMA +03EF..03F3 ; Changes_When_Titlecased # L& [5] COPTIC SMALL LETTER DEI..GREEK LETTER YOT +03F5 ; Changes_When_Titlecased # L& GREEK LUNATE EPSILON SYMBOL +03F8 ; Changes_When_Titlecased # L& GREEK SMALL LETTER SHO +03FB ; Changes_When_Titlecased # L& GREEK SMALL LETTER SAN +0430..045F ; Changes_When_Titlecased # L& [48] CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE +0461 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OMEGA +0463 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YAT +0465 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED E +0467 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER LITTLE YUS +0469 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS +046B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BIG YUS +046D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED BIG YUS +046F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KSI +0471 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PSI +0473 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER FITA +0475 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IZHITSA +0477 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0479 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER UK +047B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ROUND OMEGA +047D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OMEGA WITH TITLO +047F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER OT +0481 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOPPA +048B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHORT I WITH TAIL +048D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SEMISOFT SIGN +048F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ER WITH TICK +0491 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH UPTURN +0493 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH STROKE +0495 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK +0497 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH DESCENDER +0499 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZE WITH DESCENDER +049B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH DESCENDER +049D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE +049F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH STROKE +04A1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BASHKIR KA +04A3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH DESCENDER +04A5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE EN GHE +04A7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK +04A9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN HA +04AB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ES WITH DESCENDER +04AD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TE WITH DESCENDER +04AF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER STRAIGHT U +04B1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE +04B3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH DESCENDER +04B5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE TE TSE +04B7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH DESCENDER +04B9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE +04BB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHHA +04BD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE +04BF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER +04C2 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH BREVE +04C4 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KA WITH HOOK +04C6 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH TAIL +04C8 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH HOOK +04CA ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH TAIL +04CC ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KHAKASSIAN CHE +04CE..04CF ; Changes_When_Titlecased # L& [2] CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA +04D1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER A WITH BREVE +04D3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER A WITH DIAERESIS +04D5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LIGATURE A IE +04D7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IE WITH BREVE +04D9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SCHWA +04DB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS +04DD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHE WITH DIAERESIS +04DF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZE WITH DIAERESIS +04E1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ABKHASIAN DZE +04E3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER I WITH MACRON +04E5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER I WITH DIAERESIS +04E7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER O WITH DIAERESIS +04E9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BARRED O +04EB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS +04ED ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER E WITH DIAERESIS +04EF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH MACRON +04F1 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH DIAERESIS +04F3 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE +04F5 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CHE WITH DIAERESIS +04F7 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH DESCENDER +04F9 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YERU WITH DIAERESIS +04FB ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK +04FD ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH HOOK +04FF ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HA WITH STROKE +0501 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DE +0503 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DJE +0505 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI ZJE +0507 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI DZJE +0509 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI LJE +050B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI NJE +050D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI SJE +050F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER KOMI TJE +0511 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED ZE +0513 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH HOOK +0515 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER LHA +0517 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER RHA +0519 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YAE +051B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER QA +051D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER WE +051F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ALEUT KA +0521 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK +0523 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK +0525 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER PE WITH DESCENDER +0527 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHHA WITH DESCENDER +0529 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EN WITH LEFT HOOK +052B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZZHE +052D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DCHE +052F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER EL WITH DESCENDER +0561..0587 ; Changes_When_Titlecased # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +13F8..13FD ; Changes_When_Titlecased # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Titlecased # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1D79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E01 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH RING BELOW +1E03 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH DOT ABOVE +1E05 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH DOT BELOW +1E07 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH LINE BELOW +1E09 ; Changes_When_Titlecased # L& LATIN SMALL LETTER C WITH CEDILLA AND ACUTE +1E0B ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH DOT ABOVE +1E0D ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH DOT BELOW +1E0F ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH LINE BELOW +1E11 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CEDILLA +1E13 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW +1E15 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON AND GRAVE +1E17 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH MACRON AND ACUTE +1E19 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW +1E1B ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH TILDE BELOW +1E1D ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CEDILLA AND BREVE +1E1F ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH DOT ABOVE +1E21 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH MACRON +1E23 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DOT ABOVE +1E25 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DOT BELOW +1E27 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DIAERESIS +1E29 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH CEDILLA +1E2B ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH BREVE BELOW +1E2D ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH TILDE BELOW +1E2F ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE +1E31 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH ACUTE +1E33 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DOT BELOW +1E35 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH LINE BELOW +1E37 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOT BELOW +1E39 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOT BELOW AND MACRON +1E3B ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH LINE BELOW +1E3D ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW +1E3F ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH ACUTE +1E41 ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH DOT ABOVE +1E43 ; Changes_When_Titlecased # L& LATIN SMALL LETTER M WITH DOT BELOW +1E45 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DOT ABOVE +1E47 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DOT BELOW +1E49 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH LINE BELOW +1E4B ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW +1E4D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND ACUTE +1E4F ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH TILDE AND DIAERESIS +1E51 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH MACRON AND GRAVE +1E53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH MACRON AND ACUTE +1E55 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH ACUTE +1E57 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH DOT ABOVE +1E59 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT ABOVE +1E5B ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT BELOW +1E5D ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH DOT BELOW AND MACRON +1E5F ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH LINE BELOW +1E61 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT ABOVE +1E63 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT BELOW +1E65 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE +1E67 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH CARON AND DOT ABOVE +1E69 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6B ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH DOT ABOVE +1E6D ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH DOT BELOW +1E6F ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH LINE BELOW +1E71 ; Changes_When_Titlecased # L& LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW +1E73 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DIAERESIS BELOW +1E75 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE BELOW +1E77 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW +1E79 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH TILDE AND ACUTE +1E7B ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH MACRON AND DIAERESIS +1E7D ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH TILDE +1E7F ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH DOT BELOW +1E81 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH GRAVE +1E83 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH ACUTE +1E85 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH DIAERESIS +1E87 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH DOT ABOVE +1E89 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH DOT BELOW +1E8B ; Changes_When_Titlecased # L& LATIN SMALL LETTER X WITH DOT ABOVE +1E8D ; Changes_When_Titlecased # L& LATIN SMALL LETTER X WITH DIAERESIS +1E8F ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH DOT ABOVE +1E91 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH CIRCUMFLEX +1E93 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DOT BELOW +1E95..1E9B ; Changes_When_Titlecased # L& [7] LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1EA1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH DOT BELOW +1EA3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH HOOK ABOVE +1EA5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAB ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE +1EAD ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAF ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND ACUTE +1EB1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND GRAVE +1EB3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE +1EB5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND TILDE +1EB7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER A WITH BREVE AND DOT BELOW +1EB9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH DOT BELOW +1EBB ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH HOOK ABOVE +1EBD ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH TILDE +1EBF ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE +1EC7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH HOOK ABOVE +1ECB ; Changes_When_Titlecased # L& LATIN SMALL LETTER I WITH DOT BELOW +1ECD ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH DOT BELOW +1ECF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HOOK ABOVE +1ED1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE +1ED9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDB ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND ACUTE +1EDD ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND GRAVE +1EDF ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE +1EE1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND TILDE +1EE3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH HORN AND DOT BELOW +1EE5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH DOT BELOW +1EE7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HOOK ABOVE +1EE9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND ACUTE +1EEB ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND GRAVE +1EED ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE +1EEF ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND TILDE +1EF1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH HORN AND DOT BELOW +1EF3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH GRAVE +1EF5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH DOT BELOW +1EF7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH HOOK ABOVE +1EF9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Y WITH TILDE +1EFB ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE-WELSH LL +1EFD ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE-WELSH V +1EFF..1F07 ; Changes_When_Titlecased # L& [9] LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F10..1F15 ; Changes_When_Titlecased # L& [6] GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F27 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI +1F30..1F37 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI +1F40..1F45 ; Changes_When_Titlecased # L& [6] GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F60..1F67 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F70..1F7D ; Changes_When_Titlecased # L& [14] GREEK SMALL LETTER ALPHA WITH VARIA..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1F87 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1F90..1F97 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FA0..1FA7 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI +1FB0..1FB4 ; Changes_When_Titlecased # L& [5] GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FB7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI +1FBE ; Changes_When_Titlecased # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Titlecased # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FC7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI +1FD0..1FD3 ; Changes_When_Titlecased # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FD7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI +1FE0..1FE7 ; Changes_When_Titlecased # L& [8] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI +1FF2..1FF4 ; Changes_When_Titlecased # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FF7 ; Changes_When_Titlecased # L& [2] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI +214E ; Changes_When_Titlecased # L& TURNED SMALL F +2170..217F ; Changes_When_Titlecased # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2184 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED C +24D0..24E9 ; Changes_When_Titlecased # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C30..2C5F ; Changes_When_Titlecased # L& [48] GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER CAUDATE CHRIVI +2C61 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH DOUBLE BAR +2C65..2C66 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE +2C68 ; Changes_When_Titlecased # L& LATIN SMALL LETTER H WITH DESCENDER +2C6A ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DESCENDER +2C6C ; Changes_When_Titlecased # L& LATIN SMALL LETTER Z WITH DESCENDER +2C73 ; Changes_When_Titlecased # L& LATIN SMALL LETTER W WITH HOOK +2C76 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HALF H +2C81 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER ALFA +2C83 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER VIDA +2C85 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER GAMMA +2C87 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DALDA +2C89 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER EIE +2C8B ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SOU +2C8D ; Changes_When_Titlecased # L& COPTIC SMALL LETTER ZATA +2C8F ; Changes_When_Titlecased # L& COPTIC SMALL LETTER HATE +2C91 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER THETHE +2C93 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER IAUDA +2C95 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KAPA +2C97 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER LAULA +2C99 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER MI +2C9B ; Changes_When_Titlecased # L& COPTIC SMALL LETTER NI +2C9D ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KSI +2C9F ; Changes_When_Titlecased # L& COPTIC SMALL LETTER O +2CA1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER PI +2CA3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER RO +2CA5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SIMA +2CA7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER TAU +2CA9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER UA +2CAB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER FI +2CAD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER KHI +2CAF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER PSI +2CB1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OOU +2CB3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P ALEF +2CB5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC AIN +2CB7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC EIE +2CB9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P KAPA +2CBB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P NI +2CBD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC NI +2CBF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC OOU +2CC1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER SAMPI +2CC3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CROSSED SHEI +2CC5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC SHEI +2CC7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC ESH +2CC9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER AKHMIMIC KHEI +2CCB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER DIALECT-P HORI +2CCD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HORI +2CCF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HA +2CD1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER L-SHAPED HA +2CD3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HEI +2CD5 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC HAT +2CD7 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC GANGIA +2CD9 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC DJA +2CDB ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD COPTIC SHIMA +2CDD ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN SHIMA +2CDF ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN NGI +2CE1 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN NYI +2CE3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER OLD NUBIAN WAU +2CEC ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI +2CEE ; Changes_When_Titlecased # L& COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF3 ; Changes_When_Titlecased # L& COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Titlecased # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Titlecased # L& GEORGIAN SMALL LETTER AEN +A641 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZEMLYA +A643 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZELO +A645 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED DZE +A647 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTA +A649 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DJERV +A64B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER MONOGRAPH UK +A64D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BROAD OMEGA +A64F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER NEUTRAL YER +A651 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YERU WITH BACK YER +A653 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED YAT +A655 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED YU +A657 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED A +A659 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CLOSED LITTLE YUS +A65B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BLENDED YUS +A65D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS +A65F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER YN +A661 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER REVERSED TSE +A663 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT DE +A665 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT EL +A667 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SOFT EM +A669 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER MONOCULAR O +A66B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER BINOCULAR O +A66D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A681 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DWE +A683 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZWE +A685 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER ZHWE +A687 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CCHE +A689 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DZZE +A68B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK +A68D ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TWE +A68F ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TSWE +A691 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TSSE +A693 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER TCHE +A695 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER HWE +A697 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER SHWE +A699 ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER DOUBLE O +A69B ; Changes_When_Titlecased # L& CYRILLIC SMALL LETTER CROSSED O +A723 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EGYPTOLOGICAL ALEF +A725 ; Changes_When_Titlecased # L& LATIN SMALL LETTER EGYPTOLOGICAL AIN +A727 ; Changes_When_Titlecased # L& LATIN SMALL LETTER HENG +A729 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TZ +A72B ; Changes_When_Titlecased # L& LATIN SMALL LETTER TRESILLO +A72D ; Changes_When_Titlecased # L& LATIN SMALL LETTER CUATRILLO +A72F ; Changes_When_Titlecased # L& LATIN SMALL LETTER CUATRILLO WITH COMMA +A733 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AA +A735 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AO +A737 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AU +A739 ; Changes_When_Titlecased # L& LATIN SMALL LETTER AV +A73B ; Changes_When_Titlecased # L& LATIN SMALL LETTER AV WITH HORIZONTAL BAR +A73D ; Changes_When_Titlecased # L& LATIN SMALL LETTER AY +A73F ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED C WITH DOT +A741 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH STROKE +A743 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH DIAGONAL STROKE +A745 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE +A747 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BROKEN L +A749 ; Changes_When_Titlecased # L& LATIN SMALL LETTER L WITH HIGH STROKE +A74B ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH LONG STROKE OVERLAY +A74D ; Changes_When_Titlecased # L& LATIN SMALL LETTER O WITH LOOP +A74F ; Changes_When_Titlecased # L& LATIN SMALL LETTER OO +A751 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER +A753 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH FLOURISH +A755 ; Changes_When_Titlecased # L& LATIN SMALL LETTER P WITH SQUIRREL TAIL +A757 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER +A759 ; Changes_When_Titlecased # L& LATIN SMALL LETTER Q WITH DIAGONAL STROKE +A75B ; Changes_When_Titlecased # L& LATIN SMALL LETTER R ROTUNDA +A75D ; Changes_When_Titlecased # L& LATIN SMALL LETTER RUM ROTUNDA +A75F ; Changes_When_Titlecased # L& LATIN SMALL LETTER V WITH DIAGONAL STROKE +A761 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VY +A763 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VISIGOTHIC Z +A765 ; Changes_When_Titlecased # L& LATIN SMALL LETTER THORN WITH STROKE +A767 ; Changes_When_Titlecased # L& LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER +A769 ; Changes_When_Titlecased # L& LATIN SMALL LETTER VEND +A76B ; Changes_When_Titlecased # L& LATIN SMALL LETTER ET +A76D ; Changes_When_Titlecased # L& LATIN SMALL LETTER IS +A76F ; Changes_When_Titlecased # L& LATIN SMALL LETTER CON +A77A ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR D +A77C ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR F +A77F ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED INSULAR G +A781 ; Changes_When_Titlecased # L& LATIN SMALL LETTER TURNED L +A783 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR R +A785 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR S +A787 ; Changes_When_Titlecased # L& LATIN SMALL LETTER INSULAR T +A78C ; Changes_When_Titlecased # L& LATIN SMALL LETTER SALTILLO +A791 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH DESCENDER +A793..A794 ; Changes_When_Titlecased # L& [2] LATIN SMALL LETTER C WITH BAR..LATIN SMALL LETTER C WITH PALATAL HOOK +A797 ; Changes_When_Titlecased # L& LATIN SMALL LETTER B WITH FLOURISH +A799 ; Changes_When_Titlecased # L& LATIN SMALL LETTER F WITH STROKE +A79B ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK AE +A79D ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK OE +A79F ; Changes_When_Titlecased # L& LATIN SMALL LETTER VOLAPUK UE +A7A1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER G WITH OBLIQUE STROKE +A7A3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER K WITH OBLIQUE STROKE +A7A5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER N WITH OBLIQUE STROKE +A7A7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER R WITH OBLIQUE STROKE +A7A9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH OBLIQUE STROKE +A7B5 ; Changes_When_Titlecased # L& LATIN SMALL LETTER BETA +A7B7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OMEGA +A7B9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER U WITH STROKE +A7BB ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL A +A7BD ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL I +A7BF ; Changes_When_Titlecased # L& LATIN SMALL LETTER GLOTTAL U +A7C1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER OLD POLISH O +A7C3 ; Changes_When_Titlecased # L& LATIN SMALL LETTER ANGLICANA W +A7C8 ; Changes_When_Titlecased # L& LATIN SMALL LETTER D WITH SHORT STROKE OVERLAY +A7CA ; Changes_When_Titlecased # L& LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D1 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CLOSED INSULAR G +A7D7 ; Changes_When_Titlecased # L& LATIN SMALL LETTER MIDDLE SCOTS S +A7D9 ; Changes_When_Titlecased # L& LATIN SMALL LETTER SIGMOID S +A7F6 ; Changes_When_Titlecased # L& LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Titlecased # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Titlecased # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Titlecased # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Titlecased # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF41..FF5A ; Changes_When_Titlecased # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10428..1044F ; Changes_When_Titlecased # L& [40] DESERET SMALL LETTER LONG I..DESERET SMALL LETTER EW +104D8..104FB ; Changes_When_Titlecased # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10597..105A1 ; Changes_When_Titlecased # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Titlecased # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Titlecased # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Changes_When_Titlecased # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10CC0..10CF2 ; Changes_When_Titlecased # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +118C0..118DF ; Changes_When_Titlecased # L& [32] WARANG CITI SMALL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E60..16E7F ; Changes_When_Titlecased # L& [32] MEDEFAIDRIN SMALL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E922..1E943 ; Changes_When_Titlecased # L& [34] ADLAM SMALL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 1452 + +# ================================================ + +# Derived Property: Changes_When_Casefolded (CWCF) +# Characters whose normalized forms are not stable under case folding. +# For more information, see D142 in Section 3.13, "Default Case Algorithms". +# Changes_When_Casefolded(X) is true when toCasefold(toNFD(X)) != toNFD(X) + +0041..005A ; Changes_When_Casefolded # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +00B5 ; Changes_When_Casefolded # L& MICRO SIGN +00C0..00D6 ; Changes_When_Casefolded # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00DF ; Changes_When_Casefolded # L& [8] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER SHARP S +0100 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH MACRON +0102 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE +0104 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH OGONEK +0106 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH ACUTE +0108 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CIRCUMFLEX +010A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH DOT ABOVE +010C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CARON +010E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CARON +0110 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH STROKE +0112 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON +0114 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH BREVE +0116 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOT ABOVE +0118 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH OGONEK +011A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CARON +011C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CIRCUMFLEX +011E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH BREVE +0120 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH DOT ABOVE +0122 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CEDILLA +0124 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CIRCUMFLEX +0126 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH STROKE +0128 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH TILDE +012A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH MACRON +012C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH BREVE +012E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH OGONEK +0130 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOT ABOVE +0132 ; Changes_When_Casefolded # L& LATIN CAPITAL LIGATURE IJ +0134 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER J WITH CIRCUMFLEX +0136 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH CEDILLA +0139 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH ACUTE +013B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CEDILLA +013D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CARON +013F ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH MIDDLE DOT +0141 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH STROKE +0143 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH ACUTE +0145 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CEDILLA +0147 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CARON +0149..014A ; Changes_When_Casefolded # L& [2] LATIN SMALL LETTER N PRECEDED BY APOSTROPHE..LATIN CAPITAL LETTER ENG +014C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON +014E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH BREVE +0150 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOUBLE ACUTE +0152 ; Changes_When_Casefolded # L& LATIN CAPITAL LIGATURE OE +0154 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH ACUTE +0156 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH CEDILLA +0158 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH CARON +015A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH ACUTE +015C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CIRCUMFLEX +015E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CEDILLA +0160 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CARON +0162 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CEDILLA +0164 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CARON +0166 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH STROKE +0168 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE +016A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH MACRON +016C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH BREVE +016E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH RING ABOVE +0170 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DOUBLE ACUTE +0172 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH OGONEK +0174 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH CIRCUMFLEX +0176 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH CIRCUMFLEX +0178..0179 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE +017B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DOT ABOVE +017D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH CARON +017F ; Changes_When_Casefolded # L& LATIN SMALL LETTER LONG S +0181..0182 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR +0184 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TONE SIX +0186..0187 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK +0189..018B ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR +018E..0191 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK +0193..0194 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA +0196..0198 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK +019C..019D ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK +019F..01A0 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN +01A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OI +01A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH HOOK +01A6..01A7 ; Changes_When_Casefolded # L& [2] LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO +01A9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ESH +01AC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH HOOK +01AE..01AF ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN +01B1..01B3 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK +01B5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH STROKE +01B7..01B8 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED +01BC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TONE FIVE +01C4..01C5 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON +01C7..01C8 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER LJ..LATIN CAPITAL LETTER L WITH SMALL LETTER J +01CA..01CB ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER NJ..LATIN CAPITAL LETTER N WITH SMALL LETTER J +01CD ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CARON +01CF ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH CARON +01D1 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CARON +01D3 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH CARON +01D5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON +01D7 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE +01D9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON +01DB ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE +01DE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON +01E0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON +01E2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AE WITH MACRON +01E4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH STROKE +01E6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH CARON +01E8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH CARON +01EA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH OGONEK +01EC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH OGONEK AND MACRON +01EE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EZH WITH CARON +01F1..01F2 ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER D WITH SMALL LETTER Z +01F4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH ACUTE +01F6..01F8 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE +01FA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE +01FC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AE WITH ACUTE +01FE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH STROKE AND ACUTE +0200 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOUBLE GRAVE +0202 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH INVERTED BREVE +0204 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOUBLE GRAVE +0206 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH INVERTED BREVE +0208 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOUBLE GRAVE +020A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH INVERTED BREVE +020C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOUBLE GRAVE +020E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH INVERTED BREVE +0210 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOUBLE GRAVE +0212 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH INVERTED BREVE +0214 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DOUBLE GRAVE +0216 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH INVERTED BREVE +0218 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH COMMA BELOW +021A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH COMMA BELOW +021C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER YOGH +021E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CARON +0220 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OU +0224 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH HOOK +0226 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT ABOVE +0228 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CEDILLA +022A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON +022C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND MACRON +022E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT ABOVE +0230 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON +0232 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH MACRON +023A..023B ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE +023D..023E ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE +0241 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL STOP +0243..0246 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE +0248 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER J WITH STROKE +024A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL +024C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH STROKE +024E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH STROKE +0345 ; Changes_When_Casefolded # Mn COMBINING GREEK YPOGEGRAMMENI +0370 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER HETA +0372 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER ARCHAIC SAMPI +0376 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA +037F ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Casefolded # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..038F ; Changes_When_Casefolded # L& [2] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS +0391..03A1 ; Changes_When_Casefolded # L& [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO +03A3..03AB ; Changes_When_Casefolded # L& [9] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA +03C2 ; Changes_When_Casefolded # L& GREEK SMALL LETTER FINAL SIGMA +03CF..03D1 ; Changes_When_Casefolded # L& [3] GREEK CAPITAL KAI SYMBOL..GREEK THETA SYMBOL +03D5..03D6 ; Changes_When_Casefolded # L& [2] GREEK PHI SYMBOL..GREEK PI SYMBOL +03D8 ; Changes_When_Casefolded # L& GREEK LETTER ARCHAIC KOPPA +03DA ; Changes_When_Casefolded # L& GREEK LETTER STIGMA +03DC ; Changes_When_Casefolded # L& GREEK LETTER DIGAMMA +03DE ; Changes_When_Casefolded # L& GREEK LETTER KOPPA +03E0 ; Changes_When_Casefolded # L& GREEK LETTER SAMPI +03E2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SHEI +03E4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER FEI +03E6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KHEI +03E8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER HORI +03EA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER GANGIA +03EC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SHIMA +03EE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DEI +03F0..03F1 ; Changes_When_Casefolded # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Changes_When_Casefolded # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F7 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER SHO +03F9..03FA ; Changes_When_Casefolded # L& [2] GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN +03FD..042F ; Changes_When_Casefolded # L& [51] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA +0460 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OMEGA +0462 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YAT +0464 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED E +0466 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER LITTLE YUS +0468 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS +046A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BIG YUS +046C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS +046E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KSI +0470 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PSI +0472 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER FITA +0474 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IZHITSA +0476 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT +0478 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER UK +047A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ROUND OMEGA +047C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OMEGA WITH TITLO +047E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER OT +0480 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOPPA +048A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHORT I WITH TAIL +048C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SEMISOFT SIGN +048E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ER WITH TICK +0490 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH UPTURN +0492 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE +0494 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK +0496 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER +0498 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZE WITH DESCENDER +049A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH DESCENDER +049C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE +049E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH STROKE +04A0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BASHKIR KA +04A2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH DESCENDER +04A4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE EN GHE +04A6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK +04A8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN HA +04AA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ES WITH DESCENDER +04AC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TE WITH DESCENDER +04AE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER STRAIGHT U +04B0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE +04B2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH DESCENDER +04B4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE TE TSE +04B6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH DESCENDER +04B8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE +04BA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHHA +04BC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE +04BE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER +04C0..04C1 ; Changes_When_Casefolded # L& [2] CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE +04C3 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KA WITH HOOK +04C5 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH TAIL +04C7 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH HOOK +04C9 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH TAIL +04CB ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KHAKASSIAN CHE +04CD ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EM WITH TAIL +04D0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER A WITH BREVE +04D2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER A WITH DIAERESIS +04D4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LIGATURE A IE +04D6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IE WITH BREVE +04D8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SCHWA +04DA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS +04DC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS +04DE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS +04E0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ABKHASIAN DZE +04E2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER I WITH MACRON +04E4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER I WITH DIAERESIS +04E6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER O WITH DIAERESIS +04E8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BARRED O +04EA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS +04EC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER E WITH DIAERESIS +04EE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH MACRON +04F0 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH DIAERESIS +04F2 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE +04F4 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS +04F6 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH DESCENDER +04F8 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS +04FA ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK +04FC ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH HOOK +04FE ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HA WITH STROKE +0500 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DE +0502 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DJE +0504 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI ZJE +0506 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI DZJE +0508 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI LJE +050A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI NJE +050C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI SJE +050E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER KOMI TJE +0510 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED ZE +0512 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH HOOK +0514 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER LHA +0516 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER RHA +0518 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YAE +051A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER QA +051C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER WE +051E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ALEUT KA +0520 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK +0522 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK +0524 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER PE WITH DESCENDER +0526 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER +0528 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK +052A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZZHE +052C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DCHE +052E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Casefolded # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0587 ; Changes_When_Casefolded # L& ARMENIAN SMALL LIGATURE ECH YIWN +10A0..10C5 ; Changes_When_Casefolded # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casefolded # L& GEORGIAN CAPITAL LETTER AEN +13F8..13FD ; Changes_When_Casefolded # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Casefolded # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; Changes_When_Casefolded # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Changes_When_Casefolded # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1E00 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH RING BELOW +1E02 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT ABOVE +1E04 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH DOT BELOW +1E06 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH LINE BELOW +1E08 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE +1E0A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH DOT ABOVE +1E0C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH DOT BELOW +1E0E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH LINE BELOW +1E10 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CEDILLA +1E12 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW +1E14 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON AND GRAVE +1E16 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH MACRON AND ACUTE +1E18 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW +1E1A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH TILDE BELOW +1E1C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE +1E1E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER F WITH DOT ABOVE +1E20 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH MACRON +1E22 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DOT ABOVE +1E24 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DOT BELOW +1E26 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DIAERESIS +1E28 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH CEDILLA +1E2A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH BREVE BELOW +1E2C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH TILDE BELOW +1E2E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE +1E30 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH ACUTE +1E32 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH DOT BELOW +1E34 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH LINE BELOW +1E36 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOT BELOW +1E38 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON +1E3A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH LINE BELOW +1E3C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW +1E3E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH ACUTE +1E40 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH DOT ABOVE +1E42 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER M WITH DOT BELOW +1E44 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DOT ABOVE +1E46 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DOT BELOW +1E48 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH LINE BELOW +1E4A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW +1E4C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND ACUTE +1E4E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS +1E50 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON AND GRAVE +1E52 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH MACRON AND ACUTE +1E54 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH ACUTE +1E56 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH DOT ABOVE +1E58 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT ABOVE +1E5A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT BELOW +1E5C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON +1E5E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH LINE BELOW +1E60 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT ABOVE +1E62 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT BELOW +1E64 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE +1E66 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE +1E68 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE +1E6A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH DOT ABOVE +1E6C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH DOT BELOW +1E6E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH LINE BELOW +1E70 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW +1E72 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DIAERESIS BELOW +1E74 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE BELOW +1E76 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW +1E78 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH TILDE AND ACUTE +1E7A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS +1E7C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH TILDE +1E7E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH DOT BELOW +1E80 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH GRAVE +1E82 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH ACUTE +1E84 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH DIAERESIS +1E86 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH DOT ABOVE +1E88 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH DOT BELOW +1E8A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER X WITH DOT ABOVE +1E8C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER X WITH DIAERESIS +1E8E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH DOT ABOVE +1E90 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH CIRCUMFLEX +1E92 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DOT BELOW +1E94 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH LINE BELOW +1E9A..1E9B ; Changes_When_Casefolded # L& [2] LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SHARP S +1EA0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH DOT BELOW +1EA2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH HOOK ABOVE +1EA4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE +1EA6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE +1EA8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE +1EAA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE +1EAC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW +1EAE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND ACUTE +1EB0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND GRAVE +1EB2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE +1EB4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND TILDE +1EB6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW +1EB8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH DOT BELOW +1EBA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH HOOK ABOVE +1EBC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH TILDE +1EBE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE +1EC0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE +1EC2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE +1EC4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE +1EC6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW +1EC8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH HOOK ABOVE +1ECA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER I WITH DOT BELOW +1ECC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH DOT BELOW +1ECE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HOOK ABOVE +1ED0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE +1ED2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE +1ED4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE +1ED6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE +1ED8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW +1EDA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND ACUTE +1EDC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND GRAVE +1EDE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE +1EE0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND TILDE +1EE2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW +1EE4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH DOT BELOW +1EE6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HOOK ABOVE +1EE8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND ACUTE +1EEA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND GRAVE +1EEC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE +1EEE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND TILDE +1EF0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW +1EF2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH GRAVE +1EF4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH DOT BELOW +1EF6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH HOOK ABOVE +1EF8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH TILDE +1EFA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE-WELSH LL +1EFC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE-WELSH V +1EFE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Y WITH LOOP +1F08..1F0F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI +1F18..1F1D ; Changes_When_Casefolded # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F28..1F2F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI +1F38..1F3F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI +1F48..1F4D ; Changes_When_Casefolded # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F59 ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F ; Changes_When_Casefolded # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F68..1F6F ; Changes_When_Casefolded # L& [8] GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI +1F80..1FAF ; Changes_When_Casefolded # L& [48] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI +1FB2..1FB4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB7..1FBC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC7..1FCC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD8..1FDB ; Changes_When_Casefolded # L& [4] GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE8..1FEC ; Changes_When_Casefolded # L& [5] GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Changes_When_Casefolded # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF7..1FFC ; Changes_When_Casefolded # L& [6] GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Casefolded # L& OHM SIGN +212A..212B ; Changes_When_Casefolded # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Casefolded # L& TURNED CAPITAL F +2160..216F ; Changes_When_Casefolded # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +2183 ; Changes_When_Casefolded # L& ROMAN NUMERAL REVERSED ONE HUNDRED +24B6..24CF ; Changes_When_Casefolded # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +2C00..2C2F ; Changes_When_Casefolded # L& [48] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI +2C60 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH DOUBLE BAR +2C62..2C64 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL +2C67 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER H WITH DESCENDER +2C69 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH DESCENDER +2C6B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Z WITH DESCENDER +2C6D..2C70 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA +2C72 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER W WITH HOOK +2C75 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER HALF H +2C7E..2C80 ; Changes_When_Casefolded # L& [3] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC CAPITAL LETTER ALFA +2C82 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER VIDA +2C84 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER GAMMA +2C86 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DALDA +2C88 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER EIE +2C8A ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SOU +2C8C ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER ZATA +2C8E ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER HATE +2C90 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER THETHE +2C92 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER IAUDA +2C94 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KAPA +2C96 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER LAULA +2C98 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER MI +2C9A ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER NI +2C9C ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KSI +2C9E ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER O +2CA0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER PI +2CA2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER RO +2CA4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SIMA +2CA6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER TAU +2CA8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER UA +2CAA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER FI +2CAC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER KHI +2CAE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER PSI +2CB0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OOU +2CB2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P ALEF +2CB4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC AIN +2CB6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE +2CB8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P KAPA +2CBA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P NI +2CBC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI +2CBE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC OOU +2CC0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER SAMPI +2CC2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CROSSED SHEI +2CC4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC SHEI +2CC6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC ESH +2CC8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER AKHMIMIC KHEI +2CCA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER DIALECT-P HORI +2CCC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HORI +2CCE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HA +2CD0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER L-SHAPED HA +2CD2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HEI +2CD4 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC HAT +2CD6 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC GANGIA +2CD8 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC DJA +2CDA ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD COPTIC SHIMA +2CDC ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN SHIMA +2CDE ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN NGI +2CE0 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN NYI +2CE2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER OLD NUBIAN WAU +2CEB ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI +2CED ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA +2CF2 ; Changes_When_Casefolded # L& COPTIC CAPITAL LETTER BOHAIRIC KHEI +A640 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZEMLYA +A642 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZELO +A644 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED DZE +A646 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTA +A648 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DJERV +A64A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER MONOGRAPH UK +A64C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BROAD OMEGA +A64E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER NEUTRAL YER +A650 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YERU WITH BACK YER +A652 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED YAT +A654 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED YU +A656 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED A +A658 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS +A65A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BLENDED YUS +A65C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS +A65E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER YN +A660 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER REVERSED TSE +A662 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT DE +A664 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT EL +A666 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SOFT EM +A668 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER MONOCULAR O +A66A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER BINOCULAR O +A66C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O +A680 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DWE +A682 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZWE +A684 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER ZHWE +A686 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CCHE +A688 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DZZE +A68A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK +A68C ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TWE +A68E ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TSWE +A690 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TSSE +A692 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER TCHE +A694 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER HWE +A696 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER SHWE +A698 ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER DOUBLE O +A69A ; Changes_When_Casefolded # L& CYRILLIC CAPITAL LETTER CROSSED O +A722 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF +A724 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER EGYPTOLOGICAL AIN +A726 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER HENG +A728 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TZ +A72A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TRESILLO +A72C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CUATRILLO +A72E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CUATRILLO WITH COMMA +A732 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AA +A734 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AO +A736 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AU +A738 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AV +A73A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR +A73C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER AY +A73E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED C WITH DOT +A740 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH STROKE +A742 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH DIAGONAL STROKE +A744 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE +A746 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER BROKEN L +A748 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER L WITH HIGH STROKE +A74A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY +A74C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER O WITH LOOP +A74E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OO +A750 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER +A752 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH FLOURISH +A754 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER P WITH SQUIRREL TAIL +A756 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER +A758 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE +A75A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R ROTUNDA +A75C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER RUM ROTUNDA +A75E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER V WITH DIAGONAL STROKE +A760 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VY +A762 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VISIGOTHIC Z +A764 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER THORN WITH STROKE +A766 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER +A768 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VEND +A76A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ET +A76C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER IS +A76E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CON +A779 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR D +A77B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR F +A77D..A77E ; Changes_When_Casefolded # L& [2] LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G +A780 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED L +A782 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR R +A784 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR S +A786 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER INSULAR T +A78B ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SALTILLO +A78D ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER TURNED H +A790 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH DESCENDER +A792 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER C WITH BAR +A796 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER B WITH FLOURISH +A798 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER F WITH STROKE +A79A ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK AE +A79C ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK OE +A79E ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER VOLAPUK UE +A7A0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER G WITH OBLIQUE STROKE +A7A2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER K WITH OBLIQUE STROKE +A7A4 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER N WITH OBLIQUE STROKE +A7A6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER R WITH OBLIQUE STROKE +A7A8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH OBLIQUE STROKE +A7AA..A7AE ; Changes_When_Casefolded # L& [5] LATIN CAPITAL LETTER H WITH HOOK..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7B4 ; Changes_When_Casefolded # L& [5] LATIN CAPITAL LETTER TURNED K..LATIN CAPITAL LETTER BETA +A7B6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OMEGA +A7B8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER U WITH STROKE +A7BA ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL A +A7BC ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL I +A7BE ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER GLOTTAL U +A7C0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER OLD POLISH O +A7C2 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER ANGLICANA W +A7C4..A7C7 ; Changes_When_Casefolded # L& [4] LATIN CAPITAL LETTER C WITH PALATAL HOOK..LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY +A7C9 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY +A7D0 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER CLOSED INSULAR G +A7D6 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER MIDDLE SCOTS S +A7D8 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER SIGMOID S +A7F5 ; Changes_When_Casefolded # L& LATIN CAPITAL LETTER REVERSED HALF H +AB70..ABBF ; Changes_When_Casefolded # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Casefolded # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Casefolded # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Changes_When_Casefolded # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +10400..10427 ; Changes_When_Casefolded # L& [40] DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW +104B0..104D3 ; Changes_When_Casefolded # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +10570..1057A ; Changes_When_Casefolded # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Casefolded # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Casefolded # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Casefolded # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10C80..10CB2 ; Changes_When_Casefolded # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +118A0..118BF ; Changes_When_Casefolded # L& [32] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI CAPITAL LETTER VIYO +16E40..16E5F ; Changes_When_Casefolded # L& [32] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN CAPITAL LETTER Y +1E900..1E921 ; Changes_When_Casefolded # L& [34] ADLAM CAPITAL LETTER ALIF..ADLAM CAPITAL LETTER SHA + +# Total code points: 1506 + +# ================================================ + +# Derived Property: Changes_When_Casemapped (CWCM) +# Characters whose normalized forms are not stable under case mapping. +# For more information, see D143 in Section 3.13, "Default Case Algorithms". +# Changes_When_Casemapped(X) is true when CWL(X), or CWT(X), or CWU(X) + +0041..005A ; Changes_When_Casemapped # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Changes_When_Casemapped # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00B5 ; Changes_When_Casemapped # L& MICRO SIGN +00C0..00D6 ; Changes_When_Casemapped # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Changes_When_Casemapped # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..0137 ; Changes_When_Casemapped # L& [64] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER K WITH CEDILLA +0139..018C ; Changes_When_Casemapped # L& [84] LATIN CAPITAL LETTER L WITH ACUTE..LATIN SMALL LETTER D WITH TOPBAR +018E..019A ; Changes_When_Casemapped # L& [13] LATIN CAPITAL LETTER REVERSED E..LATIN SMALL LETTER L WITH BAR +019C..01A9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER ESH +01AC..01B9 ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER T WITH HOOK..LATIN SMALL LETTER EZH REVERSED +01BC..01BD ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER TONE FIVE..LATIN SMALL LETTER TONE FIVE +01BF ; Changes_When_Casemapped # L& LATIN LETTER WYNN +01C4..0220 ; Changes_When_Casemapped # L& [93] LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER N WITH LONG RIGHT LEG +0222..0233 ; Changes_When_Casemapped # L& [18] LATIN CAPITAL LETTER OU..LATIN SMALL LETTER Y WITH MACRON +023A..0254 ; Changes_When_Casemapped # L& [27] LATIN CAPITAL LETTER A WITH STROKE..LATIN SMALL LETTER OPEN O +0256..0257 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER D WITH TAIL..LATIN SMALL LETTER D WITH HOOK +0259 ; Changes_When_Casemapped # L& LATIN SMALL LETTER SCHWA +025B..025C ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER OPEN E..LATIN SMALL LETTER REVERSED OPEN E +0260..0261 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER G WITH HOOK..LATIN SMALL LETTER SCRIPT G +0263 ; Changes_When_Casemapped # L& LATIN SMALL LETTER GAMMA +0265..0266 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER TURNED H..LATIN SMALL LETTER H WITH HOOK +0268..026C ; Changes_When_Casemapped # L& [5] LATIN SMALL LETTER I WITH STROKE..LATIN SMALL LETTER L WITH BELT +026F ; Changes_When_Casemapped # L& LATIN SMALL LETTER TURNED M +0271..0272 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER M WITH HOOK..LATIN SMALL LETTER N WITH LEFT HOOK +0275 ; Changes_When_Casemapped # L& LATIN SMALL LETTER BARRED O +027D ; Changes_When_Casemapped # L& LATIN SMALL LETTER R WITH TAIL +0280 ; Changes_When_Casemapped # L& LATIN LETTER SMALL CAPITAL R +0282..0283 ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER S WITH HOOK..LATIN SMALL LETTER ESH +0287..028C ; Changes_When_Casemapped # L& [6] LATIN SMALL LETTER TURNED T..LATIN SMALL LETTER TURNED V +0292 ; Changes_When_Casemapped # L& LATIN SMALL LETTER EZH +029D..029E ; Changes_When_Casemapped # L& [2] LATIN SMALL LETTER J WITH CROSSED-TAIL..LATIN SMALL LETTER TURNED K +0345 ; Changes_When_Casemapped # Mn COMBINING GREEK YPOGEGRAMMENI +0370..0373 ; Changes_When_Casemapped # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0376..0377 ; Changes_When_Casemapped # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; Changes_When_Casemapped # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER YOT +0386 ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Changes_When_Casemapped # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Changes_When_Casemapped # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03D1 ; Changes_When_Casemapped # L& [47] GREEK CAPITAL LETTER SIGMA..GREEK THETA SYMBOL +03D5..03F5 ; Changes_When_Casemapped # L& [33] GREEK PHI SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F7..03FB ; Changes_When_Casemapped # L& [5] GREEK CAPITAL LETTER SHO..GREEK SMALL LETTER SAN +03FD..0481 ; Changes_When_Casemapped # L& [133] GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC SMALL LETTER KOPPA +048A..052F ; Changes_When_Casemapped # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Changes_When_Casemapped # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0561..0587 ; Changes_When_Casemapped # L& [39] ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LIGATURE ECH YIWN +10A0..10C5 ; Changes_When_Casemapped # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER YN +10CD ; Changes_When_Casemapped # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Changes_When_Casemapped # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FD..10FF ; Changes_When_Casemapped # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +13A0..13F5 ; Changes_When_Casemapped # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Changes_When_Casemapped # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1C80..1C88 ; Changes_When_Casemapped # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; Changes_When_Casemapped # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Changes_When_Casemapped # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1D79 ; Changes_When_Casemapped # L& LATIN SMALL LETTER INSULAR G +1D7D ; Changes_When_Casemapped # L& LATIN SMALL LETTER P WITH STROKE +1D8E ; Changes_When_Casemapped # L& LATIN SMALL LETTER Z WITH PALATAL HOOK +1E00..1E9B ; Changes_When_Casemapped # L& [156] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER LONG S WITH DOT ABOVE +1E9E ; Changes_When_Casemapped # L& LATIN CAPITAL LETTER SHARP S +1EA0..1F15 ; Changes_When_Casemapped # L& [118] LATIN CAPITAL LETTER A WITH DOT BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Changes_When_Casemapped # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Changes_When_Casemapped # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Changes_When_Casemapped # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Changes_When_Casemapped # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Changes_When_Casemapped # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Changes_When_Casemapped # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Changes_When_Casemapped # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; Changes_When_Casemapped # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; Changes_When_Casemapped # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; Changes_When_Casemapped # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Changes_When_Casemapped # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; Changes_When_Casemapped # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; Changes_When_Casemapped # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Changes_When_Casemapped # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2126 ; Changes_When_Casemapped # L& OHM SIGN +212A..212B ; Changes_When_Casemapped # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Changes_When_Casemapped # L& TURNED CAPITAL F +214E ; Changes_When_Casemapped # L& TURNED SMALL F +2160..217F ; Changes_When_Casemapped # Nl [32] ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +2183..2184 ; Changes_When_Casemapped # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +24B6..24E9 ; Changes_When_Casemapped # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C00..2C70 ; Changes_When_Casemapped # L& [113] GLAGOLITIC CAPITAL LETTER AZU..LATIN CAPITAL LETTER TURNED ALPHA +2C72..2C73 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER W WITH HOOK..LATIN SMALL LETTER W WITH HOOK +2C75..2C76 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER HALF H..LATIN SMALL LETTER HALF H +2C7E..2CE3 ; Changes_When_Casemapped # L& [102] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SMALL LETTER OLD NUBIAN WAU +2CEB..2CEE ; Changes_When_Casemapped # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Changes_When_Casemapped # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; Changes_When_Casemapped # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER YN +2D2D ; Changes_When_Casemapped # L& GEORGIAN SMALL LETTER AEN +A640..A66D ; Changes_When_Casemapped # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A680..A69B ; Changes_When_Casemapped # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A722..A72F ; Changes_When_Casemapped # L& [14] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CUATRILLO WITH COMMA +A732..A76F ; Changes_When_Casemapped # L& [62] LATIN CAPITAL LETTER AA..LATIN SMALL LETTER CON +A779..A787 ; Changes_When_Casemapped # L& [15] LATIN CAPITAL LETTER INSULAR D..LATIN SMALL LETTER INSULAR T +A78B..A78D ; Changes_When_Casemapped # L& [3] LATIN CAPITAL LETTER SALTILLO..LATIN CAPITAL LETTER TURNED H +A790..A794 ; Changes_When_Casemapped # L& [5] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER C WITH PALATAL HOOK +A796..A7AE ; Changes_When_Casemapped # L& [25] LATIN CAPITAL LETTER B WITH FLOURISH..LATIN CAPITAL LETTER SMALL CAPITAL I +A7B0..A7CA ; Changes_When_Casemapped # L& [27] LATIN CAPITAL LETTER TURNED K..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D6..A7D9 ; Changes_When_Casemapped # L& [4] LATIN CAPITAL LETTER MIDDLE SCOTS S..LATIN SMALL LETTER SIGMOID S +A7F5..A7F6 ; Changes_When_Casemapped # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +AB53 ; Changes_When_Casemapped # L& LATIN SMALL LETTER CHI +AB70..ABBF ; Changes_When_Casemapped # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +FB00..FB06 ; Changes_When_Casemapped # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Changes_When_Casemapped # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FF21..FF3A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Changes_When_Casemapped # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10400..1044F ; Changes_When_Casemapped # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +104B0..104D3 ; Changes_When_Casemapped # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Changes_When_Casemapped # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10570..1057A ; Changes_When_Casemapped # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Changes_When_Casemapped # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Changes_When_Casemapped # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Changes_When_Casemapped # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Changes_When_Casemapped # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Changes_When_Casemapped # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Changes_When_Casemapped # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Changes_When_Casemapped # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10C80..10CB2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Changes_When_Casemapped # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +118A0..118DF ; Changes_When_Casemapped # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +16E40..16E7F ; Changes_When_Casemapped # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +1E900..1E943 ; Changes_When_Casemapped # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA + +# Total code points: 2927 + +# ================================================ + +# Derived Property: ID_Start +# Characters that can start an identifier. +# Generated from: +# Lu + Ll + Lt + Lm + Lo + Nl +# + Other_ID_Start +# - Pattern_Syntax +# - Pattern_White_Space +# NOTE: See UAX #31 for more information + +0041..005A ; ID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; ID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; ID_Start # Lo FEMININE ORDINAL INDICATOR +00B5 ; ID_Start # L& MICRO SIGN +00BA ; ID_Start # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ID_Start # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; ID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; ID_Start # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; ID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; ID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; ID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; ID_Start # Lm MODIFIER LETTER VOICING +02EE ; ID_Start # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; ID_Start # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; ID_Start # Lm GREEK NUMERAL SIGN +0376..0377 ; ID_Start # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; ID_Start # Lm GREEK YPOGEGRAMMENI +037B..037D ; ID_Start # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; ID_Start # L& GREEK CAPITAL LETTER YOT +0386 ; ID_Start # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; ID_Start # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ID_Start # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ID_Start # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; ID_Start # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ID_Start # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; ID_Start # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; ID_Start # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ID_Start # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; ID_Start # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05D0..05EA ; ID_Start # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; ID_Start # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0620..063F ; ID_Start # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; ID_Start # Lm ARABIC TATWEEL +0641..064A ; ID_Start # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; ID_Start # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; ID_Start # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ID_Start # Lo ARABIC LETTER AE +06E5..06E6 ; ID_Start # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; ID_Start # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; ID_Start # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ID_Start # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; ID_Start # Lo SYRIAC LETTER ALAPH +0712..072F ; ID_Start # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; ID_Start # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; ID_Start # Lo THAANA LETTER NAA +07CA..07EA ; ID_Start # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; ID_Start # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; ID_Start # Lm NKO LAJANYALAN +0800..0815 ; ID_Start # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; ID_Start # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; ID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; ID_Start # Lm SAMARITAN MODIFIER LETTER I +0840..0858 ; ID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; ID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; ID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; ID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; ID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; ID_Start # Lm ARABIC SMALL FARSI YEH +0904..0939 ; ID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; ID_Start # Lo DEVANAGARI SIGN AVAGRAHA +0950 ; ID_Start # Lo DEVANAGARI OM +0958..0961 ; ID_Start # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0971 ; ID_Start # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; ID_Start # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; ID_Start # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ID_Start # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ID_Start # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ID_Start # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ID_Start # Lo BENGALI LETTER LA +09B6..09B9 ; ID_Start # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; ID_Start # Lo BENGALI SIGN AVAGRAHA +09CE ; ID_Start # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; ID_Start # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; ID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; ID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ID_Start # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ID_Start # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ID_Start # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ID_Start # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; ID_Start # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ID_Start # Lo GURMUKHI LETTER FA +0A72..0A74 ; ID_Start # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A85..0A8D ; ID_Start # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ID_Start # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ID_Start # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ID_Start # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ID_Start # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ID_Start # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; ID_Start # Lo GUJARATI SIGN AVAGRAHA +0AD0 ; ID_Start # Lo GUJARATI OM +0AE0..0AE1 ; ID_Start # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; ID_Start # Lo GUJARATI LETTER ZHA +0B05..0B0C ; ID_Start # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ID_Start # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ID_Start # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ID_Start # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ID_Start # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ID_Start # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; ID_Start # Lo ORIYA SIGN AVAGRAHA +0B5C..0B5D ; ID_Start # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ID_Start # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; ID_Start # Lo ORIYA LETTER WA +0B83 ; ID_Start # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ID_Start # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ID_Start # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ID_Start # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ID_Start # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ID_Start # Lo TAMIL LETTER JA +0B9E..0B9F ; ID_Start # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ID_Start # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ID_Start # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ID_Start # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; ID_Start # Lo TAMIL OM +0C05..0C0C ; ID_Start # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ID_Start # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; ID_Start # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; ID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; ID_Start # Lo TELUGU SIGN AVAGRAHA +0C58..0C5A ; ID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; ID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; ID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; ID_Start # Lo KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; ID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ID_Start # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ID_Start # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; ID_Start # Lo KANNADA SIGN AVAGRAHA +0CDD..0CDE ; ID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; ID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; ID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; ID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ID_Start # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; ID_Start # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; ID_Start # Lo MALAYALAM SIGN AVAGRAHA +0D4E ; ID_Start # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ID_Start # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; ID_Start # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; ID_Start # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; ID_Start # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ID_Start # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ID_Start # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ID_Start # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ID_Start # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0E01..0E30 ; ID_Start # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; ID_Start # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E40..0E45 ; ID_Start # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; ID_Start # Lm THAI CHARACTER MAIYAMOK +0E81..0E82 ; ID_Start # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; ID_Start # Lo LAO LETTER KHO TAM +0E86..0E8A ; ID_Start # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; ID_Start # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; ID_Start # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; ID_Start # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; ID_Start # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; ID_Start # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; ID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; ID_Start # Lm LAO KO LA +0EDC..0EDF ; ID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; ID_Start # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; ID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; ID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; ID_Start # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +1000..102A ; ID_Start # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +103F ; ID_Start # Lo MYANMAR LETTER GREAT SA +1050..1055 ; ID_Start # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +105A..105D ; ID_Start # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; ID_Start # Lo MYANMAR LETTER SGAW KAREN SHA +1065..1066 ; ID_Start # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +106E..1070 ; ID_Start # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; ID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +108E ; ID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA +10A0..10C5 ; ID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Start # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; ID_Start # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; ID_Start # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; ID_Start # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; ID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ID_Start # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ID_Start # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ID_Start # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; ID_Start # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ID_Start # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ID_Start # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ID_Start # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ID_Start # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; ID_Start # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; ID_Start # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; ID_Start # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; ID_Start # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; ID_Start # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; ID_Start # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ID_Start # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ID_Start # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; ID_Start # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; ID_Start # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; ID_Start # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; ID_Start # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; ID_Start # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ID_Start # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; ID_Start # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17D7 ; ID_Start # Lm KHMER SIGN LEK TOO +17DC ; ID_Start # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; ID_Start # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; ID_Start # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; ID_Start # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; ID_Start # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; ID_Start # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; ID_Start # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; ID_Start # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; ID_Start # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1950..196D ; ID_Start # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; ID_Start # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; ID_Start # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; ID_Start # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; ID_Start # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A20..1A54 ; ID_Start # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1AA7 ; ID_Start # Lm TAI THAM SIGN MAI YAMOK +1B05..1B33 ; ID_Start # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; ID_Start # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; ID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; ID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; ID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; ID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; ID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; ID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; ID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; ID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; ID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; ID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; ID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; ID_Start # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; ID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; ID_Start # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; ID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; ID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; ID_Start # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; ID_Start # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ID_Start # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ID_Start # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ID_Start # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; ID_Start # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ID_Start # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; ID_Start # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ID_Start # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ID_Start # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ID_Start # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ID_Start # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ID_Start # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ID_Start # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ID_Start # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ID_Start # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; ID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; ID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; ID_Start # L& DOUBLE-STRUCK CAPITAL C +2107 ; ID_Start # L& EULER CONSTANT +210A..2113 ; ID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ID_Start # L& DOUBLE-STRUCK CAPITAL N +2118 ; ID_Start # Sm SCRIPT CAPITAL P +2119..211D ; ID_Start # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ID_Start # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ID_Start # L& OHM SIGN +2128 ; ID_Start # L& BLACK-LETTER CAPITAL Z +212A..212D ; ID_Start # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; ID_Start # So ESTIMATED SYMBOL +212F..2134 ; ID_Start # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; ID_Start # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ID_Start # L& INFORMATION SOURCE +213C..213F ; ID_Start # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ID_Start # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; ID_Start # L& TURNED SMALL F +2160..2182 ; ID_Start # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; ID_Start # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; ID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; ID_Start # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; ID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; ID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; ID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; ID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; ID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; ID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; ID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; ID_Start # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; ID_Start # Lm IDEOGRAPHIC ITERATION MARK +3006 ; ID_Start # Lo IDEOGRAPHIC CLOSING MARK +3007 ; ID_Start # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; ID_Start # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; ID_Start # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; ID_Start # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; ID_Start # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ID_Start # Lo MASU MARK +3041..3096 ; ID_Start # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309B..309C ; ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; ID_Start # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; ID_Start # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; ID_Start # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; ID_Start # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; ID_Start # Lo KATAKANA DIGRAPH KOTO +3105..312F ; ID_Start # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; ID_Start # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; ID_Start # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; ID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; ID_Start # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; ID_Start # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; ID_Start # Lm YI SYLLABLE WU +A016..A48C ; ID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; ID_Start # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; ID_Start # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; ID_Start # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; ID_Start # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; ID_Start # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; ID_Start # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; ID_Start # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; ID_Start # Lo CYRILLIC LETTER MULTIOCULAR O +A67F ; ID_Start # Lm CYRILLIC PAYEROK +A680..A69B ; ID_Start # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; ID_Start # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; ID_Start # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; ID_Start # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; ID_Start # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; ID_Start # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; ID_Start # Lm MODIFIER LETTER US +A771..A787 ; ID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; ID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; ID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; ID_Start # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; ID_Start # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; ID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; ID_Start # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; ID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; ID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; ID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; ID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; ID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; ID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; ID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; ID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; ID_Start # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; ID_Start # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; ID_Start # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A882..A8B3 ; ID_Start # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; ID_Start # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; ID_Start # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; ID_Start # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; ID_Start # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; ID_Start # Lo [23] REJANG LETTER KA..REJANG LETTER A +A960..A97C ; ID_Start # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; ID_Start # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9CF ; ID_Start # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; ID_Start # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; ID_Start # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; ID_Start # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; ID_Start # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; ID_Start # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; ID_Start # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; ID_Start # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA60..AA6F ; ID_Start # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; ID_Start # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; ID_Start # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; ID_Start # Lo MYANMAR LETTER AITON RA +AA7E..AAAF ; ID_Start # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; ID_Start # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; ID_Start # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; ID_Start # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; ID_Start # Lo TAI VIET TONE MAI NUENG +AAC2 ; ID_Start # Lo TAI VIET TONE MAI SONG +AADB..AADC ; ID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; ID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; ID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB01..AB06 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; ID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; ID_Start # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; ID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; ID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; ID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; ID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; ID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; ID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; ID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; ID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; ID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; ID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; ID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; ID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; ID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; ID_Start # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; ID_Start # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; ID_Start # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; ID_Start # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; ID_Start # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; ID_Start # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; ID_Start # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; ID_Start # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ID_Start # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ID_Start # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ID_Start # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ID_Start # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE70..FE74 ; ID_Start # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ID_Start # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; ID_Start # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; ID_Start # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; ID_Start # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; ID_Start # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; ID_Start # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; ID_Start # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; ID_Start # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ID_Start # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ID_Start # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ID_Start # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ID_Start # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ID_Start # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ID_Start # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ID_Start # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ID_Start # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ID_Start # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; ID_Start # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; ID_Start # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; ID_Start # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; ID_Start # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; ID_Start # Nl GOTHIC LETTER NINETY +10342..10349 ; ID_Start # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; ID_Start # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; ID_Start # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; ID_Start # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ID_Start # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ID_Start # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ID_Start # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ID_Start # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ID_Start # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; ID_Start # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ID_Start # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; ID_Start # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; ID_Start # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; ID_Start # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; ID_Start # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; ID_Start # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; ID_Start # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; ID_Start # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; ID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; ID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; ID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; ID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; ID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; ID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; ID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; ID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; ID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; ID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; ID_Start # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ID_Start # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ID_Start # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; ID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; ID_Start # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; ID_Start # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; ID_Start # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; ID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; ID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; ID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; ID_Start # Lo KHAROSHTHI LETTER A +10A10..10A13 ; ID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; ID_Start # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; ID_Start # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; ID_Start # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; ID_Start # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; ID_Start # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; ID_Start # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; ID_Start # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; ID_Start # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; ID_Start # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; ID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; ID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; ID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; ID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; ID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; ID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; ID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; ID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; ID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; ID_Start # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; ID_Start # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; ID_Start # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; ID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; ID_Start # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; ID_Start # Lo BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; ID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; ID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; ID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11144 ; ID_Start # Lo CHAKMA LETTER LHAA +11147 ; ID_Start # Lo CHAKMA LETTER VAA +11150..11172 ; ID_Start # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; ID_Start # Lo MAHAJANI LIGATURE SHRI +11183..111B2 ; ID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; ID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; ID_Start # Lo SHARADA EKAM +111DC ; ID_Start # Lo SHARADA HEADSTROKE +11200..11211 ; ID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; ID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +11280..11286 ; ID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; ID_Start # Lo MULTANI LETTER GHA +1128A..1128D ; ID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; ID_Start # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; ID_Start # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; ID_Start # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; ID_Start # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; ID_Start # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; ID_Start # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; ID_Start # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; ID_Start # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; ID_Start # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; ID_Start # Lo GRANTHA SIGN AVAGRAHA +11350 ; ID_Start # Lo GRANTHA OM +1135D..11361 ; ID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; ID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; ID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; ID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; ID_Start # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; ID_Start # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; ID_Start # Lo TIRHUTA OM +11580..115AE ; ID_Start # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; ID_Start # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; ID_Start # Lo [48] MODI LETTER A..MODI LETTER LLA +11644 ; ID_Start # Lo MODI SIGN HUVA +11680..116AA ; ID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; ID_Start # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; ID_Start # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; ID_Start # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; ID_Start # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +118A0..118DF ; ID_Start # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; ID_Start # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; ID_Start # Lo DIVES AKURU LETTER O +1190C..11913 ; ID_Start # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; ID_Start # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; ID_Start # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; ID_Start # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; ID_Start # Lo DIVES AKURU INITIAL RA +119A0..119A7 ; ID_Start # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; ID_Start # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; ID_Start # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; ID_Start # Lo NANDINAGARI HEADSTROKE +11A00 ; ID_Start # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; ID_Start # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; ID_Start # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; ID_Start # Lo SOYOMBO LETTER A +11A5C..11A89 ; ID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; ID_Start # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; ID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; ID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; ID_Start # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; ID_Start # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; ID_Start # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ID_Start # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ID_Start # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; ID_Start # Lo MASARAM GONDI REPHA +11D60..11D65 ; ID_Start # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; ID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; ID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; ID_Start # Lo GUNJALA GONDI OM +11EE0..11EF2 ; ID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11FB0 ; ID_Start # Lo LISU LETTER YHA +12000..12399 ; ID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; ID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; ID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; ID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342E ; ID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; ID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; ID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; ID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; ID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; ID_Start # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; ID_Start # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; ID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; ID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; ID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; ID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; ID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; ID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; ID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; ID_Start # Lm OLD CHINESE ITERATION MARK +17000..187F7 ; ID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; ID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; ID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; ID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; ID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; ID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; ID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; ID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; ID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; ID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; ID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; ID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; ID_Start # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; ID_Start # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1D400..1D454 ; ID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ID_Start # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; ID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ID_Start # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ID_Start # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ID_Start # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ID_Start # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ID_Start # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; ID_Start # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ID_Start # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ID_Start # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ID_Start # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ID_Start # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ID_Start # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ID_Start # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ID_Start # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ID_Start # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ID_Start # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ID_Start # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ID_Start # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ID_Start # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; ID_Start # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ID_Start # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ID_Start # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; ID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; ID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; ID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; ID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E100..1E12C ; ID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; ID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; ID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; ID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; ID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E7E0..1E7E6 ; ID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; ID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; ID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; ID_Start # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; ID_Start # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; ID_Start # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; ID_Start # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; ID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +20000..2A6DF ; ID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; ID_Start # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; ID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; ID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; ID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; ID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; ID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 131997 + +# ================================================ + +# Derived Property: ID_Continue +# Characters that can continue an identifier. +# Generated from: +# ID_Start +# + Mn + Mc + Nd + Pc +# + Other_ID_Continue +# - Pattern_Syntax +# - Pattern_White_Space +# NOTE: See UAX #31 for more information + +0030..0039 ; ID_Continue # Nd [10] DIGIT ZERO..DIGIT NINE +0041..005A ; ID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005F ; ID_Continue # Pc LOW LINE +0061..007A ; ID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; ID_Continue # Lo FEMININE ORDINAL INDICATOR +00B5 ; ID_Continue # L& MICRO SIGN +00B7 ; ID_Continue # Po MIDDLE DOT +00BA ; ID_Continue # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; ID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; ID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; ID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; ID_Continue # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; ID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; ID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; ID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; ID_Continue # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; ID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; ID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; ID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; ID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; ID_Continue # Lm MODIFIER LETTER VOICING +02EE ; ID_Continue # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0300..036F ; ID_Continue # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373 ; ID_Continue # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; ID_Continue # Lm GREEK NUMERAL SIGN +0376..0377 ; ID_Continue # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; ID_Continue # Lm GREEK YPOGEGRAMMENI +037B..037D ; ID_Continue # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; ID_Continue # L& GREEK CAPITAL LETTER YOT +0386 ; ID_Continue # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; ID_Continue # Po GREEK ANO TELEIA +0388..038A ; ID_Continue # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; ID_Continue # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; ID_Continue # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; ID_Continue # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; ID_Continue # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0483..0487 ; ID_Continue # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +048A..052F ; ID_Continue # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; ID_Continue # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; ID_Continue # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; ID_Continue # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0591..05BD ; ID_Continue # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; ID_Continue # Mn HEBREW POINT RAFE +05C1..05C2 ; ID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; ID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; ID_Continue # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; ID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; ID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; ID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +0620..063F ; ID_Continue # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; ID_Continue # Lm ARABIC TATWEEL +0641..064A ; ID_Continue # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F ; ID_Continue # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669 ; ID_Continue # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066E..066F ; ID_Continue # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; ID_Continue # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; ID_Continue # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; ID_Continue # Lo ARABIC LETTER AE +06D6..06DC ; ID_Continue # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; ID_Continue # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; ID_Continue # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; ID_Continue # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; ID_Continue # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; ID_Continue # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; ID_Continue # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; ID_Continue # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; ID_Continue # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; ID_Continue # Lo SYRIAC LETTER ALAPH +0711 ; ID_Continue # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; ID_Continue # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; ID_Continue # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..07A5 ; ID_Continue # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; ID_Continue # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; ID_Continue # Lo THAANA LETTER NAA +07C0..07C9 ; ID_Continue # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; ID_Continue # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; ID_Continue # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; ID_Continue # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; ID_Continue # Lm NKO LAJANYALAN +07FD ; ID_Continue # Mn NKO DANTAYALAN +0800..0815 ; ID_Continue # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; ID_Continue # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; ID_Continue # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; ID_Continue # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; ID_Continue # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; ID_Continue # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; ID_Continue # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; ID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0840..0858 ; ID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; ID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0860..086A ; ID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; ID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; ID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0898..089F ; ID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; ID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; ID_Continue # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; ID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; ID_Continue # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +0903 ; ID_Continue # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; ID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; ID_Continue # Mn DEVANAGARI VOWEL SIGN OE +093B ; ID_Continue # Mc DEVANAGARI VOWEL SIGN OOE +093C ; ID_Continue # Mn DEVANAGARI SIGN NUKTA +093D ; ID_Continue # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; ID_Continue # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; ID_Continue # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; ID_Continue # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; ID_Continue # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; ID_Continue # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; ID_Continue # Lo DEVANAGARI OM +0951..0957 ; ID_Continue # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; ID_Continue # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; ID_Continue # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; ID_Continue # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; ID_Continue # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; ID_Continue # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; ID_Continue # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; ID_Continue # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; ID_Continue # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; ID_Continue # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; ID_Continue # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; ID_Continue # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; ID_Continue # Lo BENGALI LETTER LA +09B6..09B9 ; ID_Continue # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; ID_Continue # Mn BENGALI SIGN NUKTA +09BD ; ID_Continue # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; ID_Continue # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; ID_Continue # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; ID_Continue # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; ID_Continue # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; ID_Continue # Mn BENGALI SIGN VIRAMA +09CE ; ID_Continue # Lo BENGALI LETTER KHANDA TA +09D7 ; ID_Continue # Mc BENGALI AU LENGTH MARK +09DC..09DD ; ID_Continue # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; ID_Continue # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; ID_Continue # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; ID_Continue # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; ID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; ID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA +09FE ; ID_Continue # Mn BENGALI SANDHI MARK +0A01..0A02 ; ID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; ID_Continue # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; ID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; ID_Continue # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; ID_Continue # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; ID_Continue # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; ID_Continue # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; ID_Continue # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; ID_Continue # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; ID_Continue # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; ID_Continue # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; ID_Continue # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; ID_Continue # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; ID_Continue # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; ID_Continue # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; ID_Continue # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; ID_Continue # Lo GURMUKHI LETTER FA +0A66..0A6F ; ID_Continue # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; ID_Continue # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; ID_Continue # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; ID_Continue # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; ID_Continue # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; ID_Continue # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; ID_Continue # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; ID_Continue # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; ID_Continue # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; ID_Continue # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; ID_Continue # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; ID_Continue # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; ID_Continue # Mn GUJARATI SIGN NUKTA +0ABD ; ID_Continue # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; ID_Continue # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; ID_Continue # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; ID_Continue # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; ID_Continue # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; ID_Continue # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; ID_Continue # Mn GUJARATI SIGN VIRAMA +0AD0 ; ID_Continue # Lo GUJARATI OM +0AE0..0AE1 ; ID_Continue # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; ID_Continue # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; ID_Continue # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF9 ; ID_Continue # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; ID_Continue # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; ID_Continue # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; ID_Continue # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; ID_Continue # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; ID_Continue # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; ID_Continue # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; ID_Continue # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; ID_Continue # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; ID_Continue # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; ID_Continue # Mn ORIYA SIGN NUKTA +0B3D ; ID_Continue # Lo ORIYA SIGN AVAGRAHA +0B3E ; ID_Continue # Mc ORIYA VOWEL SIGN AA +0B3F ; ID_Continue # Mn ORIYA VOWEL SIGN I +0B40 ; ID_Continue # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; ID_Continue # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; ID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; ID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; ID_Continue # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; ID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; ID_Continue # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; ID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; ID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; ID_Continue # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; ID_Continue # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B71 ; ID_Continue # Lo ORIYA LETTER WA +0B82 ; ID_Continue # Mn TAMIL SIGN ANUSVARA +0B83 ; ID_Continue # Lo TAMIL SIGN VISARGA +0B85..0B8A ; ID_Continue # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; ID_Continue # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; ID_Continue # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; ID_Continue # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; ID_Continue # Lo TAMIL LETTER JA +0B9E..0B9F ; ID_Continue # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; ID_Continue # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; ID_Continue # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; ID_Continue # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; ID_Continue # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; ID_Continue # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; ID_Continue # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; ID_Continue # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; ID_Continue # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; ID_Continue # Mn TAMIL SIGN VIRAMA +0BD0 ; ID_Continue # Lo TAMIL OM +0BD7 ; ID_Continue # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; ID_Continue # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C00 ; ID_Continue # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; ID_Continue # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; ID_Continue # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; ID_Continue # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; ID_Continue # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; ID_Continue # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; ID_Continue # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; ID_Continue # Mn TELUGU SIGN NUKTA +0C3D ; ID_Continue # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; ID_Continue # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; ID_Continue # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; ID_Continue # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; ID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; ID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; ID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; ID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; ID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; ID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; ID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C80 ; ID_Continue # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; ID_Continue # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; ID_Continue # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; ID_Continue # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; ID_Continue # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; ID_Continue # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; ID_Continue # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; ID_Continue # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; ID_Continue # Mn KANNADA SIGN NUKTA +0CBD ; ID_Continue # Lo KANNADA SIGN AVAGRAHA +0CBE ; ID_Continue # Mc KANNADA VOWEL SIGN AA +0CBF ; ID_Continue # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; ID_Continue # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; ID_Continue # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; ID_Continue # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; ID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; ID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; ID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; ID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; ID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; ID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; ID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; ID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D00..0D01 ; ID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; ID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; ID_Continue # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; ID_Continue # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; ID_Continue # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; ID_Continue # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; ID_Continue # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; ID_Continue # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; ID_Continue # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; ID_Continue # Mn MALAYALAM SIGN VIRAMA +0D4E ; ID_Continue # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; ID_Continue # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; ID_Continue # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; ID_Continue # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; ID_Continue # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; ID_Continue # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D7A..0D7F ; ID_Continue # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; ID_Continue # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; ID_Continue # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; ID_Continue # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; ID_Continue # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; ID_Continue # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; ID_Continue # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; ID_Continue # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; ID_Continue # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; ID_Continue # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; ID_Continue # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; ID_Continue # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; ID_Continue # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; ID_Continue # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; ID_Continue # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; ID_Continue # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; ID_Continue # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; ID_Continue # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; ID_Continue # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; ID_Continue # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; ID_Continue # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; ID_Continue # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E50..0E59 ; ID_Continue # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E81..0E82 ; ID_Continue # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; ID_Continue # Lo LAO LETTER KHO TAM +0E86..0E8A ; ID_Continue # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; ID_Continue # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; ID_Continue # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; ID_Continue # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; ID_Continue # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; ID_Continue # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; ID_Continue # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; ID_Continue # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; ID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; ID_Continue # Lm LAO KO LA +0EC8..0ECD ; ID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0ED0..0ED9 ; ID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; ID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; ID_Continue # Lo TIBETAN SYLLABLE OM +0F18..0F19 ; ID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F20..0F29 ; ID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F35 ; ID_Continue # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; ID_Continue # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; ID_Continue # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; ID_Continue # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; ID_Continue # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; ID_Continue # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; ID_Continue # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; ID_Continue # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; ID_Continue # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; ID_Continue # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; ID_Continue # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; ID_Continue # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; ID_Continue # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; ID_Continue # Mn TIBETAN SYMBOL PADMA GDAN +1000..102A ; ID_Continue # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; ID_Continue # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; ID_Continue # Mc MYANMAR VOWEL SIGN E +1032..1037 ; ID_Continue # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; ID_Continue # Mc MYANMAR SIGN VISARGA +1039..103A ; ID_Continue # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; ID_Continue # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; ID_Continue # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; ID_Continue # Lo MYANMAR LETTER GREAT SA +1040..1049 ; ID_Continue # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1050..1055 ; ID_Continue # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; ID_Continue # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; ID_Continue # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; ID_Continue # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; ID_Continue # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; ID_Continue # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; ID_Continue # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; ID_Continue # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; ID_Continue # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; ID_Continue # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; ID_Continue # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; ID_Continue # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; ID_Continue # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; ID_Continue # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; ID_Continue # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; ID_Continue # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; ID_Continue # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; ID_Continue # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; ID_Continue # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; ID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; ID_Continue # Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; ID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; ID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; ID_Continue # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; ID_Continue # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; ID_Continue # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; ID_Continue # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; ID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; ID_Continue # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; ID_Continue # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; ID_Continue # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; ID_Continue # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; ID_Continue # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; ID_Continue # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; ID_Continue # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; ID_Continue # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1369..1371 ; ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +1380..138F ; ID_Continue # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; ID_Continue # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; ID_Continue # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; ID_Continue # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; ID_Continue # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; ID_Continue # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; ID_Continue # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; ID_Continue # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; ID_Continue # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; ID_Continue # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; ID_Continue # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; ID_Continue # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; ID_Continue # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; ID_Continue # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; ID_Continue # Mc HANUNOO SIGN PAMUDPOD +1740..1751 ; ID_Continue # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; ID_Continue # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; ID_Continue # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; ID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; ID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; ID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; ID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; ID_Continue # Mc KHMER VOWEL SIGN AA +17B7..17BD ; ID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; ID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; ID_Continue # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; ID_Continue # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; ID_Continue # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; ID_Continue # Lm KHMER SIGN LEK TOO +17DC ; ID_Continue # Lo KHMER SIGN AVAKRAHASANYA +17DD ; ID_Continue # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; ID_Continue # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +180B..180D ; ID_Continue # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; ID_Continue # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; ID_Continue # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; ID_Continue # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; ID_Continue # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; ID_Continue # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; ID_Continue # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; ID_Continue # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; ID_Continue # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; ID_Continue # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; ID_Continue # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; ID_Continue # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; ID_Continue # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; ID_Continue # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; ID_Continue # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; ID_Continue # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; ID_Continue # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; ID_Continue # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; ID_Continue # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; ID_Continue # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; ID_Continue # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1946..194F ; ID_Continue # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; ID_Continue # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; ID_Continue # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; ID_Continue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; ID_Continue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; ID_Continue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; ID_Continue # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; ID_Continue # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; ID_Continue # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; ID_Continue # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; ID_Continue # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; ID_Continue # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; ID_Continue # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; ID_Continue # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; ID_Continue # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; ID_Continue # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; ID_Continue # Mn TAI THAM SIGN SAKOT +1A61 ; ID_Continue # Mc TAI THAM VOWEL SIGN A +1A62 ; ID_Continue # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; ID_Continue # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; ID_Continue # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; ID_Continue # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; ID_Continue # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; ID_Continue # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; ID_Continue # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; ID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA7 ; ID_Continue # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; ID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABF..1ACE ; ID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; ID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; ID_Continue # Mc BALINESE SIGN BISAH +1B05..1B33 ; ID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; ID_Continue # Mn BALINESE SIGN REREKAN +1B35 ; ID_Continue # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; ID_Continue # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; ID_Continue # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; ID_Continue # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; ID_Continue # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; ID_Continue # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; ID_Continue # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; ID_Continue # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; ID_Continue # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B6B..1B73 ; ID_Continue # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; ID_Continue # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; ID_Continue # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; ID_Continue # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; ID_Continue # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; ID_Continue # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; ID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; ID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; ID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; ID_Continue # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; ID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; ID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; ID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE6 ; ID_Continue # Mn BATAK SIGN TOMPI +1BE7 ; ID_Continue # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; ID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; ID_Continue # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; ID_Continue # Mn BATAK VOWEL SIGN KARO O +1BEE ; ID_Continue # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; ID_Continue # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; ID_Continue # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C00..1C23 ; ID_Continue # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; ID_Continue # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; ID_Continue # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; ID_Continue # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; ID_Continue # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C40..1C49 ; ID_Continue # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; ID_Continue # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; ID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; ID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; ID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; ID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; ID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; ID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CD0..1CD2 ; ID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; ID_Continue # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; ID_Continue # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; ID_Continue # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC ; ID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED ; ID_Continue # Mn VEDIC SIGN TIRYAK +1CEE..1CF3 ; ID_Continue # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; ID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; ID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; ID_Continue # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; ID_Continue # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA ; ID_Continue # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; ID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; ID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; ID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; ID_Continue # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; ID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; ID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; ID_Continue # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1F15 ; ID_Continue # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; ID_Continue # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; ID_Continue # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; ID_Continue # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; ID_Continue # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; ID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; ID_Continue # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; ID_Continue # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; ID_Continue # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; ID_Continue # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; ID_Continue # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; ID_Continue # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; ID_Continue # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; ID_Continue # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; ID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; ID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; ID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +203F..2040 ; ID_Continue # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; ID_Continue # Pc INVERTED UNDERTIE +2071 ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; ID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; ID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20D0..20DC ; ID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; ID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20F0 ; ID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2102 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL C +2107 ; ID_Continue # L& EULER CONSTANT +210A..2113 ; ID_Continue # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL N +2118 ; ID_Continue # Sm SCRIPT CAPITAL P +2119..211D ; ID_Continue # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; ID_Continue # L& DOUBLE-STRUCK CAPITAL Z +2126 ; ID_Continue # L& OHM SIGN +2128 ; ID_Continue # L& BLACK-LETTER CAPITAL Z +212A..212D ; ID_Continue # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; ID_Continue # So ESTIMATED SYMBOL +212F..2134 ; ID_Continue # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; ID_Continue # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; ID_Continue # L& INFORMATION SOURCE +213C..213F ; ID_Continue # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; ID_Continue # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; ID_Continue # L& TURNED SMALL F +2160..2182 ; ID_Continue # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; ID_Continue # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; ID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; ID_Continue # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; ID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; ID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; ID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; ID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; ID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; ID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; ID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; ID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; ID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; ID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; ID_Continue # Mn TIFINAGH CONSONANT JOINER +2D80..2D96 ; ID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; ID_Continue # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +3005 ; ID_Continue # Lm IDEOGRAPHIC ITERATION MARK +3006 ; ID_Continue # Lo IDEOGRAPHIC CLOSING MARK +3007 ; ID_Continue # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; ID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; ID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; ID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; ID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; ID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; ID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; ID_Continue # Lo MASU MARK +3041..3096 ; ID_Continue # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; ID_Continue # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; ID_Continue # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; ID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; ID_Continue # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; ID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; ID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; ID_Continue # Lo KATAKANA DIGRAPH KOTO +3105..312F ; ID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; ID_Continue # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; ID_Continue # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; ID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; ID_Continue # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; ID_Continue # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; ID_Continue # Lm YI SYLLABLE WU +A016..A48C ; ID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; ID_Continue # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; ID_Continue # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; ID_Continue # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; ID_Continue # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; ID_Continue # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; ID_Continue # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; ID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; ID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; ID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; ID_Continue # Mn COMBINING CYRILLIC VZMET +A674..A67D ; ID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; ID_Continue # Lm CYRILLIC PAYEROK +A680..A69B ; ID_Continue # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; ID_Continue # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; ID_Continue # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; ID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; ID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; ID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; ID_Continue # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; ID_Continue # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; ID_Continue # Lm MODIFIER LETTER US +A771..A787 ; ID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; ID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; ID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; ID_Continue # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; ID_Continue # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; ID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; ID_Continue # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; ID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; ID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; ID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; ID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; ID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; ID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; ID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; ID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; ID_Continue # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; ID_Continue # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; ID_Continue # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; ID_Continue # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; ID_Continue # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; ID_Continue # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; ID_Continue # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; ID_Continue # Mc SYLOTI NAGRI VOWEL SIGN OO +A82C ; ID_Continue # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A840..A873 ; ID_Continue # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; ID_Continue # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; ID_Continue # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; ID_Continue # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; ID_Continue # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8D0..A8D9 ; ID_Continue # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; ID_Continue # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; ID_Continue # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; ID_Continue # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; ID_Continue # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; ID_Continue # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; ID_Continue # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; ID_Continue # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; ID_Continue # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A930..A946 ; ID_Continue # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; ID_Continue # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; ID_Continue # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A960..A97C ; ID_Continue # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; ID_Continue # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; ID_Continue # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; ID_Continue # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; ID_Continue # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; ID_Continue # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; ID_Continue # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; ID_Continue # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; ID_Continue # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; ID_Continue # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9CF ; ID_Continue # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; ID_Continue # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9E0..A9E4 ; ID_Continue # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; ID_Continue # Mn MYANMAR SIGN SHAN SAW +A9E6 ; ID_Continue # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; ID_Continue # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; ID_Continue # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; ID_Continue # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; ID_Continue # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; ID_Continue # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; ID_Continue # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; ID_Continue # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; ID_Continue # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; ID_Continue # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; ID_Continue # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; ID_Continue # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; ID_Continue # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; ID_Continue # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; ID_Continue # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; ID_Continue # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA60..AA6F ; ID_Continue # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; ID_Continue # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; ID_Continue # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; ID_Continue # Lo MYANMAR LETTER AITON RA +AA7B ; ID_Continue # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; ID_Continue # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; ID_Continue # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; ID_Continue # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; ID_Continue # Mn TAI VIET MAI KANG +AAB1 ; ID_Continue # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; ID_Continue # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; ID_Continue # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; ID_Continue # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; ID_Continue # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; ID_Continue # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; ID_Continue # Lo TAI VIET TONE MAI NUENG +AAC1 ; ID_Continue # Mn TAI VIET TONE MAI THO +AAC2 ; ID_Continue # Lo TAI VIET TONE MAI SONG +AADB..AADC ; ID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; ID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; ID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; ID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; ID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; ID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; ID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; ID_Continue # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; ID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; ID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; ID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; ID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; ID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; ID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; ID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; ID_Continue # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; ID_Continue # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; ID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; ID_Continue # Mc MEETEI MAYEK LUM IYEK +ABED ; ID_Continue # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; ID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; ID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; ID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; ID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; ID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; ID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; ID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; ID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; ID_Continue # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; ID_Continue # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; ID_Continue # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; ID_Continue # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; ID_Continue # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; ID_Continue # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; ID_Continue # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; ID_Continue # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; ID_Continue # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FD3D ; ID_Continue # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; ID_Continue # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; ID_Continue # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDFB ; ID_Continue # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FE00..FE0F ; ID_Continue # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; ID_Continue # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE33..FE34 ; ID_Continue # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; ID_Continue # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE70..FE74 ; ID_Continue # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; ID_Continue # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF10..FF19 ; ID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF3A ; ID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3F ; ID_Continue # Pc FULLWIDTH LOW LINE +FF41..FF5A ; ID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; ID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; ID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; ID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; ID_Continue # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; ID_Continue # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; ID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; ID_Continue # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; ID_Continue # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; ID_Continue # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; ID_Continue # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; ID_Continue # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; ID_Continue # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; ID_Continue # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; ID_Continue # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +101FD ; ID_Continue # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C ; ID_Continue # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; ID_Continue # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; ID_Continue # Mn COPTIC EPACT THOUSANDS MARK +10300..1031F ; ID_Continue # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; ID_Continue # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; ID_Continue # Nl GOTHIC LETTER NINETY +10342..10349 ; ID_Continue # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; ID_Continue # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; ID_Continue # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; ID_Continue # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; ID_Continue # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; ID_Continue # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; ID_Continue # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; ID_Continue # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; ID_Continue # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; ID_Continue # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; ID_Continue # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; ID_Continue # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; ID_Continue # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; ID_Continue # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; ID_Continue # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; ID_Continue # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; ID_Continue # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; ID_Continue # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; ID_Continue # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; ID_Continue # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; ID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; ID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; ID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; ID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; ID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; ID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; ID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; ID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; ID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; ID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; ID_Continue # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; ID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; ID_Continue # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; ID_Continue # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; ID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; ID_Continue # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; ID_Continue # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; ID_Continue # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; ID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; ID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; ID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; ID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; ID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; ID_Continue # Lo KHAROSHTHI LETTER A +10A01..10A03 ; ID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; ID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; ID_Continue # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; ID_Continue # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; ID_Continue # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; ID_Continue # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; ID_Continue # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; ID_Continue # Mn KHAROSHTHI VIRAMA +10A60..10A7C ; ID_Continue # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; ID_Continue # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; ID_Continue # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; ID_Continue # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; ID_Continue # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10B00..10B35 ; ID_Continue # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; ID_Continue # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; ID_Continue # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; ID_Continue # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; ID_Continue # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; ID_Continue # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; ID_Continue # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; ID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; ID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; ID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E80..10EA9 ; ID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; ID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; ID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; ID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; ID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; ID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; ID_Continue # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F70..10F81 ; ID_Continue # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; ID_Continue # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10FB0..10FC4 ; ID_Continue # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; ID_Continue # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; ID_Continue # Mc BRAHMI SIGN CANDRABINDU +11001 ; ID_Continue # Mn BRAHMI SIGN ANUSVARA +11002 ; ID_Continue # Mc BRAHMI SIGN VISARGA +11003..11037 ; ID_Continue # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; ID_Continue # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11066..1106F ; ID_Continue # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; ID_Continue # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; ID_Continue # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; ID_Continue # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; ID_Continue # Lo BRAHMI LETTER OLD TAMIL LLA +1107F..11081 ; ID_Continue # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +11082 ; ID_Continue # Mc KAITHI SIGN VISARGA +11083..110AF ; ID_Continue # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; ID_Continue # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; ID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; ID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; ID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; ID_Continue # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; ID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; ID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; ID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; ID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; ID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; ID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; ID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; ID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11144 ; ID_Continue # Lo CHAKMA LETTER LHAA +11145..11146 ; ID_Continue # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; ID_Continue # Lo CHAKMA LETTER VAA +11150..11172 ; ID_Continue # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; ID_Continue # Mn MAHAJANI SIGN NUKTA +11176 ; ID_Continue # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; ID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; ID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; ID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; ID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; ID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; ID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; ID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C9..111CC ; ID_Continue # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CE ; ID_Continue # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; ID_Continue # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; ID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; ID_Continue # Lo SHARADA EKAM +111DC ; ID_Continue # Lo SHARADA HEADSTROKE +11200..11211 ; ID_Continue # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; ID_Continue # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; ID_Continue # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; ID_Continue # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; ID_Continue # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; ID_Continue # Mn KHOJKI SIGN ANUSVARA +11235 ; ID_Continue # Mc KHOJKI SIGN VIRAMA +11236..11237 ; ID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; ID_Continue # Mn KHOJKI SIGN SUKUN +11280..11286 ; ID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; ID_Continue # Lo MULTANI LETTER GHA +1128A..1128D ; ID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; ID_Continue # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; ID_Continue # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; ID_Continue # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; ID_Continue # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; ID_Continue # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; ID_Continue # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; ID_Continue # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; ID_Continue # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; ID_Continue # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; ID_Continue # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; ID_Continue # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; ID_Continue # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; ID_Continue # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; ID_Continue # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; ID_Continue # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; ID_Continue # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; ID_Continue # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; ID_Continue # Mn GRANTHA VOWEL SIGN II +11341..11344 ; ID_Continue # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; ID_Continue # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; ID_Continue # Lo GRANTHA OM +11357 ; ID_Continue # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; ID_Continue # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; ID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; ID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; ID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434 ; ID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; ID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; ID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; ID_Continue # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; ID_Continue # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; ID_Continue # Mc NEWA SIGN VISARGA +11446 ; ID_Continue # Mn NEWA SIGN NUKTA +11447..1144A ; ID_Continue # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +11450..11459 ; ID_Continue # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145E ; ID_Continue # Mn NEWA SANDHI MARK +1145F..11461 ; ID_Continue # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; ID_Continue # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; ID_Continue # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; ID_Continue # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; ID_Continue # Mc TIRHUTA VOWEL SIGN E +114BA ; ID_Continue # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; ID_Continue # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; ID_Continue # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; ID_Continue # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; ID_Continue # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; ID_Continue # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; ID_Continue # Lo TIRHUTA OM +114D0..114D9 ; ID_Continue # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; ID_Continue # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; ID_Continue # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; ID_Continue # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; ID_Continue # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; ID_Continue # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; ID_Continue # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; ID_Continue # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115D8..115DB ; ID_Continue # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; ID_Continue # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; ID_Continue # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; ID_Continue # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; ID_Continue # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; ID_Continue # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; ID_Continue # Mn MODI SIGN ANUSVARA +1163E ; ID_Continue # Mc MODI SIGN VISARGA +1163F..11640 ; ID_Continue # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11644 ; ID_Continue # Lo MODI SIGN HUVA +11650..11659 ; ID_Continue # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; ID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; ID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; ID_Continue # Mc TAKRI SIGN VISARGA +116AD ; ID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; ID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; ID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; ID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; ID_Continue # Mn TAKRI SIGN NUKTA +116B8 ; ID_Continue # Lo TAKRI LETTER ARCHAIC KHA +116C0..116C9 ; ID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; ID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; ID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; ID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; ID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; ID_Continue # Mc AHOM VOWEL SIGN E +11727..1172B ; ID_Continue # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; ID_Continue # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +11740..11746 ; ID_Continue # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; ID_Continue # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; ID_Continue # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; ID_Continue # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; ID_Continue # Mc DOGRA SIGN VISARGA +11839..1183A ; ID_Continue # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +118A0..118DF ; ID_Continue # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; ID_Continue # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118FF..11906 ; ID_Continue # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; ID_Continue # Lo DIVES AKURU LETTER O +1190C..11913 ; ID_Continue # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; ID_Continue # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; ID_Continue # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; ID_Continue # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; ID_Continue # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; ID_Continue # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; ID_Continue # Mc DIVES AKURU SIGN HALANTA +1193E ; ID_Continue # Mn DIVES AKURU VIRAMA +1193F ; ID_Continue # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; ID_Continue # Mc DIVES AKURU MEDIAL YA +11941 ; ID_Continue # Lo DIVES AKURU INITIAL RA +11942 ; ID_Continue # Mc DIVES AKURU MEDIAL RA +11943 ; ID_Continue # Mn DIVES AKURU SIGN NUKTA +11950..11959 ; ID_Continue # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; ID_Continue # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; ID_Continue # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; ID_Continue # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; ID_Continue # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; ID_Continue # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; ID_Continue # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; ID_Continue # Mn NANDINAGARI SIGN VIRAMA +119E1 ; ID_Continue # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; ID_Continue # Lo NANDINAGARI HEADSTROKE +119E4 ; ID_Continue # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; ID_Continue # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; ID_Continue # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; ID_Continue # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; ID_Continue # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; ID_Continue # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; ID_Continue # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; ID_Continue # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; ID_Continue # Mn ZANABAZAR SQUARE SUBJOINER +11A50 ; ID_Continue # Lo SOYOMBO LETTER A +11A51..11A56 ; ID_Continue # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; ID_Continue # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; ID_Continue # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; ID_Continue # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; ID_Continue # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; ID_Continue # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; ID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9D ; ID_Continue # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; ID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; ID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; ID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; ID_Continue # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; ID_Continue # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; ID_Continue # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; ID_Continue # Mc BHAIKSUKI SIGN VISARGA +11C3F ; ID_Continue # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; ID_Continue # Lo BHAIKSUKI SIGN AVAGRAHA +11C50..11C59 ; ID_Continue # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C72..11C8F ; ID_Continue # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; ID_Continue # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; ID_Continue # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; ID_Continue # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; ID_Continue # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; ID_Continue # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; ID_Continue # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; ID_Continue # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; ID_Continue # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; ID_Continue # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; ID_Continue # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; ID_Continue # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; ID_Continue # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; ID_Continue # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; ID_Continue # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; ID_Continue # Lo MASARAM GONDI REPHA +11D47 ; ID_Continue # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; ID_Continue # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; ID_Continue # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; ID_Continue # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; ID_Continue # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; ID_Continue # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; ID_Continue # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; ID_Continue # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; ID_Continue # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; ID_Continue # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; ID_Continue # Mn GUNJALA GONDI VIRAMA +11D98 ; ID_Continue # Lo GUNJALA GONDI OM +11DA0..11DA9 ; ID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; ID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; ID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; ID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11FB0 ; ID_Continue # Lo LISU LETTER YHA +12000..12399 ; ID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; ID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; ID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; ID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342E ; ID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; ID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; ID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; ID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; ID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A70..16ABE ; ID_Continue # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; ID_Continue # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; ID_Continue # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; ID_Continue # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B00..16B2F ; ID_Continue # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; ID_Continue # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; ID_Continue # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B50..16B59 ; ID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B63..16B77 ; ID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; ID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; ID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; ID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; ID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; ID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; ID_Continue # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; ID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; ID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; ID_Continue # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; ID_Continue # Lm OLD CHINESE ITERATION MARK +16FE4 ; ID_Continue # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; ID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; ID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; ID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; ID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; ID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; ID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; ID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; ID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; ID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; ID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; ID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; ID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; ID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; ID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; ID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9D..1BC9E ; ID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; ID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; ID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; ID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; ID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; ID_Continue # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; ID_Continue # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; ID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; ID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; ID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D400..1D454 ; ID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; ID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; ID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; ID_Continue # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; ID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; ID_Continue # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; ID_Continue # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; ID_Continue # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; ID_Continue # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; ID_Continue # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; ID_Continue # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; ID_Continue # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; ID_Continue # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; ID_Continue # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; ID_Continue # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; ID_Continue # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; ID_Continue # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; ID_Continue # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; ID_Continue # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; ID_Continue # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; ID_Continue # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; ID_Continue # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; ID_Continue # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; ID_Continue # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; ID_Continue # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; ID_Continue # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; ID_Continue # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; ID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; ID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; ID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DA00..1DA36 ; ID_Continue # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; ID_Continue # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; ID_Continue # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; ID_Continue # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; ID_Continue # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; ID_Continue # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09 ; ID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; ID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; ID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E000..1E006 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; ID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; ID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; ID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; ID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E100..1E12C ; ID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; ID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; ID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; ID_Continue # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; ID_Continue # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; ID_Continue # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; ID_Continue # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; ID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; ID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; ID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E7E0..1E7E6 ; ID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; ID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; ID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; ID_Continue # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; ID_Continue # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8D0..1E8D6 ; ID_Continue # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; ID_Continue # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; ID_Continue # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; ID_Continue # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; ID_Continue # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1EE00..1EE03 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; ID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; ID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; ID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; ID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; ID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; ID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; ID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; ID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; ID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; ID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; ID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; ID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; ID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1FBF0..1FBF9 ; ID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; ID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; ID_Continue # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; ID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; ID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; ID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; ID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; ID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 135072 + +# ================================================ + +# Derived Property: XID_Start +# ID_Start modified for closure under NFKx +# Modified as described in UAX #15 +# NOTE: Does NOT remove the non-NFKx characters. +# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) +# NOTE: See UAX #31 for more information + +0041..005A ; XID_Start # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; XID_Start # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; XID_Start # Lo FEMININE ORDINAL INDICATOR +00B5 ; XID_Start # L& MICRO SIGN +00BA ; XID_Start # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; XID_Start # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; XID_Start # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; XID_Start # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; XID_Start # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; XID_Start # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; XID_Start # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; XID_Start # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; XID_Start # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; XID_Start # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; XID_Start # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; XID_Start # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; XID_Start # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; XID_Start # Lm MODIFIER LETTER VOICING +02EE ; XID_Start # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0370..0373 ; XID_Start # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; XID_Start # Lm GREEK NUMERAL SIGN +0376..0377 ; XID_Start # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; XID_Start # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; XID_Start # L& GREEK CAPITAL LETTER YOT +0386 ; XID_Start # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; XID_Start # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; XID_Start # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; XID_Start # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; XID_Start # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; XID_Start # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +048A..052F ; XID_Start # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; XID_Start # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; XID_Start # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; XID_Start # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +05D0..05EA ; XID_Start # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; XID_Start # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0620..063F ; XID_Start # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; XID_Start # Lm ARABIC TATWEEL +0641..064A ; XID_Start # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +066E..066F ; XID_Start # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; XID_Start # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; XID_Start # Lo ARABIC LETTER AE +06E5..06E6 ; XID_Start # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EE..06EF ; XID_Start # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06FA..06FC ; XID_Start # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; XID_Start # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; XID_Start # Lo SYRIAC LETTER ALAPH +0712..072F ; XID_Start # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; XID_Start # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; XID_Start # Lo THAANA LETTER NAA +07CA..07EA ; XID_Start # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; XID_Start # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; XID_Start # Lm NKO LAJANYALAN +0800..0815 ; XID_Start # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; XID_Start # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; XID_Start # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; XID_Start # Lm SAMARITAN MODIFIER LETTER I +0840..0858 ; XID_Start # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0860..086A ; XID_Start # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; XID_Start # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; XID_Start # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; XID_Start # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; XID_Start # Lm ARABIC SMALL FARSI YEH +0904..0939 ; XID_Start # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093D ; XID_Start # Lo DEVANAGARI SIGN AVAGRAHA +0950 ; XID_Start # Lo DEVANAGARI OM +0958..0961 ; XID_Start # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0971 ; XID_Start # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; XID_Start # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0985..098C ; XID_Start # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; XID_Start # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; XID_Start # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; XID_Start # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; XID_Start # Lo BENGALI LETTER LA +09B6..09B9 ; XID_Start # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; XID_Start # Lo BENGALI SIGN AVAGRAHA +09CE ; XID_Start # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; XID_Start # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; XID_Start # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09F0..09F1 ; XID_Start # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; XID_Start # Lo BENGALI LETTER VEDIC ANUSVARA +0A05..0A0A ; XID_Start # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; XID_Start # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; XID_Start # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; XID_Start # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; XID_Start # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; XID_Start # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; XID_Start # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A59..0A5C ; XID_Start # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; XID_Start # Lo GURMUKHI LETTER FA +0A72..0A74 ; XID_Start # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A85..0A8D ; XID_Start # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; XID_Start # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; XID_Start # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; XID_Start # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; XID_Start # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; XID_Start # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; XID_Start # Lo GUJARATI SIGN AVAGRAHA +0AD0 ; XID_Start # Lo GUJARATI OM +0AE0..0AE1 ; XID_Start # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AF9 ; XID_Start # Lo GUJARATI LETTER ZHA +0B05..0B0C ; XID_Start # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; XID_Start # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; XID_Start # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; XID_Start # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; XID_Start # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; XID_Start # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; XID_Start # Lo ORIYA SIGN AVAGRAHA +0B5C..0B5D ; XID_Start # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; XID_Start # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B71 ; XID_Start # Lo ORIYA LETTER WA +0B83 ; XID_Start # Lo TAMIL SIGN VISARGA +0B85..0B8A ; XID_Start # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; XID_Start # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; XID_Start # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; XID_Start # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; XID_Start # Lo TAMIL LETTER JA +0B9E..0B9F ; XID_Start # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; XID_Start # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; XID_Start # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; XID_Start # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BD0 ; XID_Start # Lo TAMIL OM +0C05..0C0C ; XID_Start # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; XID_Start # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; XID_Start # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; XID_Start # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; XID_Start # Lo TELUGU SIGN AVAGRAHA +0C58..0C5A ; XID_Start # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; XID_Start # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; XID_Start # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C80 ; XID_Start # Lo KANNADA SIGN SPACING CANDRABINDU +0C85..0C8C ; XID_Start # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; XID_Start # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; XID_Start # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; XID_Start # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; XID_Start # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; XID_Start # Lo KANNADA SIGN AVAGRAHA +0CDD..0CDE ; XID_Start # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; XID_Start # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CF1..0CF2 ; XID_Start # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D04..0D0C ; XID_Start # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; XID_Start # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; XID_Start # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; XID_Start # Lo MALAYALAM SIGN AVAGRAHA +0D4E ; XID_Start # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; XID_Start # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D5F..0D61 ; XID_Start # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D7A..0D7F ; XID_Start # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D85..0D96 ; XID_Start # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; XID_Start # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; XID_Start # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; XID_Start # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; XID_Start # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0E01..0E30 ; XID_Start # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32 ; XID_Start # Lo THAI CHARACTER SARA AA +0E40..0E45 ; XID_Start # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; XID_Start # Lm THAI CHARACTER MAIYAMOK +0E81..0E82 ; XID_Start # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; XID_Start # Lo LAO LETTER KHO TAM +0E86..0E8A ; XID_Start # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; XID_Start # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; XID_Start # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; XID_Start # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2 ; XID_Start # Lo LAO VOWEL SIGN AA +0EBD ; XID_Start # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; XID_Start # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; XID_Start # Lm LAO KO LA +0EDC..0EDF ; XID_Start # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; XID_Start # Lo TIBETAN SYLLABLE OM +0F40..0F47 ; XID_Start # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; XID_Start # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F88..0F8C ; XID_Start # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +1000..102A ; XID_Start # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +103F ; XID_Start # Lo MYANMAR LETTER GREAT SA +1050..1055 ; XID_Start # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +105A..105D ; XID_Start # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; XID_Start # Lo MYANMAR LETTER SGAW KAREN SHA +1065..1066 ; XID_Start # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +106E..1070 ; XID_Start # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; XID_Start # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +108E ; XID_Start # Lo MYANMAR LETTER RUMAI PALAUNG FA +10A0..10C5 ; XID_Start # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Start # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Start # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; XID_Start # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; XID_Start # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; XID_Start # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; XID_Start # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; XID_Start # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; XID_Start # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; XID_Start # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; XID_Start # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; XID_Start # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; XID_Start # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; XID_Start # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; XID_Start # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1380..138F ; XID_Start # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; XID_Start # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; XID_Start # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; XID_Start # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; XID_Start # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; XID_Start # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; XID_Start # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; XID_Start # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; XID_Start # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; XID_Start # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +171F..1731 ; XID_Start # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1740..1751 ; XID_Start # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; XID_Start # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; XID_Start # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; XID_Start # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17D7 ; XID_Start # Lm KHMER SIGN LEK TOO +17DC ; XID_Start # Lo KHMER SIGN AVAKRAHASANYA +1820..1842 ; XID_Start # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; XID_Start # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; XID_Start # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; XID_Start # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; XID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; XID_Start # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; XID_Start # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; XID_Start # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; XID_Start # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1950..196D ; XID_Start # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; XID_Start # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; XID_Start # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; XID_Start # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +1A00..1A16 ; XID_Start # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A20..1A54 ; XID_Start # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1AA7 ; XID_Start # Lm TAI THAM SIGN MAI YAMOK +1B05..1B33 ; XID_Start # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B45..1B4C ; XID_Start # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B83..1BA0 ; XID_Start # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BAE..1BAF ; XID_Start # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BBA..1BE5 ; XID_Start # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1C00..1C23 ; XID_Start # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C4D..1C4F ; XID_Start # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C5A..1C77 ; XID_Start # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; XID_Start # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; XID_Start # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; XID_Start # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; XID_Start # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CE9..1CEC ; XID_Start # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; XID_Start # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; XID_Start # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CFA ; XID_Start # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; XID_Start # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; XID_Start # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Start # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; XID_Start # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; XID_Start # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; XID_Start # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; XID_Start # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; XID_Start # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; XID_Start # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; XID_Start # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; XID_Start # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; XID_Start # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; XID_Start # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; XID_Start # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; XID_Start # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; XID_Start # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; XID_Start # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; XID_Start # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; XID_Start # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; XID_Start # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; XID_Start # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; XID_Start # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; XID_Start # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +2071 ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; XID_Start # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; XID_Start # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2102 ; XID_Start # L& DOUBLE-STRUCK CAPITAL C +2107 ; XID_Start # L& EULER CONSTANT +210A..2113 ; XID_Start # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; XID_Start # L& DOUBLE-STRUCK CAPITAL N +2118 ; XID_Start # Sm SCRIPT CAPITAL P +2119..211D ; XID_Start # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; XID_Start # L& DOUBLE-STRUCK CAPITAL Z +2126 ; XID_Start # L& OHM SIGN +2128 ; XID_Start # L& BLACK-LETTER CAPITAL Z +212A..212D ; XID_Start # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; XID_Start # So ESTIMATED SYMBOL +212F..2134 ; XID_Start # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; XID_Start # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; XID_Start # L& INFORMATION SOURCE +213C..213F ; XID_Start # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; XID_Start # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; XID_Start # L& TURNED SMALL F +2160..2182 ; XID_Start # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; XID_Start # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; XID_Start # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; XID_Start # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Start # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; XID_Start # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; XID_Start # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; XID_Start # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; XID_Start # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; XID_Start # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Start # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Start # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; XID_Start # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D80..2D96 ; XID_Start # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; XID_Start # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +3005 ; XID_Start # Lm IDEOGRAPHIC ITERATION MARK +3006 ; XID_Start # Lo IDEOGRAPHIC CLOSING MARK +3007 ; XID_Start # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; XID_Start # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3031..3035 ; XID_Start # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; XID_Start # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; XID_Start # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; XID_Start # Lo MASU MARK +3041..3096 ; XID_Start # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; XID_Start # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; XID_Start # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; XID_Start # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; XID_Start # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; XID_Start # Lo KATAKANA DIGRAPH KOTO +3105..312F ; XID_Start # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; XID_Start # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; XID_Start # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; XID_Start # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; XID_Start # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; XID_Start # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; XID_Start # Lm YI SYLLABLE WU +A016..A48C ; XID_Start # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; XID_Start # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; XID_Start # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; XID_Start # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; XID_Start # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; XID_Start # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A62A..A62B ; XID_Start # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; XID_Start # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; XID_Start # Lo CYRILLIC LETTER MULTIOCULAR O +A67F ; XID_Start # Lm CYRILLIC PAYEROK +A680..A69B ; XID_Start # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; XID_Start # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; XID_Start # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; XID_Start # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A717..A71F ; XID_Start # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; XID_Start # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; XID_Start # Lm MODIFIER LETTER US +A771..A787 ; XID_Start # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; XID_Start # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; XID_Start # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; XID_Start # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; XID_Start # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; XID_Start # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; XID_Start # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; XID_Start # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; XID_Start # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; XID_Start # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; XID_Start # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; XID_Start # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; XID_Start # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; XID_Start # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; XID_Start # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; XID_Start # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; XID_Start # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A840..A873 ; XID_Start # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A882..A8B3 ; XID_Start # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8F2..A8F7 ; XID_Start # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; XID_Start # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; XID_Start # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A90A..A925 ; XID_Start # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A930..A946 ; XID_Start # Lo [23] REJANG LETTER KA..REJANG LETTER A +A960..A97C ; XID_Start # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A984..A9B2 ; XID_Start # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9CF ; XID_Start # Lm JAVANESE PANGRANGKEP +A9E0..A9E4 ; XID_Start # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; XID_Start # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; XID_Start # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9FA..A9FE ; XID_Start # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; XID_Start # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA40..AA42 ; XID_Start # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; XID_Start # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA60..AA6F ; XID_Start # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; XID_Start # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; XID_Start # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; XID_Start # Lo MYANMAR LETTER AITON RA +AA7E..AAAF ; XID_Start # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; XID_Start # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; XID_Start # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; XID_Start # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; XID_Start # Lo TAI VIET TONE MAI NUENG +AAC2 ; XID_Start # Lo TAI VIET TONE MAI SONG +AADB..AADC ; XID_Start # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; XID_Start # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Start # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAF2 ; XID_Start # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Start # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AB01..AB06 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; XID_Start # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; XID_Start # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; XID_Start # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; XID_Start # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; XID_Start # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; XID_Start # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; XID_Start # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; XID_Start # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +AC00..D7A3 ; XID_Start # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; XID_Start # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; XID_Start # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; XID_Start # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; XID_Start # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; XID_Start # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; XID_Start # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; XID_Start # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; XID_Start # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; XID_Start # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; XID_Start # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; XID_Start # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; XID_Start # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; XID_Start # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; XID_Start # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FC5D ; XID_Start # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM +FC64..FD3D ; XID_Start # Lo [218] ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; XID_Start # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; XID_Start # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDF9 ; XID_Start # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA ISOLATED FORM +FE71 ; XID_Start # Lo ARABIC TATWEEL WITH FATHATAN ABOVE +FE73 ; XID_Start # Lo ARABIC TAIL FRAGMENT +FE77 ; XID_Start # Lo ARABIC FATHA MEDIAL FORM +FE79 ; XID_Start # Lo ARABIC DAMMA MEDIAL FORM +FE7B ; XID_Start # Lo ARABIC KASRA MEDIAL FORM +FE7D ; XID_Start # Lo ARABIC SHADDA MEDIAL FORM +FE7F..FEFC ; XID_Start # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF21..FF3A ; XID_Start # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; XID_Start # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; XID_Start # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; XID_Start # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; XID_Start # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FFA0..FFBE ; XID_Start # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; XID_Start # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; XID_Start # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; XID_Start # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; XID_Start # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; XID_Start # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; XID_Start # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; XID_Start # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; XID_Start # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; XID_Start # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; XID_Start # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10280..1029C ; XID_Start # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; XID_Start # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +10300..1031F ; XID_Start # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; XID_Start # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; XID_Start # Nl GOTHIC LETTER NINETY +10342..10349 ; XID_Start # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; XID_Start # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; XID_Start # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; XID_Start # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; XID_Start # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; XID_Start # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; XID_Start # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; XID_Start # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; XID_Start # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104B0..104D3 ; XID_Start # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; XID_Start # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; XID_Start # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; XID_Start # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; XID_Start # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; XID_Start # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; XID_Start # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; XID_Start # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; XID_Start # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; XID_Start # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; XID_Start # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; XID_Start # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; XID_Start # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; XID_Start # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; XID_Start # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; XID_Start # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; XID_Start # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; XID_Start # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; XID_Start # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; XID_Start # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; XID_Start # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; XID_Start # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; XID_Start # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; XID_Start # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; XID_Start # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; XID_Start # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; XID_Start # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; XID_Start # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; XID_Start # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; XID_Start # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Start # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Start # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; XID_Start # Lo KHAROSHTHI LETTER A +10A10..10A13 ; XID_Start # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; XID_Start # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; XID_Start # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A60..10A7C ; XID_Start # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; XID_Start # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; XID_Start # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; XID_Start # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10B00..10B35 ; XID_Start # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; XID_Start # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; XID_Start # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; XID_Start # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; XID_Start # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; XID_Start # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; XID_Start # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; XID_Start # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10E80..10EA9 ; XID_Start # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EB0..10EB1 ; XID_Start # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; XID_Start # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; XID_Start # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; XID_Start # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F70..10F81 ; XID_Start # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10FB0..10FC4 ; XID_Start # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; XID_Start # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11003..11037 ; XID_Start # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11071..11072 ; XID_Start # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; XID_Start # Lo BRAHMI LETTER OLD TAMIL LLA +11083..110AF ; XID_Start # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110D0..110E8 ; XID_Start # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +11103..11126 ; XID_Start # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11144 ; XID_Start # Lo CHAKMA LETTER LHAA +11147 ; XID_Start # Lo CHAKMA LETTER VAA +11150..11172 ; XID_Start # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11176 ; XID_Start # Lo MAHAJANI LIGATURE SHRI +11183..111B2 ; XID_Start # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111C1..111C4 ; XID_Start # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111DA ; XID_Start # Lo SHARADA EKAM +111DC ; XID_Start # Lo SHARADA HEADSTROKE +11200..11211 ; XID_Start # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; XID_Start # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +11280..11286 ; XID_Start # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; XID_Start # Lo MULTANI LETTER GHA +1128A..1128D ; XID_Start # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; XID_Start # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; XID_Start # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; XID_Start # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +11305..1130C ; XID_Start # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; XID_Start # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; XID_Start # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; XID_Start # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; XID_Start # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; XID_Start # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; XID_Start # Lo GRANTHA SIGN AVAGRAHA +11350 ; XID_Start # Lo GRANTHA OM +1135D..11361 ; XID_Start # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11400..11434 ; XID_Start # Lo [53] NEWA LETTER A..NEWA LETTER HA +11447..1144A ; XID_Start # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1145F..11461 ; XID_Start # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; XID_Start # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114C4..114C5 ; XID_Start # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; XID_Start # Lo TIRHUTA OM +11580..115AE ; XID_Start # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115D8..115DB ; XID_Start # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; XID_Start # Lo [48] MODI LETTER A..MODI LETTER LLA +11644 ; XID_Start # Lo MODI SIGN HUVA +11680..116AA ; XID_Start # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116B8 ; XID_Start # Lo TAKRI LETTER ARCHAIC KHA +11700..1171A ; XID_Start # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11740..11746 ; XID_Start # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; XID_Start # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +118A0..118DF ; XID_Start # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118FF..11906 ; XID_Start # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; XID_Start # Lo DIVES AKURU LETTER O +1190C..11913 ; XID_Start # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; XID_Start # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; XID_Start # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +1193F ; XID_Start # Lo DIVES AKURU PREFIXED NASAL SIGN +11941 ; XID_Start # Lo DIVES AKURU INITIAL RA +119A0..119A7 ; XID_Start # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; XID_Start # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119E1 ; XID_Start # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; XID_Start # Lo NANDINAGARI HEADSTROKE +11A00 ; XID_Start # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; XID_Start # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A3A ; XID_Start # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A50 ; XID_Start # Lo SOYOMBO LETTER A +11A5C..11A89 ; XID_Start # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A9D ; XID_Start # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; XID_Start # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; XID_Start # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; XID_Start # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C40 ; XID_Start # Lo BHAIKSUKI SIGN AVAGRAHA +11C72..11C8F ; XID_Start # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11D00..11D06 ; XID_Start # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; XID_Start # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; XID_Start # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; XID_Start # Lo MASARAM GONDI REPHA +11D60..11D65 ; XID_Start # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; XID_Start # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; XID_Start # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D98 ; XID_Start # Lo GUNJALA GONDI OM +11EE0..11EF2 ; XID_Start # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11FB0 ; XID_Start # Lo LISU LETTER YHA +12000..12399 ; XID_Start # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; XID_Start # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; XID_Start # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; XID_Start # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342E ; XID_Start # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; XID_Start # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; XID_Start # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; XID_Start # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A70..16ABE ; XID_Start # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AD0..16AED ; XID_Start # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16B00..16B2F ; XID_Start # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B40..16B43 ; XID_Start # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B63..16B77 ; XID_Start # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; XID_Start # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; XID_Start # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; XID_Start # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; XID_Start # Lo MIAO LETTER NASALIZATION +16F93..16F9F ; XID_Start # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; XID_Start # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; XID_Start # Lm OLD CHINESE ITERATION MARK +17000..187F7 ; XID_Start # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; XID_Start # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; XID_Start # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; XID_Start # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; XID_Start # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; XID_Start # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; XID_Start # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; XID_Start # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; XID_Start # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; XID_Start # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; XID_Start # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; XID_Start # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; XID_Start # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; XID_Start # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1D400..1D454 ; XID_Start # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; XID_Start # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; XID_Start # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; XID_Start # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; XID_Start # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; XID_Start # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; XID_Start # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; XID_Start # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; XID_Start # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; XID_Start # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; XID_Start # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; XID_Start # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; XID_Start # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; XID_Start # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; XID_Start # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; XID_Start # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; XID_Start # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; XID_Start # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; XID_Start # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; XID_Start # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; XID_Start # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; XID_Start # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; XID_Start # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; XID_Start # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; XID_Start # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; XID_Start # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; XID_Start # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; XID_Start # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1DF00..1DF09 ; XID_Start # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; XID_Start # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; XID_Start # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E100..1E12C ; XID_Start # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; XID_Start # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E14E ; XID_Start # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; XID_Start # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; XID_Start # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E7E0..1E7E6 ; XID_Start # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; XID_Start # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; XID_Start # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; XID_Start # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; XID_Start # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E900..1E943 ; XID_Start # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; XID_Start # Lm ADLAM NASALIZATION MARK +1EE00..1EE03 ; XID_Start # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Start # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Start # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Start # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Start # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Start # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Start # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Start # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Start # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Start # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Start # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Start # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Start # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Start # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Start # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Start # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Start # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Start # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Start # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +20000..2A6DF ; XID_Start # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; XID_Start # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; XID_Start # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; XID_Start # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; XID_Start # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; XID_Start # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; XID_Start # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 131974 + +# ================================================ + +# Derived Property: XID_Continue +# Mod_ID_Continue modified for closure under NFKx +# Modified as described in UAX #15 +# NOTE: Does NOT remove the non-NFKx characters. +# Merely ensures that if isIdentifer(string) then isIdentifier(NFKx(string)) +# NOTE: See UAX #31 for more information + +0030..0039 ; XID_Continue # Nd [10] DIGIT ZERO..DIGIT NINE +0041..005A ; XID_Continue # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005F ; XID_Continue # Pc LOW LINE +0061..007A ; XID_Continue # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; XID_Continue # Lo FEMININE ORDINAL INDICATOR +00B5 ; XID_Continue # L& MICRO SIGN +00B7 ; XID_Continue # Po MIDDLE DOT +00BA ; XID_Continue # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; XID_Continue # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; XID_Continue # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; XID_Continue # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; XID_Continue # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; XID_Continue # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; XID_Continue # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; XID_Continue # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; XID_Continue # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; XID_Continue # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; XID_Continue # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C6..02D1 ; XID_Continue # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02E0..02E4 ; XID_Continue # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02EC ; XID_Continue # Lm MODIFIER LETTER VOICING +02EE ; XID_Continue # Lm MODIFIER LETTER DOUBLE APOSTROPHE +0300..036F ; XID_Continue # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0370..0373 ; XID_Continue # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; XID_Continue # Lm GREEK NUMERAL SIGN +0376..0377 ; XID_Continue # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037B..037D ; XID_Continue # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; XID_Continue # L& GREEK CAPITAL LETTER YOT +0386 ; XID_Continue # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; XID_Continue # Po GREEK ANO TELEIA +0388..038A ; XID_Continue # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; XID_Continue # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; XID_Continue # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; XID_Continue # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F7..0481 ; XID_Continue # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0483..0487 ; XID_Continue # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +048A..052F ; XID_Continue # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; XID_Continue # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; XID_Continue # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0560..0588 ; XID_Continue # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0591..05BD ; XID_Continue # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; XID_Continue # Mn HEBREW POINT RAFE +05C1..05C2 ; XID_Continue # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; XID_Continue # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; XID_Continue # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; XID_Continue # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; XID_Continue # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +0610..061A ; XID_Continue # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +0620..063F ; XID_Continue # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; XID_Continue # Lm ARABIC TATWEEL +0641..064A ; XID_Continue # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +064B..065F ; XID_Continue # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0660..0669 ; XID_Continue # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066E..066F ; XID_Continue # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0670 ; XID_Continue # Mn ARABIC LETTER SUPERSCRIPT ALEF +0671..06D3 ; XID_Continue # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D5 ; XID_Continue # Lo ARABIC LETTER AE +06D6..06DC ; XID_Continue # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; XID_Continue # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; XID_Continue # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; XID_Continue # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; XID_Continue # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; XID_Continue # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; XID_Continue # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; XID_Continue # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FF ; XID_Continue # Lo ARABIC LETTER HEH WITH INVERTED V +0710 ; XID_Continue # Lo SYRIAC LETTER ALAPH +0711 ; XID_Continue # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; XID_Continue # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; XID_Continue # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..07A5 ; XID_Continue # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07A6..07B0 ; XID_Continue # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; XID_Continue # Lo THAANA LETTER NAA +07C0..07C9 ; XID_Continue # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; XID_Continue # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; XID_Continue # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; XID_Continue # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07FA ; XID_Continue # Lm NKO LAJANYALAN +07FD ; XID_Continue # Mn NKO DANTAYALAN +0800..0815 ; XID_Continue # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; XID_Continue # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; XID_Continue # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; XID_Continue # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; XID_Continue # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; XID_Continue # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; XID_Continue # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0840..0858 ; XID_Continue # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; XID_Continue # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0860..086A ; XID_Continue # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; XID_Continue # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0889..088E ; XID_Continue # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0898..089F ; XID_Continue # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; XID_Continue # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; XID_Continue # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; XID_Continue # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; XID_Continue # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +0903 ; XID_Continue # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; XID_Continue # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; XID_Continue # Mn DEVANAGARI VOWEL SIGN OE +093B ; XID_Continue # Mc DEVANAGARI VOWEL SIGN OOE +093C ; XID_Continue # Mn DEVANAGARI SIGN NUKTA +093D ; XID_Continue # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; XID_Continue # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; XID_Continue # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; XID_Continue # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; XID_Continue # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; XID_Continue # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; XID_Continue # Lo DEVANAGARI OM +0951..0957 ; XID_Continue # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; XID_Continue # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; XID_Continue # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; XID_Continue # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0971 ; XID_Continue # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; XID_Continue # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0981 ; XID_Continue # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; XID_Continue # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; XID_Continue # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; XID_Continue # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; XID_Continue # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; XID_Continue # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; XID_Continue # Lo BENGALI LETTER LA +09B6..09B9 ; XID_Continue # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; XID_Continue # Mn BENGALI SIGN NUKTA +09BD ; XID_Continue # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; XID_Continue # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; XID_Continue # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; XID_Continue # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; XID_Continue # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; XID_Continue # Mn BENGALI SIGN VIRAMA +09CE ; XID_Continue # Lo BENGALI LETTER KHANDA TA +09D7 ; XID_Continue # Mc BENGALI AU LENGTH MARK +09DC..09DD ; XID_Continue # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; XID_Continue # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; XID_Continue # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; XID_Continue # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; XID_Continue # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09FC ; XID_Continue # Lo BENGALI LETTER VEDIC ANUSVARA +09FE ; XID_Continue # Mn BENGALI SANDHI MARK +0A01..0A02 ; XID_Continue # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; XID_Continue # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; XID_Continue # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; XID_Continue # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; XID_Continue # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; XID_Continue # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; XID_Continue # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; XID_Continue # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; XID_Continue # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; XID_Continue # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; XID_Continue # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; XID_Continue # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; XID_Continue # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; XID_Continue # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; XID_Continue # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; XID_Continue # Lo GURMUKHI LETTER FA +0A66..0A6F ; XID_Continue # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; XID_Continue # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; XID_Continue # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; XID_Continue # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; XID_Continue # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; XID_Continue # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; XID_Continue # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; XID_Continue # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; XID_Continue # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; XID_Continue # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; XID_Continue # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; XID_Continue # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; XID_Continue # Mn GUJARATI SIGN NUKTA +0ABD ; XID_Continue # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; XID_Continue # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; XID_Continue # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; XID_Continue # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; XID_Continue # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; XID_Continue # Mn GUJARATI SIGN VIRAMA +0AD0 ; XID_Continue # Lo GUJARATI OM +0AE0..0AE1 ; XID_Continue # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; XID_Continue # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; XID_Continue # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF9 ; XID_Continue # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; XID_Continue # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; XID_Continue # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; XID_Continue # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; XID_Continue # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; XID_Continue # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; XID_Continue # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; XID_Continue # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; XID_Continue # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; XID_Continue # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; XID_Continue # Mn ORIYA SIGN NUKTA +0B3D ; XID_Continue # Lo ORIYA SIGN AVAGRAHA +0B3E ; XID_Continue # Mc ORIYA VOWEL SIGN AA +0B3F ; XID_Continue # Mn ORIYA VOWEL SIGN I +0B40 ; XID_Continue # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; XID_Continue # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; XID_Continue # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; XID_Continue # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; XID_Continue # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; XID_Continue # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; XID_Continue # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; XID_Continue # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; XID_Continue # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; XID_Continue # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; XID_Continue # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B71 ; XID_Continue # Lo ORIYA LETTER WA +0B82 ; XID_Continue # Mn TAMIL SIGN ANUSVARA +0B83 ; XID_Continue # Lo TAMIL SIGN VISARGA +0B85..0B8A ; XID_Continue # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; XID_Continue # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; XID_Continue # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; XID_Continue # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; XID_Continue # Lo TAMIL LETTER JA +0B9E..0B9F ; XID_Continue # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; XID_Continue # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; XID_Continue # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; XID_Continue # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; XID_Continue # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; XID_Continue # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; XID_Continue # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; XID_Continue # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; XID_Continue # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; XID_Continue # Mn TAMIL SIGN VIRAMA +0BD0 ; XID_Continue # Lo TAMIL OM +0BD7 ; XID_Continue # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; XID_Continue # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0C00 ; XID_Continue # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; XID_Continue # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; XID_Continue # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; XID_Continue # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; XID_Continue # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; XID_Continue # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; XID_Continue # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; XID_Continue # Mn TELUGU SIGN NUKTA +0C3D ; XID_Continue # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; XID_Continue # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; XID_Continue # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; XID_Continue # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; XID_Continue # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; XID_Continue # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; XID_Continue # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; XID_Continue # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; XID_Continue # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; XID_Continue # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C80 ; XID_Continue # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; XID_Continue # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; XID_Continue # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C85..0C8C ; XID_Continue # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; XID_Continue # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; XID_Continue # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; XID_Continue # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; XID_Continue # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; XID_Continue # Mn KANNADA SIGN NUKTA +0CBD ; XID_Continue # Lo KANNADA SIGN AVAGRAHA +0CBE ; XID_Continue # Mc KANNADA VOWEL SIGN AA +0CBF ; XID_Continue # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; XID_Continue # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; XID_Continue # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; XID_Continue # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; XID_Continue # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; XID_Continue # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; XID_Continue # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; XID_Continue # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; XID_Continue # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; XID_Continue # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; XID_Continue # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; XID_Continue # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D00..0D01 ; XID_Continue # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; XID_Continue # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; XID_Continue # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; XID_Continue # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; XID_Continue # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; XID_Continue # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; XID_Continue # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; XID_Continue # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; XID_Continue # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; XID_Continue # Mn MALAYALAM SIGN VIRAMA +0D4E ; XID_Continue # Lo MALAYALAM LETTER DOT REPH +0D54..0D56 ; XID_Continue # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; XID_Continue # Mc MALAYALAM AU LENGTH MARK +0D5F..0D61 ; XID_Continue # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; XID_Continue # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; XID_Continue # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D7A..0D7F ; XID_Continue # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D81 ; XID_Continue # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; XID_Continue # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; XID_Continue # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; XID_Continue # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; XID_Continue # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; XID_Continue # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; XID_Continue # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; XID_Continue # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; XID_Continue # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; XID_Continue # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; XID_Continue # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; XID_Continue # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; XID_Continue # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; XID_Continue # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E01..0E30 ; XID_Continue # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; XID_Continue # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; XID_Continue # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; XID_Continue # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; XID_Continue # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; XID_Continue # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; XID_Continue # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E50..0E59 ; XID_Continue # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E81..0E82 ; XID_Continue # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; XID_Continue # Lo LAO LETTER KHO TAM +0E86..0E8A ; XID_Continue # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; XID_Continue # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; XID_Continue # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; XID_Continue # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; XID_Continue # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; XID_Continue # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; XID_Continue # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; XID_Continue # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; XID_Continue # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; XID_Continue # Lm LAO KO LA +0EC8..0ECD ; XID_Continue # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0ED0..0ED9 ; XID_Continue # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; XID_Continue # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; XID_Continue # Lo TIBETAN SYLLABLE OM +0F18..0F19 ; XID_Continue # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F20..0F29 ; XID_Continue # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F35 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; XID_Continue # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; XID_Continue # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; XID_Continue # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; XID_Continue # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; XID_Continue # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; XID_Continue # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; XID_Continue # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; XID_Continue # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; XID_Continue # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; XID_Continue # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; XID_Continue # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; XID_Continue # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; XID_Continue # Mn TIBETAN SYMBOL PADMA GDAN +1000..102A ; XID_Continue # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; XID_Continue # Mc MYANMAR VOWEL SIGN E +1032..1037 ; XID_Continue # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; XID_Continue # Mc MYANMAR SIGN VISARGA +1039..103A ; XID_Continue # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; XID_Continue # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; XID_Continue # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; XID_Continue # Lo MYANMAR LETTER GREAT SA +1040..1049 ; XID_Continue # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +1050..1055 ; XID_Continue # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; XID_Continue # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; XID_Continue # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; XID_Continue # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; XID_Continue # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; XID_Continue # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; XID_Continue # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; XID_Continue # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; XID_Continue # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; XID_Continue # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; XID_Continue # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; XID_Continue # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; XID_Continue # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; XID_Continue # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; XID_Continue # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; XID_Continue # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; XID_Continue # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; XID_Continue # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; XID_Continue # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; XID_Continue # Mn MYANMAR VOWEL SIGN AITON AI +10A0..10C5 ; XID_Continue # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; XID_Continue # L& GEORGIAN CAPITAL LETTER YN +10CD ; XID_Continue # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; XID_Continue # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; XID_Continue # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; XID_Continue # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; XID_Continue # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; XID_Continue # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; XID_Continue # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; XID_Continue # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; XID_Continue # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; XID_Continue # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; XID_Continue # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; XID_Continue # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; XID_Continue # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1369..1371 ; XID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +1380..138F ; XID_Continue # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +13A0..13F5 ; XID_Continue # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; XID_Continue # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1401..166C ; XID_Continue # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166F..167F ; XID_Continue # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1681..169A ; XID_Continue # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +16A0..16EA ; XID_Continue # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; XID_Continue # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; XID_Continue # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; XID_Continue # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; XID_Continue # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; XID_Continue # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; XID_Continue # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1732..1733 ; XID_Continue # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; XID_Continue # Mc HANUNOO SIGN PAMUDPOD +1740..1751 ; XID_Continue # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; XID_Continue # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1760..176C ; XID_Continue # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; XID_Continue # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; XID_Continue # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +1780..17B3 ; XID_Continue # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; XID_Continue # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; XID_Continue # Mc KHMER VOWEL SIGN AA +17B7..17BD ; XID_Continue # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; XID_Continue # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; XID_Continue # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; XID_Continue # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; XID_Continue # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D7 ; XID_Continue # Lm KHMER SIGN LEK TOO +17DC ; XID_Continue # Lo KHMER SIGN AVAKRAHASANYA +17DD ; XID_Continue # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; XID_Continue # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +180B..180D ; XID_Continue # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; XID_Continue # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; XID_Continue # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; XID_Continue # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; XID_Continue # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; XID_Continue # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; XID_Continue # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; XID_Continue # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; XID_Continue # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; XID_Continue # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; XID_Continue # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; XID_Continue # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; XID_Continue # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; XID_Continue # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; XID_Continue # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; XID_Continue # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; XID_Continue # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; XID_Continue # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; XID_Continue # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; XID_Continue # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; XID_Continue # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1946..194F ; XID_Continue # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; XID_Continue # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; XID_Continue # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; XID_Continue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; XID_Continue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; XID_Continue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; XID_Continue # No NEW TAI LUE THAM DIGIT ONE +1A00..1A16 ; XID_Continue # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; XID_Continue # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; XID_Continue # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; XID_Continue # Mn BUGINESE VOWEL SIGN AE +1A20..1A54 ; XID_Continue # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; XID_Continue # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; XID_Continue # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; XID_Continue # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; XID_Continue # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; XID_Continue # Mn TAI THAM SIGN SAKOT +1A61 ; XID_Continue # Mc TAI THAM VOWEL SIGN A +1A62 ; XID_Continue # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; XID_Continue # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; XID_Continue # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; XID_Continue # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; XID_Continue # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; XID_Continue # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; XID_Continue # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; XID_Continue # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA7 ; XID_Continue # Lm TAI THAM SIGN MAI YAMOK +1AB0..1ABD ; XID_Continue # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABF..1ACE ; XID_Continue # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; XID_Continue # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; XID_Continue # Mc BALINESE SIGN BISAH +1B05..1B33 ; XID_Continue # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; XID_Continue # Mn BALINESE SIGN REREKAN +1B35 ; XID_Continue # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; XID_Continue # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; XID_Continue # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; XID_Continue # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; XID_Continue # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; XID_Continue # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; XID_Continue # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; XID_Continue # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; XID_Continue # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B6B..1B73 ; XID_Continue # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; XID_Continue # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; XID_Continue # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; XID_Continue # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; XID_Continue # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; XID_Continue # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; XID_Continue # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; XID_Continue # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; XID_Continue # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; XID_Continue # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; XID_Continue # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; XID_Continue # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; XID_Continue # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE6 ; XID_Continue # Mn BATAK SIGN TOMPI +1BE7 ; XID_Continue # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; XID_Continue # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; XID_Continue # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; XID_Continue # Mn BATAK VOWEL SIGN KARO O +1BEE ; XID_Continue # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; XID_Continue # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; XID_Continue # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1C00..1C23 ; XID_Continue # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; XID_Continue # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; XID_Continue # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; XID_Continue # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; XID_Continue # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C40..1C49 ; XID_Continue # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; XID_Continue # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; XID_Continue # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; XID_Continue # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; XID_Continue # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C80..1C88 ; XID_Continue # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; XID_Continue # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; XID_Continue # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CD0..1CD2 ; XID_Continue # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; XID_Continue # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; XID_Continue # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; XID_Continue # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CE9..1CEC ; XID_Continue # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CED ; XID_Continue # Mn VEDIC SIGN TIRYAK +1CEE..1CF3 ; XID_Continue # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF4 ; XID_Continue # Mn VEDIC TONE CANDRA ABOVE +1CF5..1CF6 ; XID_Continue # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; XID_Continue # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; XID_Continue # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1CFA ; XID_Continue # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; XID_Continue # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; XID_Continue # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; XID_Continue # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; XID_Continue # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; XID_Continue # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; XID_Continue # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1DC0..1DFF ; XID_Continue # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1E00..1F15 ; XID_Continue # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; XID_Continue # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; XID_Continue # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; XID_Continue # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; XID_Continue # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; XID_Continue # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; XID_Continue # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; XID_Continue # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; XID_Continue # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBE ; XID_Continue # L& GREEK PROSGEGRAMMENI +1FC2..1FC4 ; XID_Continue # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; XID_Continue # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FD0..1FD3 ; XID_Continue # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; XID_Continue # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FE0..1FEC ; XID_Continue # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FF2..1FF4 ; XID_Continue # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; XID_Continue # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +203F..2040 ; XID_Continue # Pc [2] UNDERTIE..CHARACTER TIE +2054 ; XID_Continue # Pc INVERTED UNDERTIE +2071 ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; XID_Continue # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; XID_Continue # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20D0..20DC ; XID_Continue # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; XID_Continue # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20F0 ; XID_Continue # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2102 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL C +2107 ; XID_Continue # L& EULER CONSTANT +210A..2113 ; XID_Continue # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL N +2118 ; XID_Continue # Sm SCRIPT CAPITAL P +2119..211D ; XID_Continue # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; XID_Continue # L& DOUBLE-STRUCK CAPITAL Z +2126 ; XID_Continue # L& OHM SIGN +2128 ; XID_Continue # L& BLACK-LETTER CAPITAL Z +212A..212D ; XID_Continue # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; XID_Continue # So ESTIMATED SYMBOL +212F..2134 ; XID_Continue # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; XID_Continue # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; XID_Continue # L& INFORMATION SOURCE +213C..213F ; XID_Continue # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; XID_Continue # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214E ; XID_Continue # L& TURNED SMALL F +2160..2182 ; XID_Continue # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; XID_Continue # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; XID_Continue # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C00..2C7B ; XID_Continue # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; XID_Continue # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; XID_Continue # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CEB..2CEE ; XID_Continue # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; XID_Continue # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; XID_Continue # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2D00..2D25 ; XID_Continue # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; XID_Continue # L& GEORGIAN SMALL LETTER YN +2D2D ; XID_Continue # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; XID_Continue # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; XID_Continue # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D7F ; XID_Continue # Mn TIFINAGH CONSONANT JOINER +2D80..2D96 ; XID_Continue # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2DE0..2DFF ; XID_Continue # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +3005 ; XID_Continue # Lm IDEOGRAPHIC ITERATION MARK +3006 ; XID_Continue # Lo IDEOGRAPHIC CLOSING MARK +3007 ; XID_Continue # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; XID_Continue # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +302A..302D ; XID_Continue # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; XID_Continue # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3031..3035 ; XID_Continue # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3038..303A ; XID_Continue # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; XID_Continue # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; XID_Continue # Lo MASU MARK +3041..3096 ; XID_Continue # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +3099..309A ; XID_Continue # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; XID_Continue # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; XID_Continue # Lo HIRAGANA DIGRAPH YORI +30A1..30FA ; XID_Continue # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FC..30FE ; XID_Continue # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; XID_Continue # Lo KATAKANA DIGRAPH KOTO +3105..312F ; XID_Continue # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; XID_Continue # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +31A0..31BF ; XID_Continue # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31F0..31FF ; XID_Continue # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3400..4DBF ; XID_Continue # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..A014 ; XID_Continue # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; XID_Continue # Lm YI SYLLABLE WU +A016..A48C ; XID_Continue # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A4D0..A4F7 ; XID_Continue # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; XID_Continue # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A500..A60B ; XID_Continue # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; XID_Continue # Lm VAI SYLLABLE LENGTHENER +A610..A61F ; XID_Continue # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; XID_Continue # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; XID_Continue # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; XID_Continue # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; XID_Continue # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; XID_Continue # Mn COMBINING CYRILLIC VZMET +A674..A67D ; XID_Continue # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67F ; XID_Continue # Lm CYRILLIC PAYEROK +A680..A69B ; XID_Continue # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; XID_Continue # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; XID_Continue # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6A0..A6E5 ; XID_Continue # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; XID_Continue # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; XID_Continue # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A717..A71F ; XID_Continue # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A722..A76F ; XID_Continue # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; XID_Continue # Lm MODIFIER LETTER US +A771..A787 ; XID_Continue # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; XID_Continue # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A78B..A78E ; XID_Continue # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; XID_Continue # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; XID_Continue # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; XID_Continue # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; XID_Continue # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; XID_Continue # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; XID_Continue # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; XID_Continue # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; XID_Continue # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; XID_Continue # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; XID_Continue # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; XID_Continue # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A802 ; XID_Continue # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; XID_Continue # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; XID_Continue # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; XID_Continue # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; XID_Continue # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; XID_Continue # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; XID_Continue # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; XID_Continue # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; XID_Continue # Mc SYLOTI NAGRI VOWEL SIGN OO +A82C ; XID_Continue # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A840..A873 ; XID_Continue # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A880..A881 ; XID_Continue # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; XID_Continue # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; XID_Continue # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; XID_Continue # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8D0..A8D9 ; XID_Continue # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8E0..A8F1 ; XID_Continue # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; XID_Continue # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8FB ; XID_Continue # Lo DEVANAGARI HEADSTROKE +A8FD..A8FE ; XID_Continue # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; XID_Continue # Mn DEVANAGARI VOWEL SIGN AY +A900..A909 ; XID_Continue # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; XID_Continue # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; XID_Continue # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A930..A946 ; XID_Continue # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; XID_Continue # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; XID_Continue # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A960..A97C ; XID_Continue # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A980..A982 ; XID_Continue # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; XID_Continue # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; XID_Continue # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; XID_Continue # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; XID_Continue # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; XID_Continue # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; XID_Continue # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; XID_Continue # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9CF ; XID_Continue # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; XID_Continue # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9E0..A9E4 ; XID_Continue # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; XID_Continue # Mn MYANMAR SIGN SHAN SAW +A9E6 ; XID_Continue # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; XID_Continue # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; XID_Continue # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; XID_Continue # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; XID_Continue # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; XID_Continue # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; XID_Continue # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; XID_Continue # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; XID_Continue # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; XID_Continue # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; XID_Continue # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; XID_Continue # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; XID_Continue # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; XID_Continue # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; XID_Continue # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA60..AA6F ; XID_Continue # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; XID_Continue # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; XID_Continue # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA7A ; XID_Continue # Lo MYANMAR LETTER AITON RA +AA7B ; XID_Continue # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; XID_Continue # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; XID_Continue # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; XID_Continue # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB0 ; XID_Continue # Mn TAI VIET MAI KANG +AAB1 ; XID_Continue # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; XID_Continue # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; XID_Continue # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; XID_Continue # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; XID_Continue # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; XID_Continue # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; XID_Continue # Lo TAI VIET TONE MAI NUENG +AAC1 ; XID_Continue # Mn TAI VIET TONE MAI THO +AAC2 ; XID_Continue # Lo TAI VIET TONE MAI SONG +AADB..AADC ; XID_Continue # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; XID_Continue # Lm TAI VIET SYMBOL SAM +AAE0..AAEA ; XID_Continue # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; XID_Continue # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF2 ; XID_Continue # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; XID_Continue # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; XID_Continue # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; XID_Continue # Mn MEETEI MAYEK VIRAMA +AB01..AB06 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; XID_Continue # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; XID_Continue # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; XID_Continue # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; XID_Continue # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; XID_Continue # Lm MODIFIER LETTER SMALL TURNED W +AB70..ABBF ; XID_Continue # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; XID_Continue # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; XID_Continue # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; XID_Continue # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEC ; XID_Continue # Mc MEETEI MAYEK LUM IYEK +ABED ; XID_Continue # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; XID_Continue # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; XID_Continue # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; XID_Continue # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; XID_Continue # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; XID_Continue # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; XID_Continue # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; XID_Continue # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; XID_Continue # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; XID_Continue # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; XID_Continue # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; XID_Continue # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB2A..FB36 ; XID_Continue # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; XID_Continue # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; XID_Continue # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; XID_Continue # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; XID_Continue # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; XID_Continue # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBD3..FC5D ; XID_Continue # Lo [139] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF MAKSURA WITH SUPERSCRIPT ALEF ISOLATED FORM +FC64..FD3D ; XID_Continue # Lo [218] ARABIC LIGATURE YEH WITH HAMZA ABOVE WITH REH FINAL FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD50..FD8F ; XID_Continue # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; XID_Continue # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDF0..FDF9 ; XID_Continue # Lo [10] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE SALLA ISOLATED FORM +FE00..FE0F ; XID_Continue # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; XID_Continue # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FE33..FE34 ; XID_Continue # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE4D..FE4F ; XID_Continue # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE71 ; XID_Continue # Lo ARABIC TATWEEL WITH FATHATAN ABOVE +FE73 ; XID_Continue # Lo ARABIC TAIL FRAGMENT +FE77 ; XID_Continue # Lo ARABIC FATHA MEDIAL FORM +FE79 ; XID_Continue # Lo ARABIC DAMMA MEDIAL FORM +FE7B ; XID_Continue # Lo ARABIC KASRA MEDIAL FORM +FE7D ; XID_Continue # Lo ARABIC SHADDA MEDIAL FORM +FE7F..FEFC ; XID_Continue # Lo [126] ARABIC SUKUN MEDIAL FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF10..FF19 ; XID_Continue # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF3A ; XID_Continue # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3F ; XID_Continue # Pc FULLWIDTH LOW LINE +FF41..FF5A ; XID_Continue # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF66..FF6F ; XID_Continue # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; XID_Continue # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; XID_Continue # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FF9E..FF9F ; XID_Continue # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFA0..FFBE ; XID_Continue # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; XID_Continue # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +10000..1000B ; XID_Continue # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; XID_Continue # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; XID_Continue # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; XID_Continue # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; XID_Continue # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; XID_Continue # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; XID_Continue # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10140..10174 ; XID_Continue # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +101FD ; XID_Continue # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +10280..1029C ; XID_Continue # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; XID_Continue # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E0 ; XID_Continue # Mn COPTIC EPACT THOUSANDS MARK +10300..1031F ; XID_Continue # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +1032D..10340 ; XID_Continue # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; XID_Continue # Nl GOTHIC LETTER NINETY +10342..10349 ; XID_Continue # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; XID_Continue # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; XID_Continue # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; XID_Continue # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10380..1039D ; XID_Continue # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +103A0..103C3 ; XID_Continue # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; XID_Continue # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D1..103D5 ; XID_Continue # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; XID_Continue # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; XID_Continue # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; XID_Continue # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; XID_Continue # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; XID_Continue # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; XID_Continue # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; XID_Continue # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +10570..1057A ; XID_Continue # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; XID_Continue # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; XID_Continue # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; XID_Continue # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; XID_Continue # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; XID_Continue # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; XID_Continue # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; XID_Continue # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; XID_Continue # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; XID_Continue # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; XID_Continue # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; XID_Continue # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; XID_Continue # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; XID_Continue # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; XID_Continue # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; XID_Continue # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; XID_Continue # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; XID_Continue # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; XID_Continue # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; XID_Continue # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10860..10876 ; XID_Continue # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10880..1089E ; XID_Continue # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108E0..108F2 ; XID_Continue # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; XID_Continue # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +10900..10915 ; XID_Continue # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10920..10939 ; XID_Continue # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +10980..109B7 ; XID_Continue # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BE..109BF ; XID_Continue # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +10A00 ; XID_Continue # Lo KHAROSHTHI LETTER A +10A01..10A03 ; XID_Continue # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; XID_Continue # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; XID_Continue # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; XID_Continue # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; XID_Continue # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; XID_Continue # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; XID_Continue # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; XID_Continue # Mn KHAROSHTHI VIRAMA +10A60..10A7C ; XID_Continue # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A80..10A9C ; XID_Continue # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10AC0..10AC7 ; XID_Continue # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC9..10AE4 ; XID_Continue # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; XID_Continue # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10B00..10B35 ; XID_Continue # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B40..10B55 ; XID_Continue # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B60..10B72 ; XID_Continue # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B80..10B91 ; XID_Continue # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10C00..10C48 ; XID_Continue # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; XID_Continue # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; XID_Continue # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10D00..10D23 ; XID_Continue # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; XID_Continue # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; XID_Continue # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E80..10EA9 ; XID_Continue # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; XID_Continue # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EB0..10EB1 ; XID_Continue # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; XID_Continue # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F27 ; XID_Continue # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; XID_Continue # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; XID_Continue # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F70..10F81 ; XID_Continue # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F82..10F85 ; XID_Continue # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10FB0..10FC4 ; XID_Continue # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FE0..10FF6 ; XID_Continue # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; XID_Continue # Mc BRAHMI SIGN CANDRABINDU +11001 ; XID_Continue # Mn BRAHMI SIGN ANUSVARA +11002 ; XID_Continue # Mc BRAHMI SIGN VISARGA +11003..11037 ; XID_Continue # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; XID_Continue # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11066..1106F ; XID_Continue # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; XID_Continue # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; XID_Continue # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; XID_Continue # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; XID_Continue # Lo BRAHMI LETTER OLD TAMIL LLA +1107F..11081 ; XID_Continue # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +11082 ; XID_Continue # Mc KAITHI SIGN VISARGA +11083..110AF ; XID_Continue # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; XID_Continue # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; XID_Continue # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; XID_Continue # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; XID_Continue # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; XID_Continue # Mn KAITHI VOWEL SIGN VOCALIC R +110D0..110E8 ; XID_Continue # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; XID_Continue # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11100..11102 ; XID_Continue # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; XID_Continue # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; XID_Continue # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; XID_Continue # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; XID_Continue # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; XID_Continue # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11144 ; XID_Continue # Lo CHAKMA LETTER LHAA +11145..11146 ; XID_Continue # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; XID_Continue # Lo CHAKMA LETTER VAA +11150..11172 ; XID_Continue # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; XID_Continue # Mn MAHAJANI SIGN NUKTA +11176 ; XID_Continue # Lo MAHAJANI LIGATURE SHRI +11180..11181 ; XID_Continue # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; XID_Continue # Mc SHARADA SIGN VISARGA +11183..111B2 ; XID_Continue # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; XID_Continue # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; XID_Continue # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; XID_Continue # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; XID_Continue # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C9..111CC ; XID_Continue # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CE ; XID_Continue # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; XID_Continue # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; XID_Continue # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; XID_Continue # Lo SHARADA EKAM +111DC ; XID_Continue # Lo SHARADA HEADSTROKE +11200..11211 ; XID_Continue # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; XID_Continue # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; XID_Continue # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; XID_Continue # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; XID_Continue # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; XID_Continue # Mn KHOJKI SIGN ANUSVARA +11235 ; XID_Continue # Mc KHOJKI SIGN VIRAMA +11236..11237 ; XID_Continue # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; XID_Continue # Mn KHOJKI SIGN SUKUN +11280..11286 ; XID_Continue # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; XID_Continue # Lo MULTANI LETTER GHA +1128A..1128D ; XID_Continue # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; XID_Continue # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; XID_Continue # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112B0..112DE ; XID_Continue # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; XID_Continue # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; XID_Continue # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; XID_Continue # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; XID_Continue # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11300..11301 ; XID_Continue # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; XID_Continue # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; XID_Continue # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; XID_Continue # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; XID_Continue # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; XID_Continue # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; XID_Continue # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; XID_Continue # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133B..1133C ; XID_Continue # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133D ; XID_Continue # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; XID_Continue # Mn GRANTHA VOWEL SIGN II +11341..11344 ; XID_Continue # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; XID_Continue # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; XID_Continue # Lo GRANTHA OM +11357 ; XID_Continue # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; XID_Continue # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; XID_Continue # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; XID_Continue # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; XID_Continue # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11400..11434 ; XID_Continue # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; XID_Continue # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; XID_Continue # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; XID_Continue # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 ; XID_Continue # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; XID_Continue # Mc NEWA SIGN VISARGA +11446 ; XID_Continue # Mn NEWA SIGN NUKTA +11447..1144A ; XID_Continue # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +11450..11459 ; XID_Continue # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145E ; XID_Continue # Mn NEWA SANDHI MARK +1145F..11461 ; XID_Continue # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; XID_Continue # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; XID_Continue # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; XID_Continue # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; XID_Continue # Mc TIRHUTA VOWEL SIGN E +114BA ; XID_Continue # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; XID_Continue # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; XID_Continue # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; XID_Continue # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; XID_Continue # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; XID_Continue # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C7 ; XID_Continue # Lo TIRHUTA OM +114D0..114D9 ; XID_Continue # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; XID_Continue # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; XID_Continue # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; XID_Continue # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; XID_Continue # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; XID_Continue # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; XID_Continue # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; XID_Continue # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115D8..115DB ; XID_Continue # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; XID_Continue # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11600..1162F ; XID_Continue # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; XID_Continue # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; XID_Continue # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; XID_Continue # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; XID_Continue # Mn MODI SIGN ANUSVARA +1163E ; XID_Continue # Mc MODI SIGN VISARGA +1163F..11640 ; XID_Continue # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11644 ; XID_Continue # Lo MODI SIGN HUVA +11650..11659 ; XID_Continue # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11680..116AA ; XID_Continue # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; XID_Continue # Mn TAKRI SIGN ANUSVARA +116AC ; XID_Continue # Mc TAKRI SIGN VISARGA +116AD ; XID_Continue # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; XID_Continue # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; XID_Continue # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; XID_Continue # Mc TAKRI SIGN VIRAMA +116B7 ; XID_Continue # Mn TAKRI SIGN NUKTA +116B8 ; XID_Continue # Lo TAKRI LETTER ARCHAIC KHA +116C0..116C9 ; XID_Continue # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; XID_Continue # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; XID_Continue # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; XID_Continue # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; XID_Continue # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; XID_Continue # Mc AHOM VOWEL SIGN E +11727..1172B ; XID_Continue # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; XID_Continue # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +11740..11746 ; XID_Continue # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; XID_Continue # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; XID_Continue # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; XID_Continue # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; XID_Continue # Mc DOGRA SIGN VISARGA +11839..1183A ; XID_Continue # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +118A0..118DF ; XID_Continue # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; XID_Continue # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118FF..11906 ; XID_Continue # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; XID_Continue # Lo DIVES AKURU LETTER O +1190C..11913 ; XID_Continue # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; XID_Continue # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; XID_Continue # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; XID_Continue # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; XID_Continue # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; XID_Continue # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; XID_Continue # Mc DIVES AKURU SIGN HALANTA +1193E ; XID_Continue # Mn DIVES AKURU VIRAMA +1193F ; XID_Continue # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; XID_Continue # Mc DIVES AKURU MEDIAL YA +11941 ; XID_Continue # Lo DIVES AKURU INITIAL RA +11942 ; XID_Continue # Mc DIVES AKURU MEDIAL RA +11943 ; XID_Continue # Mn DIVES AKURU SIGN NUKTA +11950..11959 ; XID_Continue # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; XID_Continue # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; XID_Continue # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; XID_Continue # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; XID_Continue # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; XID_Continue # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; XID_Continue # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; XID_Continue # Mn NANDINAGARI SIGN VIRAMA +119E1 ; XID_Continue # Lo NANDINAGARI SIGN AVAGRAHA +119E3 ; XID_Continue # Lo NANDINAGARI HEADSTROKE +119E4 ; XID_Continue # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; XID_Continue # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; XID_Continue # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; XID_Continue # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; XID_Continue # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; XID_Continue # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; XID_Continue # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; XID_Continue # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; XID_Continue # Mn ZANABAZAR SQUARE SUBJOINER +11A50 ; XID_Continue # Lo SOYOMBO LETTER A +11A51..11A56 ; XID_Continue # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; XID_Continue # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; XID_Continue # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; XID_Continue # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; XID_Continue # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; XID_Continue # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; XID_Continue # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9D ; XID_Continue # Lo SOYOMBO MARK PLUTA +11AB0..11AF8 ; XID_Continue # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; XID_Continue # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; XID_Continue # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; XID_Continue # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; XID_Continue # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; XID_Continue # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; XID_Continue # Mc BHAIKSUKI SIGN VISARGA +11C3F ; XID_Continue # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; XID_Continue # Lo BHAIKSUKI SIGN AVAGRAHA +11C50..11C59 ; XID_Continue # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C72..11C8F ; XID_Continue # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; XID_Continue # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; XID_Continue # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; XID_Continue # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; XID_Continue # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; XID_Continue # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; XID_Continue # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; XID_Continue # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D00..11D06 ; XID_Continue # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; XID_Continue # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; XID_Continue # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; XID_Continue # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; XID_Continue # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; XID_Continue # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; XID_Continue # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; XID_Continue # Lo MASARAM GONDI REPHA +11D47 ; XID_Continue # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; XID_Continue # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; XID_Continue # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; XID_Continue # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; XID_Continue # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; XID_Continue # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; XID_Continue # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; XID_Continue # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; XID_Continue # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; XID_Continue # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; XID_Continue # Mn GUNJALA GONDI VIRAMA +11D98 ; XID_Continue # Lo GUNJALA GONDI OM +11DA0..11DA9 ; XID_Continue # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; XID_Continue # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; XID_Continue # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; XID_Continue # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11FB0 ; XID_Continue # Lo LISU LETTER YHA +12000..12399 ; XID_Continue # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; XID_Continue # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12480..12543 ; XID_Continue # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; XID_Continue # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +13000..1342E ; XID_Continue # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; XID_Continue # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; XID_Continue # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; XID_Continue # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; XID_Continue # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A70..16ABE ; XID_Continue # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; XID_Continue # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; XID_Continue # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; XID_Continue # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B00..16B2F ; XID_Continue # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; XID_Continue # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B40..16B43 ; XID_Continue # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B50..16B59 ; XID_Continue # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B63..16B77 ; XID_Continue # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; XID_Continue # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; XID_Continue # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16F00..16F4A ; XID_Continue # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; XID_Continue # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; XID_Continue # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; XID_Continue # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; XID_Continue # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; XID_Continue # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; XID_Continue # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; XID_Continue # Lm OLD CHINESE ITERATION MARK +16FE4 ; XID_Continue # Mn KHITAN SMALL SCRIPT FILLER +16FF0..16FF1 ; XID_Continue # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; XID_Continue # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; XID_Continue # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; XID_Continue # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; XID_Continue # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; XID_Continue # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; XID_Continue # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; XID_Continue # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; XID_Continue # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; XID_Continue # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; XID_Continue # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; XID_Continue # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; XID_Continue # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; XID_Continue # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; XID_Continue # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9D..1BC9E ; XID_Continue # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; XID_Continue # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; XID_Continue # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165..1D166 ; XID_Continue # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D167..1D169 ; XID_Continue # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; XID_Continue # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; XID_Continue # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; XID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; XID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; XID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D400..1D454 ; XID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; XID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; XID_Continue # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; XID_Continue # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; XID_Continue # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; XID_Continue # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; XID_Continue # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; XID_Continue # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; XID_Continue # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; XID_Continue # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; XID_Continue # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; XID_Continue # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; XID_Continue # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; XID_Continue # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; XID_Continue # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; XID_Continue # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; XID_Continue # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; XID_Continue # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; XID_Continue # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; XID_Continue # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; XID_Continue # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; XID_Continue # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; XID_Continue # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; XID_Continue # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; XID_Continue # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; XID_Continue # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; XID_Continue # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; XID_Continue # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1DA00..1DA36 ; XID_Continue # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; XID_Continue # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; XID_Continue # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; XID_Continue # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; XID_Continue # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; XID_Continue # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1DF00..1DF09 ; XID_Continue # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; XID_Continue # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; XID_Continue # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E000..1E006 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; XID_Continue # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; XID_Continue # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; XID_Continue # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; XID_Continue # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E100..1E12C ; XID_Continue # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; XID_Continue # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; XID_Continue # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; XID_Continue # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; XID_Continue # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E290..1E2AD ; XID_Continue # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; XID_Continue # Mn TOTO SIGN RISING TONE +1E2C0..1E2EB ; XID_Continue # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; XID_Continue # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; XID_Continue # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E7E0..1E7E6 ; XID_Continue # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; XID_Continue # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; XID_Continue # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; XID_Continue # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; XID_Continue # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8D0..1E8D6 ; XID_Continue # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E900..1E943 ; XID_Continue # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; XID_Continue # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; XID_Continue # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; XID_Continue # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1EE00..1EE03 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; XID_Continue # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; XID_Continue # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; XID_Continue # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; XID_Continue # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; XID_Continue # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; XID_Continue # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; XID_Continue # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; XID_Continue # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; XID_Continue # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; XID_Continue # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; XID_Continue # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; XID_Continue # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; XID_Continue # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1FBF0..1FBF9 ; XID_Continue # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; XID_Continue # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; XID_Continue # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; XID_Continue # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; XID_Continue # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; XID_Continue # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; XID_Continue # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; XID_Continue # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A +E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 135053 + +# ================================================ + +# Derived Property: Default_Ignorable_Code_Point +# Generated from +# Other_Default_Ignorable_Code_Point +# + Cf (Format characters) +# + Variation_Selector +# - White_Space +# - FFF9..FFFB (Interlinear annotation format characters) +# - 13430..13438 (Egyptian hieroglyph format characters) +# - Prepended_Concatenation_Mark (Exceptional format characters that should be visible) + +00AD ; Default_Ignorable_Code_Point # Cf SOFT HYPHEN +034F ; Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +061C ; Default_Ignorable_Code_Point # Cf ARABIC LETTER MARK +115F..1160 ; Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +180B..180D ; Default_Ignorable_Code_Point # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Default_Ignorable_Code_Point # Cf MONGOLIAN VOWEL SEPARATOR +180F ; Default_Ignorable_Code_Point # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +200B..200F ; Default_Ignorable_Code_Point # Cf [5] ZERO WIDTH SPACE..RIGHT-TO-LEFT MARK +202A..202E ; Default_Ignorable_Code_Point # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2060..2064 ; Default_Ignorable_Code_Point # Cf [5] WORD JOINER..INVISIBLE PLUS +2065 ; Default_Ignorable_Code_Point # Cn +2066..206F ; Default_Ignorable_Code_Point # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +3164 ; Default_Ignorable_Code_Point # Lo HANGUL FILLER +FE00..FE0F ; Default_Ignorable_Code_Point # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FEFF ; Default_Ignorable_Code_Point # Cf ZERO WIDTH NO-BREAK SPACE +FFA0 ; Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Default_Ignorable_Code_Point # Cn [9] .. +1BCA0..1BCA3 ; Default_Ignorable_Code_Point # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1D173..1D17A ; Default_Ignorable_Code_Point # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +E0000 ; Default_Ignorable_Code_Point # Cn +E0001 ; Default_Ignorable_Code_Point # Cf LANGUAGE TAG +E0002..E001F ; Default_Ignorable_Code_Point # Cn [30] .. +E0020..E007F ; Default_Ignorable_Code_Point # Cf [96] TAG SPACE..CANCEL TAG +E0080..E00FF ; Default_Ignorable_Code_Point # Cn [128] .. +E0100..E01EF ; Default_Ignorable_Code_Point # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 +E01F0..E0FFF ; Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 4174 + +# ================================================ + +# Derived Property: Grapheme_Extend +# Generated from: Me + Mn + Other_Grapheme_Extend +# Note: depending on an application's interpretation of Co (private use), +# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither. + +0300..036F ; Grapheme_Extend # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0483..0487 ; Grapheme_Extend # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0488..0489 ; Grapheme_Extend # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +0591..05BD ; Grapheme_Extend # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BF ; Grapheme_Extend # Mn HEBREW POINT RAFE +05C1..05C2 ; Grapheme_Extend # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Grapheme_Extend # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Grapheme_Extend # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Grapheme_Extend # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..065F ; Grapheme_Extend # Mn [21] ARABIC FATHATAN..ARABIC WAVY HAMZA BELOW +0670 ; Grapheme_Extend # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Grapheme_Extend # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DF..06E4 ; Grapheme_Extend # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Grapheme_Extend # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06EA..06ED ; Grapheme_Extend # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +0711 ; Grapheme_Extend # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..074A ; Grapheme_Extend # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Grapheme_Extend # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Grapheme_Extend # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07FD ; Grapheme_Extend # Mn NKO DANTAYALAN +0816..0819 ; Grapheme_Extend # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081B..0823 ; Grapheme_Extend # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Grapheme_Extend # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082D ; Grapheme_Extend # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0859..085B ; Grapheme_Extend # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +0898..089F ; Grapheme_Extend # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08CA..08E1 ; Grapheme_Extend # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..0902 ; Grapheme_Extend # Mn [32] ARABIC TURNED DAMMA BELOW..DEVANAGARI SIGN ANUSVARA +093A ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN OE +093C ; Grapheme_Extend # Mn DEVANAGARI SIGN NUKTA +0941..0948 ; Grapheme_Extend # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +094D ; Grapheme_Extend # Mn DEVANAGARI SIGN VIRAMA +0951..0957 ; Grapheme_Extend # Mn [7] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Grapheme_Extend # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Grapheme_Extend # Mn BENGALI SIGN CANDRABINDU +09BC ; Grapheme_Extend # Mn BENGALI SIGN NUKTA +09BE ; Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09C1..09C4 ; Grapheme_Extend # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09CD ; Grapheme_Extend # Mn BENGALI SIGN VIRAMA +09D7 ; Grapheme_Extend # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Grapheme_Extend # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09FE ; Grapheme_Extend # Mn BENGALI SANDHI MARK +0A01..0A02 ; Grapheme_Extend # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A3C ; Grapheme_Extend # Mn GURMUKHI SIGN NUKTA +0A41..0A42 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Grapheme_Extend # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Grapheme_Extend # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Grapheme_Extend # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Grapheme_Extend # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Grapheme_Extend # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Grapheme_Extend # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0ABC ; Grapheme_Extend # Mn GUJARATI SIGN NUKTA +0AC1..0AC5 ; Grapheme_Extend # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0ACD ; Grapheme_Extend # Mn GUJARATI SIGN VIRAMA +0AE2..0AE3 ; Grapheme_Extend # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFF ; Grapheme_Extend # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B01 ; Grapheme_Extend # Mn ORIYA SIGN CANDRABINDU +0B3C ; Grapheme_Extend # Mn ORIYA SIGN NUKTA +0B3E ; Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B3F ; Grapheme_Extend # Mn ORIYA VOWEL SIGN I +0B41..0B44 ; Grapheme_Extend # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B4D ; Grapheme_Extend # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Grapheme_Extend # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Grapheme_Extend # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Grapheme_Extend # Mn TAMIL SIGN ANUSVARA +0BBE ; Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BC0 ; Grapheme_Extend # Mn TAMIL VOWEL SIGN II +0BCD ; Grapheme_Extend # Mn TAMIL SIGN VIRAMA +0BD7 ; Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0C00 ; Grapheme_Extend # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C04 ; Grapheme_Extend # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C3C ; Grapheme_Extend # Mn TELUGU SIGN NUKTA +0C3E..0C40 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C46..0C48 ; Grapheme_Extend # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Grapheme_Extend # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Grapheme_Extend # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Grapheme_Extend # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Grapheme_Extend # Mn KANNADA SIGN CANDRABINDU +0CBC ; Grapheme_Extend # Mn KANNADA SIGN NUKTA +0CBF ; Grapheme_Extend # Mn KANNADA VOWEL SIGN I +0CC2 ; Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CC6 ; Grapheme_Extend # Mn KANNADA VOWEL SIGN E +0CCC..0CCD ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Grapheme_Extend # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Grapheme_Extend # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D3B..0D3C ; Grapheme_Extend # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3E ; Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D41..0D44 ; Grapheme_Extend # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D4D ; Grapheme_Extend # Mn MALAYALAM SIGN VIRAMA +0D57 ; Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Grapheme_Extend # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Grapheme_Extend # Mn SINHALA SIGN CANDRABINDU +0DCA ; Grapheme_Extend # Mn SINHALA SIGN AL-LAKUNA +0DCF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DD2..0DD4 ; Grapheme_Extend # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Grapheme_Extend # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DDF ; Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +0E31 ; Grapheme_Extend # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Grapheme_Extend # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E47..0E4E ; Grapheme_Extend # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0EB1 ; Grapheme_Extend # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EBC ; Grapheme_Extend # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EC8..0ECD ; Grapheme_Extend # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0F18..0F19 ; Grapheme_Extend # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Grapheme_Extend # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Grapheme_Extend # Mn TIBETAN MARK TSA -PHRU +0F71..0F7E ; Grapheme_Extend # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F80..0F84 ; Grapheme_Extend # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F86..0F87 ; Grapheme_Extend # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F8D..0F97 ; Grapheme_Extend # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Grapheme_Extend # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FC6 ; Grapheme_Extend # Mn TIBETAN SYMBOL PADMA GDAN +102D..1030 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1032..1037 ; Grapheme_Extend # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1039..103A ; Grapheme_Extend # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103D..103E ; Grapheme_Extend # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1058..1059 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Grapheme_Extend # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1071..1074 ; Grapheme_Extend # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Grapheme_Extend # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1085..1086 ; Grapheme_Extend # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +108D ; Grapheme_Extend # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +109D ; Grapheme_Extend # Mn MYANMAR VOWEL SIGN AITON AI +135D..135F ; Grapheme_Extend # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1712..1714 ; Grapheme_Extend # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1732..1733 ; Grapheme_Extend # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Grapheme_Extend # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Grapheme_Extend # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B4..17B5 ; Grapheme_Extend # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B7..17BD ; Grapheme_Extend # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17C6 ; Grapheme_Extend # Mn KHMER SIGN NIKAHIT +17C9..17D3 ; Grapheme_Extend # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Grapheme_Extend # Mn KHMER SIGN ATTHACAN +180B..180D ; Grapheme_Extend # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Grapheme_Extend # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1885..1886 ; Grapheme_Extend # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Grapheme_Extend # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Grapheme_Extend # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1927..1928 ; Grapheme_Extend # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1932 ; Grapheme_Extend # Mn LIMBU SMALL LETTER ANUSVARA +1939..193B ; Grapheme_Extend # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A17..1A18 ; Grapheme_Extend # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A1B ; Grapheme_Extend # Mn BUGINESE VOWEL SIGN AE +1A56 ; Grapheme_Extend # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A58..1A5E ; Grapheme_Extend # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A60 ; Grapheme_Extend # Mn TAI THAM SIGN SAKOT +1A62 ; Grapheme_Extend # Mn TAI THAM VOWEL SIGN MAI SAT +1A65..1A6C ; Grapheme_Extend # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A73..1A7C ; Grapheme_Extend # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Grapheme_Extend # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Grapheme_Extend # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Grapheme_Extend # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Grapheme_Extend # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Grapheme_Extend # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B34 ; Grapheme_Extend # Mn BALINESE SIGN REREKAN +1B35 ; Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Grapheme_Extend # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3C ; Grapheme_Extend # Mn BALINESE VOWEL SIGN LA LENGA +1B42 ; Grapheme_Extend # Mn BALINESE VOWEL SIGN PEPET +1B6B..1B73 ; Grapheme_Extend # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B80..1B81 ; Grapheme_Extend # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1BA2..1BA5 ; Grapheme_Extend # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA8..1BA9 ; Grapheme_Extend # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAB..1BAD ; Grapheme_Extend # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE6 ; Grapheme_Extend # Mn BATAK SIGN TOMPI +1BE8..1BE9 ; Grapheme_Extend # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BED ; Grapheme_Extend # Mn BATAK VOWEL SIGN KARO O +1BEF..1BF1 ; Grapheme_Extend # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C2C..1C33 ; Grapheme_Extend # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C36..1C37 ; Grapheme_Extend # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1CD0..1CD2 ; Grapheme_Extend # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Grapheme_Extend # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Grapheme_Extend # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Grapheme_Extend # Mn VEDIC SIGN TIRYAK +1CF4 ; Grapheme_Extend # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Grapheme_Extend # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1DC0..1DFF ; Grapheme_Extend # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C ; Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +20D0..20DC ; Grapheme_Extend # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Grapheme_Extend # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Grapheme_Extend # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Grapheme_Extend # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Grapheme_Extend # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +2CEF..2CF1 ; Grapheme_Extend # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2D7F ; Grapheme_Extend # Mn TIFINAGH CONSONANT JOINER +2DE0..2DFF ; Grapheme_Extend # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +302A..302D ; Grapheme_Extend # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Grapheme_Extend # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +A66F ; Grapheme_Extend # Mn COMBINING CYRILLIC VZMET +A670..A672 ; Grapheme_Extend # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A674..A67D ; Grapheme_Extend # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A69E..A69F ; Grapheme_Extend # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A6F0..A6F1 ; Grapheme_Extend # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A802 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN DVISVARA +A806 ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN HASANTA +A80B ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ANUSVARA +A825..A826 ; Grapheme_Extend # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A82C ; Grapheme_Extend # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4..A8C5 ; Grapheme_Extend # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8E0..A8F1 ; Grapheme_Extend # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8FF ; Grapheme_Extend # Mn DEVANAGARI VOWEL SIGN AY +A926..A92D ; Grapheme_Extend # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A947..A951 ; Grapheme_Extend # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A980..A982 ; Grapheme_Extend # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A9B3 ; Grapheme_Extend # Mn JAVANESE SIGN CECAK TELU +A9B6..A9B9 ; Grapheme_Extend # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BC..A9BD ; Grapheme_Extend # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9E5 ; Grapheme_Extend # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Grapheme_Extend # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA31..AA32 ; Grapheme_Extend # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA35..AA36 ; Grapheme_Extend # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Grapheme_Extend # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Grapheme_Extend # Mn CHAM CONSONANT SIGN FINAL M +AA7C ; Grapheme_Extend # Mn MYANMAR SIGN TAI LAING TONE-2 +AAB0 ; Grapheme_Extend # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Grapheme_Extend # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Grapheme_Extend # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE..AABF ; Grapheme_Extend # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC1 ; Grapheme_Extend # Mn TAI VIET TONE MAI THO +AAEC..AAED ; Grapheme_Extend # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAF6 ; Grapheme_Extend # Mn MEETEI MAYEK VIRAMA +ABE5 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE8 ; Grapheme_Extend # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABED ; Grapheme_Extend # Mn MEETEI MAYEK APUN IYEK +FB1E ; Grapheme_Extend # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE00..FE0F ; Grapheme_Extend # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2F ; Grapheme_Extend # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +101FD ; Grapheme_Extend # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Grapheme_Extend # Mn COPTIC EPACT THOUSANDS MARK +10376..1037A ; Grapheme_Extend # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Grapheme_Extend # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Grapheme_Extend # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Grapheme_Extend # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A38..10A3A ; Grapheme_Extend # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Grapheme_Extend # Mn KHAROSHTHI VIRAMA +10AE5..10AE6 ; Grapheme_Extend # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D24..10D27 ; Grapheme_Extend # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Grapheme_Extend # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10F46..10F50 ; Grapheme_Extend # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Grapheme_Extend # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11001 ; Grapheme_Extend # Mn BRAHMI SIGN ANUSVARA +11038..11046 ; Grapheme_Extend # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11070 ; Grapheme_Extend # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11073..11074 ; Grapheme_Extend # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +1107F..11081 ; Grapheme_Extend # Mn [3] BRAHMI NUMBER JOINER..KAITHI SIGN ANUSVARA +110B3..110B6 ; Grapheme_Extend # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B9..110BA ; Grapheme_Extend # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110C2 ; Grapheme_Extend # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Grapheme_Extend # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Grapheme_Extend # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112D..11134 ; Grapheme_Extend # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11173 ; Grapheme_Extend # Mn MAHAJANI SIGN NUKTA +11180..11181 ; Grapheme_Extend # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +111B6..111BE ; Grapheme_Extend # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111C9..111CC ; Grapheme_Extend # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CF ; Grapheme_Extend # Mn SHARADA SIGN INVERTED CANDRABINDU +1122F..11231 ; Grapheme_Extend # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11234 ; Grapheme_Extend # Mn KHOJKI SIGN ANUSVARA +11236..11237 ; Grapheme_Extend # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +1123E ; Grapheme_Extend # Mn KHOJKI SIGN SUKUN +112DF ; Grapheme_Extend # Mn KHUDAWADI SIGN ANUSVARA +112E3..112EA ; Grapheme_Extend # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +11300..11301 ; Grapheme_Extend # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +1133B..1133C ; Grapheme_Extend # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +1133E ; Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11340 ; Grapheme_Extend # Mn GRANTHA VOWEL SIGN II +11357 ; Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +11366..1136C ; Grapheme_Extend # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Grapheme_Extend # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11438..1143F ; Grapheme_Extend # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11442..11444 ; Grapheme_Extend # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11446 ; Grapheme_Extend # Mn NEWA SIGN NUKTA +1145E ; Grapheme_Extend # Mn NEWA SANDHI MARK +114B0 ; Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114B3..114B8 ; Grapheme_Extend # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114BA ; Grapheme_Extend # Mn TIRHUTA VOWEL SIGN SHORT E +114BD ; Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +114BF..114C0 ; Grapheme_Extend # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C2..114C3 ; Grapheme_Extend # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115AF ; Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +115B2..115B5 ; Grapheme_Extend # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115BC..115BD ; Grapheme_Extend # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BF..115C0 ; Grapheme_Extend # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115DC..115DD ; Grapheme_Extend # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11633..1163A ; Grapheme_Extend # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163D ; Grapheme_Extend # Mn MODI SIGN ANUSVARA +1163F..11640 ; Grapheme_Extend # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +116AB ; Grapheme_Extend # Mn TAKRI SIGN ANUSVARA +116AD ; Grapheme_Extend # Mn TAKRI VOWEL SIGN AA +116B0..116B5 ; Grapheme_Extend # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B7 ; Grapheme_Extend # Mn TAKRI SIGN NUKTA +1171D..1171F ; Grapheme_Extend # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11722..11725 ; Grapheme_Extend # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11727..1172B ; Grapheme_Extend # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +1182F..11837 ; Grapheme_Extend # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11839..1183A ; Grapheme_Extend # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +11930 ; Grapheme_Extend # Mc DIVES AKURU VOWEL SIGN AA +1193B..1193C ; Grapheme_Extend # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193E ; Grapheme_Extend # Mn DIVES AKURU VIRAMA +11943 ; Grapheme_Extend # Mn DIVES AKURU SIGN NUKTA +119D4..119D7 ; Grapheme_Extend # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Grapheme_Extend # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119E0 ; Grapheme_Extend # Mn NANDINAGARI SIGN VIRAMA +11A01..11A0A ; Grapheme_Extend # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A33..11A38 ; Grapheme_Extend # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A3B..11A3E ; Grapheme_Extend # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A47 ; Grapheme_Extend # Mn ZANABAZAR SQUARE SUBJOINER +11A51..11A56 ; Grapheme_Extend # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A59..11A5B ; Grapheme_Extend # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Grapheme_Extend # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A98..11A99 ; Grapheme_Extend # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11C30..11C36 ; Grapheme_Extend # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Grapheme_Extend # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3F ; Grapheme_Extend # Mn BHAIKSUKI SIGN VIRAMA +11C92..11CA7 ; Grapheme_Extend # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CAA..11CB0 ; Grapheme_Extend # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB2..11CB3 ; Grapheme_Extend # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB5..11CB6 ; Grapheme_Extend # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Grapheme_Extend # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Grapheme_Extend # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Grapheme_Extend # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Grapheme_Extend # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D47 ; Grapheme_Extend # Mn MASARAM GONDI RA-KARA +11D90..11D91 ; Grapheme_Extend # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D95 ; Grapheme_Extend # Mn GUNJALA GONDI SIGN ANUSVARA +11D97 ; Grapheme_Extend # Mn GUNJALA GONDI VIRAMA +11EF3..11EF4 ; Grapheme_Extend # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +16AF0..16AF4 ; Grapheme_Extend # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Grapheme_Extend # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F4F ; Grapheme_Extend # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F8F..16F92 ; Grapheme_Extend # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FE4 ; Grapheme_Extend # Mn KHITAN SMALL SCRIPT FILLER +1BC9D..1BC9E ; Grapheme_Extend # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1CF00..1CF2D ; Grapheme_Extend # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Grapheme_Extend # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D165 ; Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D167..1D169 ; Grapheme_Extend # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16E..1D172 ; Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Grapheme_Extend # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1DA00..1DA36 ; Grapheme_Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA3B..1DA6C ; Grapheme_Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA75 ; Grapheme_Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA84 ; Grapheme_Extend # Mn SIGNWRITING LOCATION HEAD NECK +1DA9B..1DA9F ; Grapheme_Extend # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; Grapheme_Extend # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 +1E000..1E006 ; Grapheme_Extend # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Grapheme_Extend # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Grapheme_Extend # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Grapheme_Extend # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Grapheme_Extend # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E130..1E136 ; Grapheme_Extend # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Grapheme_Extend # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Grapheme_Extend # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; Grapheme_Extend # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E94A ; Grapheme_Extend # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG +E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 2090 + +# ================================================ + +# Derived Property: Grapheme_Base +# Generated from: [0..10FFFF] - Cc - Cf - Cs - Co - Cn - Zl - Zp - Grapheme_Extend +# Note: depending on an application's interpretation of Co (private use), +# they may be either in Grapheme_Base, or in Grapheme_Extend, or in neither. + +0020 ; Grapheme_Base # Zs SPACE +0021..0023 ; Grapheme_Base # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Grapheme_Base # Sc DOLLAR SIGN +0025..0027 ; Grapheme_Base # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Grapheme_Base # Ps LEFT PARENTHESIS +0029 ; Grapheme_Base # Pe RIGHT PARENTHESIS +002A ; Grapheme_Base # Po ASTERISK +002B ; Grapheme_Base # Sm PLUS SIGN +002C ; Grapheme_Base # Po COMMA +002D ; Grapheme_Base # Pd HYPHEN-MINUS +002E..002F ; Grapheme_Base # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Grapheme_Base # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Grapheme_Base # Po [2] COLON..SEMICOLON +003C..003E ; Grapheme_Base # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Grapheme_Base # Po [2] QUESTION MARK..COMMERCIAL AT +0041..005A ; Grapheme_Base # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +005B ; Grapheme_Base # Ps LEFT SQUARE BRACKET +005C ; Grapheme_Base # Po REVERSE SOLIDUS +005D ; Grapheme_Base # Pe RIGHT SQUARE BRACKET +005E ; Grapheme_Base # Sk CIRCUMFLEX ACCENT +005F ; Grapheme_Base # Pc LOW LINE +0060 ; Grapheme_Base # Sk GRAVE ACCENT +0061..007A ; Grapheme_Base # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +007B ; Grapheme_Base # Ps LEFT CURLY BRACKET +007C ; Grapheme_Base # Sm VERTICAL LINE +007D ; Grapheme_Base # Pe RIGHT CURLY BRACKET +007E ; Grapheme_Base # Sm TILDE +00A0 ; Grapheme_Base # Zs NO-BREAK SPACE +00A1 ; Grapheme_Base # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Grapheme_Base # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Grapheme_Base # So BROKEN BAR +00A7 ; Grapheme_Base # Po SECTION SIGN +00A8 ; Grapheme_Base # Sk DIAERESIS +00A9 ; Grapheme_Base # So COPYRIGHT SIGN +00AA ; Grapheme_Base # Lo FEMININE ORDINAL INDICATOR +00AB ; Grapheme_Base # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Grapheme_Base # Sm NOT SIGN +00AE ; Grapheme_Base # So REGISTERED SIGN +00AF ; Grapheme_Base # Sk MACRON +00B0 ; Grapheme_Base # So DEGREE SIGN +00B1 ; Grapheme_Base # Sm PLUS-MINUS SIGN +00B2..00B3 ; Grapheme_Base # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; Grapheme_Base # Sk ACUTE ACCENT +00B5 ; Grapheme_Base # L& MICRO SIGN +00B6..00B7 ; Grapheme_Base # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; Grapheme_Base # Sk CEDILLA +00B9 ; Grapheme_Base # No SUPERSCRIPT ONE +00BA ; Grapheme_Base # Lo MASCULINE ORDINAL INDICATOR +00BB ; Grapheme_Base # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; Grapheme_Base # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; Grapheme_Base # Po INVERTED QUESTION MARK +00C0..00D6 ; Grapheme_Base # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D7 ; Grapheme_Base # Sm MULTIPLICATION SIGN +00D8..00F6 ; Grapheme_Base # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F7 ; Grapheme_Base # Sm DIVISION SIGN +00F8..01BA ; Grapheme_Base # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Grapheme_Base # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Grapheme_Base # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Grapheme_Base # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Grapheme_Base # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Grapheme_Base # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02C1 ; Grapheme_Base # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Grapheme_Base # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Grapheme_Base # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Grapheme_Base # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Grapheme_Base # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Grapheme_Base # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Grapheme_Base # Lm MODIFIER LETTER VOICING +02ED ; Grapheme_Base # Sk MODIFIER LETTER UNASPIRATED +02EE ; Grapheme_Base # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Grapheme_Base # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0370..0373 ; Grapheme_Base # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0374 ; Grapheme_Base # Lm GREEK NUMERAL SIGN +0375 ; Grapheme_Base # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; Grapheme_Base # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Grapheme_Base # Lm GREEK YPOGEGRAMMENI +037B..037D ; Grapheme_Base # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037E ; Grapheme_Base # Po GREEK QUESTION MARK +037F ; Grapheme_Base # L& GREEK CAPITAL LETTER YOT +0384..0385 ; Grapheme_Base # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0386 ; Grapheme_Base # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0387 ; Grapheme_Base # Po GREEK ANO TELEIA +0388..038A ; Grapheme_Base # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Grapheme_Base # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Grapheme_Base # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03F5 ; Grapheme_Base # L& [83] GREEK CAPITAL LETTER SIGMA..GREEK LUNATE EPSILON SYMBOL +03F6 ; Grapheme_Base # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..0481 ; Grapheme_Base # L& [139] GREEK CAPITAL LETTER SHO..CYRILLIC SMALL LETTER KOPPA +0482 ; Grapheme_Base # So CYRILLIC THOUSANDS SIGN +048A..052F ; Grapheme_Base # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +0531..0556 ; Grapheme_Base # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Grapheme_Base # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; Grapheme_Base # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; Grapheme_Base # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; Grapheme_Base # Po ARMENIAN FULL STOP +058A ; Grapheme_Base # Pd ARMENIAN HYPHEN +058D..058E ; Grapheme_Base # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +058F ; Grapheme_Base # Sc ARMENIAN DRAM SIGN +05BE ; Grapheme_Base # Pd HEBREW PUNCTUATION MAQAF +05C0 ; Grapheme_Base # Po HEBREW PUNCTUATION PASEQ +05C3 ; Grapheme_Base # Po HEBREW PUNCTUATION SOF PASUQ +05C6 ; Grapheme_Base # Po HEBREW PUNCTUATION NUN HAFUKHA +05D0..05EA ; Grapheme_Base # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Grapheme_Base # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; Grapheme_Base # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +0606..0608 ; Grapheme_Base # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; Grapheme_Base # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; Grapheme_Base # Sc AFGHANI SIGN +060C..060D ; Grapheme_Base # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR +060E..060F ; Grapheme_Base # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +061B ; Grapheme_Base # Po ARABIC SEMICOLON +061D..061F ; Grapheme_Base # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +0620..063F ; Grapheme_Base # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0640 ; Grapheme_Base # Lm ARABIC TATWEEL +0641..064A ; Grapheme_Base # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +0660..0669 ; Grapheme_Base # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066A..066D ; Grapheme_Base # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; Grapheme_Base # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Grapheme_Base # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; Grapheme_Base # Po ARABIC FULL STOP +06D5 ; Grapheme_Base # Lo ARABIC LETTER AE +06DE ; Grapheme_Base # So ARABIC START OF RUB EL HIZB +06E5..06E6 ; Grapheme_Base # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E9 ; Grapheme_Base # So ARABIC PLACE OF SAJDAH +06EE..06EF ; Grapheme_Base # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; Grapheme_Base # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; Grapheme_Base # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; Grapheme_Base # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; Grapheme_Base # Lo ARABIC LETTER HEH WITH INVERTED V +0700..070D ; Grapheme_Base # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +0710 ; Grapheme_Base # Lo SYRIAC LETTER ALAPH +0712..072F ; Grapheme_Base # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +074D..07A5 ; Grapheme_Base # Lo [89] SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER WAAVU +07B1 ; Grapheme_Base # Lo THAANA LETTER NAA +07C0..07C9 ; Grapheme_Base # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; Grapheme_Base # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07F4..07F5 ; Grapheme_Base # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; Grapheme_Base # So NKO SYMBOL OO DENNEN +07F7..07F9 ; Grapheme_Base # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; Grapheme_Base # Lm NKO LAJANYALAN +07FE..07FF ; Grapheme_Base # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN +0800..0815 ; Grapheme_Base # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +081A ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +0824 ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER SHORT A +0828 ; Grapheme_Base # Lm SAMARITAN MODIFIER LETTER I +0830..083E ; Grapheme_Base # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +0840..0858 ; Grapheme_Base # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +085E ; Grapheme_Base # Po MANDAIC PUNCTUATION +0860..086A ; Grapheme_Base # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA +0870..0887 ; Grapheme_Base # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; Grapheme_Base # Sk ARABIC RAISED ROUND DOT +0889..088E ; Grapheme_Base # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +08A0..08C8 ; Grapheme_Base # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; Grapheme_Base # Lm ARABIC SMALL FARSI YEH +0903 ; Grapheme_Base # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Grapheme_Base # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093B ; Grapheme_Base # Mc DEVANAGARI VOWEL SIGN OOE +093D ; Grapheme_Base # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Grapheme_Base # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0949..094C ; Grapheme_Base # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Grapheme_Base # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; Grapheme_Base # Lo DEVANAGARI OM +0958..0961 ; Grapheme_Base # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0964..0965 ; Grapheme_Base # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0966..096F ; Grapheme_Base # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; Grapheme_Base # Po DEVANAGARI ABBREVIATION SIGN +0971 ; Grapheme_Base # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..0980 ; Grapheme_Base # Lo [15] DEVANAGARI LETTER CANDRA A..BENGALI ANJI +0982..0983 ; Grapheme_Base # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Grapheme_Base # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Grapheme_Base # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Grapheme_Base # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Grapheme_Base # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Grapheme_Base # Lo BENGALI LETTER LA +09B6..09B9 ; Grapheme_Base # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BD ; Grapheme_Base # Lo BENGALI SIGN AVAGRAHA +09BF..09C0 ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN I..BENGALI VOWEL SIGN II +09C7..09C8 ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Grapheme_Base # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CE ; Grapheme_Base # Lo BENGALI LETTER KHANDA TA +09DC..09DD ; Grapheme_Base # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Grapheme_Base # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E6..09EF ; Grapheme_Base # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; Grapheme_Base # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; Grapheme_Base # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; Grapheme_Base # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; Grapheme_Base # So BENGALI ISSHAR +09FB ; Grapheme_Base # Sc BENGALI GANDA MARK +09FC ; Grapheme_Base # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; Grapheme_Base # Po BENGALI ABBREVIATION SIGN +0A03 ; Grapheme_Base # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Grapheme_Base # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Grapheme_Base # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Grapheme_Base # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Grapheme_Base # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Grapheme_Base # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Grapheme_Base # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Grapheme_Base # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3E..0A40 ; Grapheme_Base # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A59..0A5C ; Grapheme_Base # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Grapheme_Base # Lo GURMUKHI LETTER FA +0A66..0A6F ; Grapheme_Base # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A72..0A74 ; Grapheme_Base # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A76 ; Grapheme_Base # Po GURMUKHI ABBREVIATION SIGN +0A83 ; Grapheme_Base # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Grapheme_Base # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Grapheme_Base # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Grapheme_Base # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Grapheme_Base # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Grapheme_Base # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Grapheme_Base # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABD ; Grapheme_Base # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Grapheme_Base # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC9 ; Grapheme_Base # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Grapheme_Base # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AD0 ; Grapheme_Base # Lo GUJARATI OM +0AE0..0AE1 ; Grapheme_Base # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE6..0AEF ; Grapheme_Base # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; Grapheme_Base # Po GUJARATI ABBREVIATION SIGN +0AF1 ; Grapheme_Base # Sc GUJARATI RUPEE SIGN +0AF9 ; Grapheme_Base # Lo GUJARATI LETTER ZHA +0B02..0B03 ; Grapheme_Base # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Grapheme_Base # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Grapheme_Base # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Grapheme_Base # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Grapheme_Base # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Grapheme_Base # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Grapheme_Base # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3D ; Grapheme_Base # Lo ORIYA SIGN AVAGRAHA +0B40 ; Grapheme_Base # Mc ORIYA VOWEL SIGN II +0B47..0B48 ; Grapheme_Base # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Grapheme_Base # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B5C..0B5D ; Grapheme_Base # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Grapheme_Base # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B66..0B6F ; Grapheme_Base # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; Grapheme_Base # So ORIYA ISSHAR +0B71 ; Grapheme_Base # Lo ORIYA LETTER WA +0B72..0B77 ; Grapheme_Base # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS +0B83 ; Grapheme_Base # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Grapheme_Base # Lo [6] TAMIL LETTER A..TAMIL LETTER UU +0B8E..0B90 ; Grapheme_Base # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Grapheme_Base # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Grapheme_Base # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Grapheme_Base # Lo TAMIL LETTER JA +0B9E..0B9F ; Grapheme_Base # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Grapheme_Base # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; Grapheme_Base # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Grapheme_Base # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBF ; Grapheme_Base # Mc TAMIL VOWEL SIGN I +0BC1..0BC2 ; Grapheme_Base # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Grapheme_Base # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Grapheme_Base # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD0 ; Grapheme_Base # Lo TAMIL OM +0BE6..0BEF ; Grapheme_Base # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Grapheme_Base # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; Grapheme_Base # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; Grapheme_Base # Sc TAMIL RUPEE SIGN +0BFA ; Grapheme_Base # So TAMIL NUMBER SIGN +0C01..0C03 ; Grapheme_Base # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C05..0C0C ; Grapheme_Base # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Grapheme_Base # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Grapheme_Base # Lo [23] TELUGU LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Grapheme_Base # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3D ; Grapheme_Base # Lo TELUGU SIGN AVAGRAHA +0C41..0C44 ; Grapheme_Base # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C58..0C5A ; Grapheme_Base # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Grapheme_Base # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Grapheme_Base # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C66..0C6F ; Grapheme_Base # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; Grapheme_Base # Po TELUGU SIGN SIDDHAM +0C78..0C7E ; Grapheme_Base # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; Grapheme_Base # So TELUGU SIGN TUUMU +0C80 ; Grapheme_Base # Lo KANNADA SIGN SPACING CANDRABINDU +0C82..0C83 ; Grapheme_Base # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; Grapheme_Base # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; Grapheme_Base # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Grapheme_Base # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Grapheme_Base # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Grapheme_Base # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Grapheme_Base # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBD ; Grapheme_Base # Lo KANNADA SIGN AVAGRAHA +0CBE ; Grapheme_Base # Mc KANNADA VOWEL SIGN AA +0CC0..0CC1 ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN U +0CC3..0CC4 ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN VOCALIC R..KANNADA VOWEL SIGN VOCALIC RR +0CC7..0CC8 ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Grapheme_Base # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CDD..0CDE ; Grapheme_Base # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Grapheme_Base # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE6..0CEF ; Grapheme_Base # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; Grapheme_Base # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA +0D02..0D03 ; Grapheme_Base # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; Grapheme_Base # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Grapheme_Base # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Grapheme_Base # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3D ; Grapheme_Base # Lo MALAYALAM SIGN AVAGRAHA +0D3F..0D40 ; Grapheme_Base # Mc [2] MALAYALAM VOWEL SIGN I..MALAYALAM VOWEL SIGN II +0D46..0D48 ; Grapheme_Base # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Grapheme_Base # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4E ; Grapheme_Base # Lo MALAYALAM LETTER DOT REPH +0D4F ; Grapheme_Base # So MALAYALAM SIGN PARA +0D54..0D56 ; Grapheme_Base # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D58..0D5E ; Grapheme_Base # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; Grapheme_Base # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D66..0D6F ; Grapheme_Base # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; Grapheme_Base # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; Grapheme_Base # So MALAYALAM DATE MARK +0D7A..0D7F ; Grapheme_Base # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K +0D82..0D83 ; Grapheme_Base # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Grapheme_Base # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Grapheme_Base # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Grapheme_Base # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Grapheme_Base # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Grapheme_Base # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DD0..0DD1 ; Grapheme_Base # Mc [2] SINHALA VOWEL SIGN KETTI AEDA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD8..0DDE ; Grapheme_Base # Mc [7] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN KOMBUVA HAA GAYANUKITTA +0DE6..0DEF ; Grapheme_Base # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; Grapheme_Base # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; Grapheme_Base # Po SINHALA PUNCTUATION KUNDDALIYA +0E01..0E30 ; Grapheme_Base # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E32..0E33 ; Grapheme_Base # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E3F ; Grapheme_Base # Sc THAI CURRENCY SYMBOL BAHT +0E40..0E45 ; Grapheme_Base # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Grapheme_Base # Lm THAI CHARACTER MAIYAMOK +0E4F ; Grapheme_Base # Po THAI CHARACTER FONGMAN +0E50..0E59 ; Grapheme_Base # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; Grapheme_Base # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0E81..0E82 ; Grapheme_Base # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Grapheme_Base # Lo LAO LETTER KHO TAM +0E86..0E8A ; Grapheme_Base # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Grapheme_Base # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Grapheme_Base # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; Grapheme_Base # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB2..0EB3 ; Grapheme_Base # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EBD ; Grapheme_Base # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Grapheme_Base # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Grapheme_Base # Lm LAO KO LA +0ED0..0ED9 ; Grapheme_Base # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; Grapheme_Base # Lo [4] LAO HO NO..LAO LETTER KHMU NYO +0F00 ; Grapheme_Base # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; Grapheme_Base # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; Grapheme_Base # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; Grapheme_Base # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Grapheme_Base # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F1A..0F1F ; Grapheme_Base # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; Grapheme_Base # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; Grapheme_Base # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; Grapheme_Base # So TIBETAN MARK BSDUS RTAGS +0F36 ; Grapheme_Base # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F38 ; Grapheme_Base # So TIBETAN MARK CHE MGO +0F3A ; Grapheme_Base # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; Grapheme_Base # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; Grapheme_Base # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; Grapheme_Base # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; Grapheme_Base # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; Grapheme_Base # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Grapheme_Base # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F7F ; Grapheme_Base # Mc TIBETAN SIGN RNAM BCAD +0F85 ; Grapheme_Base # Po TIBETAN MARK PALUTA +0F88..0F8C ; Grapheme_Base # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0FBE..0FC5 ; Grapheme_Base # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC7..0FCC ; Grapheme_Base # So [6] TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; Grapheme_Base # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; Grapheme_Base # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD5..0FD8 ; Grapheme_Base # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +0FD9..0FDA ; Grapheme_Base # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS +1000..102A ; Grapheme_Base # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +1031 ; Grapheme_Base # Mc MYANMAR VOWEL SIGN E +1038 ; Grapheme_Base # Mc MYANMAR SIGN VISARGA +103B..103C ; Grapheme_Base # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103F ; Grapheme_Base # Lo MYANMAR LETTER GREAT SA +1040..1049 ; Grapheme_Base # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; Grapheme_Base # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; Grapheme_Base # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +105A..105D ; Grapheme_Base # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +1061 ; Grapheme_Base # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Grapheme_Base # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Grapheme_Base # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA +1067..106D ; Grapheme_Base # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Grapheme_Base # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1075..1081 ; Grapheme_Base # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1083..1084 ; Grapheme_Base # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1087..108C ; Grapheme_Base # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108E ; Grapheme_Base # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Grapheme_Base # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; Grapheme_Base # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; Grapheme_Base # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109E..109F ; Grapheme_Base # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +10A0..10C5 ; Grapheme_Base # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER YN +10CD ; Grapheme_Base # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Grapheme_Base # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FB ; Grapheme_Base # Po GEORGIAN PARAGRAPH SEPARATOR +10FC ; Grapheme_Base # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Grapheme_Base # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1100..1248 ; Grapheme_Base # Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA +124A..124D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Grapheme_Base # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Grapheme_Base # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Grapheme_Base # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Grapheme_Base # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Grapheme_Base # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Grapheme_Base # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +1360..1368 ; Grapheme_Base # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; Grapheme_Base # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; Grapheme_Base # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; Grapheme_Base # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +13A0..13F5 ; Grapheme_Base # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Grapheme_Base # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +1400 ; Grapheme_Base # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; Grapheme_Base # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; Grapheme_Base # So CANADIAN SYLLABICS CHI SIGN +166E ; Grapheme_Base # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; Grapheme_Base # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +1680 ; Grapheme_Base # Zs OGHAM SPACE MARK +1681..169A ; Grapheme_Base # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; Grapheme_Base # Ps OGHAM FEATHER MARK +169C ; Grapheme_Base # Pe OGHAM REVERSED FEATHER MARK +16A0..16EA ; Grapheme_Base # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EB..16ED ; Grapheme_Base # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +16EE..16F0 ; Grapheme_Base # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; Grapheme_Base # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC +1700..1711 ; Grapheme_Base # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1715 ; Grapheme_Base # Mc TAGALOG SIGN PAMUDPOD +171F..1731 ; Grapheme_Base # Lo [19] TAGALOG LETTER ARCHAIC RA..HANUNOO LETTER HA +1734 ; Grapheme_Base # Mc HANUNOO SIGN PAMUDPOD +1735..1736 ; Grapheme_Base # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1740..1751 ; Grapheme_Base # Lo [18] BUHID LETTER A..BUHID LETTER HA +1760..176C ; Grapheme_Base # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Grapheme_Base # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1780..17B3 ; Grapheme_Base # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B6 ; Grapheme_Base # Mc KHMER VOWEL SIGN AA +17BE..17C5 ; Grapheme_Base # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C7..17C8 ; Grapheme_Base # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17D4..17D6 ; Grapheme_Base # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; Grapheme_Base # Lm KHMER SIGN LEK TOO +17D8..17DA ; Grapheme_Base # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT +17DB ; Grapheme_Base # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; Grapheme_Base # Lo KHMER SIGN AVAKRAHASANYA +17E0..17E9 ; Grapheme_Base # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +17F0..17F9 ; Grapheme_Base # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +1800..1805 ; Grapheme_Base # Po [6] MONGOLIAN BIRGA..MONGOLIAN FOUR DOTS +1806 ; Grapheme_Base # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; Grapheme_Base # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +1810..1819 ; Grapheme_Base # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; Grapheme_Base # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Grapheme_Base # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Grapheme_Base # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Grapheme_Base # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1887..18A8 ; Grapheme_Base # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18AA ; Grapheme_Base # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +18B0..18F5 ; Grapheme_Base # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +1900..191E ; Grapheme_Base # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1923..1926 ; Grapheme_Base # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1929..192B ; Grapheme_Base # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Grapheme_Base # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1933..1938 ; Grapheme_Base # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1940 ; Grapheme_Base # So LIMBU SIGN LOO +1944..1945 ; Grapheme_Base # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; Grapheme_Base # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE +1950..196D ; Grapheme_Base # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Grapheme_Base # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 +1980..19AB ; Grapheme_Base # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; Grapheme_Base # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; Grapheme_Base # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; Grapheme_Base # No NEW TAI LUE THAM DIGIT ONE +19DE..19FF ; Grapheme_Base # So [34] NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC +1A00..1A16 ; Grapheme_Base # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A19..1A1A ; Grapheme_Base # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1E..1A1F ; Grapheme_Base # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION +1A20..1A54 ; Grapheme_Base # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Grapheme_Base # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A57 ; Grapheme_Base # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A61 ; Grapheme_Base # Mc TAI THAM VOWEL SIGN A +1A63..1A64 ; Grapheme_Base # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A6D..1A72 ; Grapheme_Base # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A80..1A89 ; Grapheme_Base # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Grapheme_Base # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; Grapheme_Base # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; Grapheme_Base # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; Grapheme_Base # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG +1B04 ; Grapheme_Base # Mc BALINESE SIGN BISAH +1B05..1B33 ; Grapheme_Base # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B3B ; Grapheme_Base # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3D..1B41 ; Grapheme_Base # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B43..1B44 ; Grapheme_Base # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; Grapheme_Base # Lo [8] BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; Grapheme_Base # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; Grapheme_Base # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; Grapheme_Base # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B74..1B7C ; Grapheme_Base # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E ; Grapheme_Base # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1B82 ; Grapheme_Base # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Grapheme_Base # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Grapheme_Base # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA6..1BA7 ; Grapheme_Base # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BAA ; Grapheme_Base # Mc SUNDANESE SIGN PAMAAEH +1BAE..1BAF ; Grapheme_Base # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; Grapheme_Base # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BE5 ; Grapheme_Base # Lo [44] SUNDANESE AVAGRAHA..BATAK LETTER U +1BE7 ; Grapheme_Base # Mc BATAK VOWEL SIGN E +1BEA..1BEC ; Grapheme_Base # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BEE ; Grapheme_Base # Mc BATAK VOWEL SIGN U +1BF2..1BF3 ; Grapheme_Base # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; Grapheme_Base # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT +1C00..1C23 ; Grapheme_Base # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Grapheme_Base # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C34..1C35 ; Grapheme_Base # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C3B..1C3F ; Grapheme_Base # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; Grapheme_Base # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE +1C4D..1C4F ; Grapheme_Base # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA +1C50..1C59 ; Grapheme_Base # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; Grapheme_Base # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Grapheme_Base # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; Grapheme_Base # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +1C80..1C88 ; Grapheme_Base # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1C90..1CBA ; Grapheme_Base # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Grapheme_Base # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +1CC0..1CC7 ; Grapheme_Base # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA +1CD3 ; Grapheme_Base # Po VEDIC SIGN NIHSHVASA +1CE1 ; Grapheme_Base # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; Grapheme_Base # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Grapheme_Base # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Grapheme_Base # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; Grapheme_Base # Mc VEDIC SIGN ATIKRAMA +1CFA ; Grapheme_Base # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +1D00..1D2B ; Grapheme_Base # L& [44] LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL +1D2C..1D6A ; Grapheme_Base # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D6B..1D77 ; Grapheme_Base # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D78 ; Grapheme_Base # Lm MODIFIER LETTER CYRILLIC EN +1D79..1D9A ; Grapheme_Base # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBF ; Grapheme_Base # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +1E00..1F15 ; Grapheme_Base # L& [278] LATIN CAPITAL LETTER A WITH RING BELOW..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Grapheme_Base # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Grapheme_Base # L& [38] GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Grapheme_Base # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Grapheme_Base # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Grapheme_Base # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Grapheme_Base # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Grapheme_Base # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Grapheme_Base # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; Grapheme_Base # Sk GREEK KORONIS +1FBE ; Grapheme_Base # L& GREEK PROSGEGRAMMENI +1FBF..1FC1 ; Grapheme_Base # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; Grapheme_Base # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Grapheme_Base # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; Grapheme_Base # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; Grapheme_Base # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Grapheme_Base # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; Grapheme_Base # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; Grapheme_Base # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; Grapheme_Base # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; Grapheme_Base # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Grapheme_Base # L& [7] GREEK SMALL LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; Grapheme_Base # Sk [2] GREEK OXIA..GREEK DASIA +2000..200A ; Grapheme_Base # Zs [11] EN QUAD..HAIR SPACE +2010..2015 ; Grapheme_Base # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Grapheme_Base # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Grapheme_Base # Pi LEFT SINGLE QUOTATION MARK +2019 ; Grapheme_Base # Pf RIGHT SINGLE QUOTATION MARK +201A ; Grapheme_Base # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Grapheme_Base # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Grapheme_Base # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Grapheme_Base # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Grapheme_Base # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Grapheme_Base # Po [8] DAGGER..HYPHENATION POINT +202F ; Grapheme_Base # Zs NARROW NO-BREAK SPACE +2030..2038 ; Grapheme_Base # Po [9] PER MILLE SIGN..CARET +2039 ; Grapheme_Base # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Grapheme_Base # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Grapheme_Base # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; Grapheme_Base # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; Grapheme_Base # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Grapheme_Base # Sm FRACTION SLASH +2045 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Grapheme_Base # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Grapheme_Base # Sm COMMERCIAL MINUS SIGN +2053 ; Grapheme_Base # Po SWUNG DASH +2054 ; Grapheme_Base # Pc INVERTED UNDERTIE +2055..205E ; Grapheme_Base # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; Grapheme_Base # Zs MEDIUM MATHEMATICAL SPACE +2070 ; Grapheme_Base # No SUPERSCRIPT ZERO +2071 ; Grapheme_Base # Lm SUPERSCRIPT LATIN SMALL LETTER I +2074..2079 ; Grapheme_Base # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; Grapheme_Base # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Grapheme_Base # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Grapheme_Base # Pe SUPERSCRIPT RIGHT PARENTHESIS +207F ; Grapheme_Base # Lm SUPERSCRIPT LATIN SMALL LETTER N +2080..2089 ; Grapheme_Base # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; Grapheme_Base # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Grapheme_Base # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Grapheme_Base # Pe SUBSCRIPT RIGHT PARENTHESIS +2090..209C ; Grapheme_Base # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +20A0..20C0 ; Grapheme_Base # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +2100..2101 ; Grapheme_Base # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL C +2103..2106 ; Grapheme_Base # So [4] DEGREE CELSIUS..CADA UNA +2107 ; Grapheme_Base # L& EULER CONSTANT +2108..2109 ; Grapheme_Base # So [2] SCRUPLE..DEGREE FAHRENHEIT +210A..2113 ; Grapheme_Base # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2114 ; Grapheme_Base # So L B BAR SYMBOL +2115 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL N +2116..2117 ; Grapheme_Base # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; Grapheme_Base # Sm SCRIPT CAPITAL P +2119..211D ; Grapheme_Base # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2123 ; Grapheme_Base # So [6] PRESCRIPTION TAKE..VERSICLE +2124 ; Grapheme_Base # L& DOUBLE-STRUCK CAPITAL Z +2125 ; Grapheme_Base # So OUNCE SIGN +2126 ; Grapheme_Base # L& OHM SIGN +2127 ; Grapheme_Base # So INVERTED OHM SIGN +2128 ; Grapheme_Base # L& BLACK-LETTER CAPITAL Z +2129 ; Grapheme_Base # So TURNED GREEK SMALL LETTER IOTA +212A..212D ; Grapheme_Base # L& [4] KELVIN SIGN..BLACK-LETTER CAPITAL C +212E ; Grapheme_Base # So ESTIMATED SYMBOL +212F..2134 ; Grapheme_Base # L& [6] SCRIPT SMALL E..SCRIPT SMALL O +2135..2138 ; Grapheme_Base # Lo [4] ALEF SYMBOL..DALET SYMBOL +2139 ; Grapheme_Base # L& INFORMATION SOURCE +213A..213B ; Grapheme_Base # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; Grapheme_Base # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Grapheme_Base # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Grapheme_Base # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; Grapheme_Base # So PROPERTY LINE +214B ; Grapheme_Base # Sm TURNED AMPERSAND +214C..214D ; Grapheme_Base # So [2] PER SIGN..AKTIESELSKAB +214E ; Grapheme_Base # L& TURNED SMALL F +214F ; Grapheme_Base # So SYMBOL FOR SAMARITAN SOURCE +2150..215F ; Grapheme_Base # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2160..2182 ; Grapheme_Base # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Grapheme_Base # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Grapheme_Base # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2189 ; Grapheme_Base # No VULGAR FRACTION ZERO THIRDS +218A..218B ; Grapheme_Base # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; Grapheme_Base # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Grapheme_Base # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Grapheme_Base # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Grapheme_Base # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Grapheme_Base # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Grapheme_Base # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Grapheme_Base # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Grapheme_Base # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Grapheme_Base # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Grapheme_Base # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Grapheme_Base # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Grapheme_Base # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Grapheme_Base # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Grapheme_Base # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Grapheme_Base # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Grapheme_Base # So DOWNWARDS DOUBLE ARROW +21D4 ; Grapheme_Base # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Grapheme_Base # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Grapheme_Base # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Grapheme_Base # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Grapheme_Base # Ps LEFT CEILING +2309 ; Grapheme_Base # Pe RIGHT CEILING +230A ; Grapheme_Base # Ps LEFT FLOOR +230B ; Grapheme_Base # Pe RIGHT FLOOR +230C..231F ; Grapheme_Base # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Grapheme_Base # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Grapheme_Base # So [7] FROWN..KEYBOARD +2329 ; Grapheme_Base # Ps LEFT-POINTING ANGLE BRACKET +232A ; Grapheme_Base # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Grapheme_Base # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Grapheme_Base # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Grapheme_Base # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Grapheme_Base # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Grapheme_Base # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Grapheme_Base # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; Grapheme_Base # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; Grapheme_Base # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; Grapheme_Base # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; Grapheme_Base # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA..24FF ; Grapheme_Base # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; Grapheme_Base # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Grapheme_Base # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Grapheme_Base # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Grapheme_Base # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Grapheme_Base # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Grapheme_Base # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Grapheme_Base # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Grapheme_Base # Sm MUSIC SHARP SIGN +2670..2767 ; Grapheme_Base # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Grapheme_Base # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Grapheme_Base # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Grapheme_Base # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Grapheme_Base # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Grapheme_Base # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Grapheme_Base # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Grapheme_Base # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Grapheme_Base # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Grapheme_Base # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Grapheme_Base # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Grapheme_Base # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Grapheme_Base # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Grapheme_Base # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Grapheme_Base # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; Grapheme_Base # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; Grapheme_Base # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Grapheme_Base # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Grapheme_Base # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Grapheme_Base # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Grapheme_Base # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Grapheme_Base # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Grapheme_Base # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Grapheme_Base # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Grapheme_Base # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Grapheme_Base # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Grapheme_Base # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Grapheme_Base # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Grapheme_Base # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Grapheme_Base # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Grapheme_Base # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Grapheme_Base # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Grapheme_Base # Ps LEFT WHITE CURLY BRACKET +2984 ; Grapheme_Base # Pe RIGHT WHITE CURLY BRACKET +2985 ; Grapheme_Base # Ps LEFT WHITE PARENTHESIS +2986 ; Grapheme_Base # Pe RIGHT WHITE PARENTHESIS +2987 ; Grapheme_Base # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Grapheme_Base # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Grapheme_Base # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Grapheme_Base # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Grapheme_Base # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Grapheme_Base # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Grapheme_Base # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Grapheme_Base # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Grapheme_Base # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Grapheme_Base # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Grapheme_Base # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Grapheme_Base # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Grapheme_Base # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Grapheme_Base # Ps LEFT WIGGLY FENCE +29D9 ; Grapheme_Base # Pe RIGHT WIGGLY FENCE +29DA ; Grapheme_Base # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Grapheme_Base # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Grapheme_Base # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Grapheme_Base # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Grapheme_Base # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Grapheme_Base # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Grapheme_Base # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Grapheme_Base # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Grapheme_Base # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Grapheme_Base # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Grapheme_Base # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; Grapheme_Base # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; Grapheme_Base # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2C00..2C7B ; Grapheme_Base # L& [124] GLAGOLITIC CAPITAL LETTER AZU..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Grapheme_Base # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2CE4 ; Grapheme_Base # L& [103] LATIN CAPITAL LETTER S WITH SWASH TAIL..COPTIC SYMBOL KAI +2CE5..2CEA ; Grapheme_Base # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; Grapheme_Base # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CF2..2CF3 ; Grapheme_Base # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC ; Grapheme_Base # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; Grapheme_Base # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; Grapheme_Base # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER +2D00..2D25 ; Grapheme_Base # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Grapheme_Base # L& GEORGIAN SMALL LETTER YN +2D2D ; Grapheme_Base # L& GEORGIAN SMALL LETTER AEN +2D30..2D67 ; Grapheme_Base # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Grapheme_Base # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; Grapheme_Base # Po TIFINAGH SEPARATOR MARK +2D80..2D96 ; Grapheme_Base # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +2E00..2E01 ; Grapheme_Base # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Grapheme_Base # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Grapheme_Base # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Grapheme_Base # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Grapheme_Base # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Grapheme_Base # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Grapheme_Base # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Grapheme_Base # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Grapheme_Base # Po RAISED SQUARE +2E0C ; Grapheme_Base # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Grapheme_Base # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Grapheme_Base # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Grapheme_Base # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Grapheme_Base # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Grapheme_Base # Pd HYPHEN WITH DIAERESIS +2E1B ; Grapheme_Base # Po TILDE WITH RING ABOVE +2E1C ; Grapheme_Base # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Grapheme_Base # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Grapheme_Base # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Grapheme_Base # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Grapheme_Base # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Grapheme_Base # Ps TOP LEFT HALF BRACKET +2E23 ; Grapheme_Base # Pe TOP RIGHT HALF BRACKET +2E24 ; Grapheme_Base # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Grapheme_Base # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Grapheme_Base # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Grapheme_Base # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Grapheme_Base # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Grapheme_Base # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Grapheme_Base # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Grapheme_Base # Lm VERTICAL TILDE +2E30..2E39 ; Grapheme_Base # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Grapheme_Base # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Grapheme_Base # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Grapheme_Base # Pd DOUBLE HYPHEN +2E41 ; Grapheme_Base # Po REVERSED COMMA +2E42 ; Grapheme_Base # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Grapheme_Base # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; Grapheme_Base # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; Grapheme_Base # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Grapheme_Base # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Grapheme_Base # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Grapheme_Base # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Grapheme_Base # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Grapheme_Base # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Grapheme_Base # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Grapheme_Base # Pd OBLIQUE HYPHEN +2E80..2E99 ; Grapheme_Base # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Grapheme_Base # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Grapheme_Base # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +2FF0..2FFB ; Grapheme_Base # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3000 ; Grapheme_Base # Zs IDEOGRAPHIC SPACE +3001..3003 ; Grapheme_Base # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; Grapheme_Base # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3005 ; Grapheme_Base # Lm IDEOGRAPHIC ITERATION MARK +3006 ; Grapheme_Base # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Grapheme_Base # Nl IDEOGRAPHIC NUMBER ZERO +3008 ; Grapheme_Base # Ps LEFT ANGLE BRACKET +3009 ; Grapheme_Base # Pe RIGHT ANGLE BRACKET +300A ; Grapheme_Base # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Grapheme_Base # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Grapheme_Base # Ps LEFT CORNER BRACKET +300D ; Grapheme_Base # Pe RIGHT CORNER BRACKET +300E ; Grapheme_Base # Ps LEFT WHITE CORNER BRACKET +300F ; Grapheme_Base # Pe RIGHT WHITE CORNER BRACKET +3010 ; Grapheme_Base # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Grapheme_Base # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Grapheme_Base # So [2] POSTAL MARK..GETA MARK +3014 ; Grapheme_Base # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Grapheme_Base # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Grapheme_Base # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Grapheme_Base # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Grapheme_Base # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Grapheme_Base # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Grapheme_Base # Ps LEFT WHITE SQUARE BRACKET +301B ; Grapheme_Base # Pe RIGHT WHITE SQUARE BRACKET +301C ; Grapheme_Base # Pd WAVE DASH +301D ; Grapheme_Base # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Grapheme_Base # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Grapheme_Base # So POSTAL MARK FACE +3021..3029 ; Grapheme_Base # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3030 ; Grapheme_Base # Pd WAVY DASH +3031..3035 ; Grapheme_Base # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; Grapheme_Base # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +3038..303A ; Grapheme_Base # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Grapheme_Base # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +303C ; Grapheme_Base # Lo MASU MARK +303D ; Grapheme_Base # Po PART ALTERNATION MARK +303E..303F ; Grapheme_Base # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +3041..3096 ; Grapheme_Base # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309B..309C ; Grapheme_Base # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309D..309E ; Grapheme_Base # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Grapheme_Base # Lo HIRAGANA DIGRAPH YORI +30A0 ; Grapheme_Base # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30A1..30FA ; Grapheme_Base # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FB ; Grapheme_Base # Po KATAKANA MIDDLE DOT +30FC..30FE ; Grapheme_Base # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +30FF ; Grapheme_Base # Lo KATAKANA DIGRAPH KOTO +3105..312F ; Grapheme_Base # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +3131..318E ; Grapheme_Base # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3190..3191 ; Grapheme_Base # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Grapheme_Base # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Grapheme_Base # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31A0..31BF ; Grapheme_Base # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH +31C0..31E3 ; Grapheme_Base # So [36] CJK STROKE T..CJK STROKE Q +31F0..31FF ; Grapheme_Base # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +3200..321E ; Grapheme_Base # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3220..3229 ; Grapheme_Base # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; Grapheme_Base # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Grapheme_Base # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Grapheme_Base # So PARTNERSHIP SIGN +3251..325F ; Grapheme_Base # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +3260..327F ; Grapheme_Base # So [32] CIRCLED HANGUL KIYEOK..KOREAN STANDARD SYMBOL +3280..3289 ; Grapheme_Base # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Grapheme_Base # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; Grapheme_Base # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..33FF ; Grapheme_Base # So [320] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..SQUARE GAL +3400..4DBF ; Grapheme_Base # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4DC0..4DFF ; Grapheme_Base # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +4E00..A014 ; Grapheme_Base # Lo [21013] CJK UNIFIED IDEOGRAPH-4E00..YI SYLLABLE E +A015 ; Grapheme_Base # Lm YI SYLLABLE WU +A016..A48C ; Grapheme_Base # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; Grapheme_Base # So [55] YI RADICAL QOT..YI RADICAL KE +A4D0..A4F7 ; Grapheme_Base # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Grapheme_Base # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; Grapheme_Base # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A500..A60B ; Grapheme_Base # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Grapheme_Base # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; Grapheme_Base # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; Grapheme_Base # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; Grapheme_Base # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; Grapheme_Base # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO +A640..A66D ; Grapheme_Base # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Grapheme_Base # Lo CYRILLIC LETTER MULTIOCULAR O +A673 ; Grapheme_Base # Po SLAVONIC ASTERISK +A67E ; Grapheme_Base # Po CYRILLIC KAVYKA +A67F ; Grapheme_Base # Lm CYRILLIC PAYEROK +A680..A69B ; Grapheme_Base # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Grapheme_Base # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6A0..A6E5 ; Grapheme_Base # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Grapheme_Base # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F2..A6F7 ; Grapheme_Base # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +A700..A716 ; Grapheme_Base # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Grapheme_Base # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Grapheme_Base # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A722..A76F ; Grapheme_Base # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Grapheme_Base # Lm MODIFIER LETTER US +A771..A787 ; Grapheme_Base # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A788 ; Grapheme_Base # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Grapheme_Base # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A78B..A78E ; Grapheme_Base # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; Grapheme_Base # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; Grapheme_Base # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Grapheme_Base # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; Grapheme_Base # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Grapheme_Base # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Grapheme_Base # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; Grapheme_Base # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; Grapheme_Base # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Grapheme_Base # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A801 ; Grapheme_Base # Lo [7] LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI LETTER I +A803..A805 ; Grapheme_Base # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A807..A80A ; Grapheme_Base # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80C..A822 ; Grapheme_Base # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Grapheme_Base # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A827 ; Grapheme_Base # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; Grapheme_Base # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A830..A835 ; Grapheme_Base # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Grapheme_Base # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Grapheme_Base # Sc NORTH INDIC RUPEE MARK +A839 ; Grapheme_Base # So NORTH INDIC QUANTITY MARK +A840..A873 ; Grapheme_Base # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; Grapheme_Base # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD +A880..A881 ; Grapheme_Base # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Grapheme_Base # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Grapheme_Base # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8CE..A8CF ; Grapheme_Base # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; Grapheme_Base # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE +A8F2..A8F7 ; Grapheme_Base # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; Grapheme_Base # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; Grapheme_Base # Lo DEVANAGARI HEADSTROKE +A8FC ; Grapheme_Base # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; Grapheme_Base # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A900..A909 ; Grapheme_Base # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; Grapheme_Base # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A92E..A92F ; Grapheme_Base # Po [2] KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA +A930..A946 ; Grapheme_Base # Lo [23] REJANG LETTER KA..REJANG LETTER A +A952..A953 ; Grapheme_Base # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; Grapheme_Base # Po REJANG SECTION MARK +A960..A97C ; Grapheme_Base # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +A983 ; Grapheme_Base # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Grapheme_Base # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B4..A9B5 ; Grapheme_Base # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9BA..A9BB ; Grapheme_Base # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BE..A9C0 ; Grapheme_Base # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; Grapheme_Base # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9CF ; Grapheme_Base # Lm JAVANESE PANGRANGKEP +A9D0..A9D9 ; Grapheme_Base # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; Grapheme_Base # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN +A9E0..A9E4 ; Grapheme_Base # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E6 ; Grapheme_Base # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; Grapheme_Base # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; Grapheme_Base # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; Grapheme_Base # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA00..AA28 ; Grapheme_Base # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA2F..AA30 ; Grapheme_Base # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA33..AA34 ; Grapheme_Base # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA40..AA42 ; Grapheme_Base # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA44..AA4B ; Grapheme_Base # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4D ; Grapheme_Base # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; Grapheme_Base # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; Grapheme_Base # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA +AA60..AA6F ; Grapheme_Base # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Grapheme_Base # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Grapheme_Base # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; Grapheme_Base # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; Grapheme_Base # Lo MYANMAR LETTER AITON RA +AA7B ; Grapheme_Base # Mc MYANMAR SIGN PAO KAREN TONE +AA7D ; Grapheme_Base # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AAAF ; Grapheme_Base # Lo [50] MYANMAR LETTER SHWE PALAUNG CHA..TAI VIET LETTER HIGH O +AAB1 ; Grapheme_Base # Lo TAI VIET VOWEL AA +AAB5..AAB6 ; Grapheme_Base # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9..AABD ; Grapheme_Base # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AAC0 ; Grapheme_Base # Lo TAI VIET TONE MAI NUENG +AAC2 ; Grapheme_Base # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Grapheme_Base # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Grapheme_Base # Lm TAI VIET SYMBOL SAM +AADE..AADF ; Grapheme_Base # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI +AAE0..AAEA ; Grapheme_Base # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN II +AAEE..AAEF ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Grapheme_Base # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Grapheme_Base # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Grapheme_Base # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Grapheme_Base # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AB01..AB06 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Grapheme_Base # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +AB30..AB5A ; Grapheme_Base # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5B ; Grapheme_Base # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Grapheme_Base # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB68 ; Grapheme_Base # L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Grapheme_Base # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Grapheme_Base # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +AB70..ABBF ; Grapheme_Base # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA +ABC0..ABE2 ; Grapheme_Base # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE6..ABE7 ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE9..ABEA ; Grapheme_Base # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; Grapheme_Base # Po MEETEI MAYEK CHEIKHEI +ABEC ; Grapheme_Base # Mc MEETEI MAYEK LUM IYEK +ABF0..ABF9 ; Grapheme_Base # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE +AC00..D7A3 ; Grapheme_Base # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Grapheme_Base # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Grapheme_Base # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH +F900..FA6D ; Grapheme_Base # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Grapheme_Base # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +FB00..FB06 ; Grapheme_Base # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FB13..FB17 ; Grapheme_Base # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH +FB1D ; Grapheme_Base # Lo HEBREW LETTER YOD WITH HIRIQ +FB1F..FB28 ; Grapheme_Base # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; Grapheme_Base # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; Grapheme_Base # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Grapheme_Base # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Grapheme_Base # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Grapheme_Base # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Grapheme_Base # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FBB1 ; Grapheme_Base # Lo [108] HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; Grapheme_Base # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; Grapheme_Base # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD3E ; Grapheme_Base # Pe ORNATE LEFT PARENTHESIS +FD3F ; Grapheme_Base # Ps ORNATE RIGHT PARENTHESIS +FD40..FD4F ; Grapheme_Base # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F ; Grapheme_Base # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Grapheme_Base # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF ; Grapheme_Base # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB ; Grapheme_Base # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; Grapheme_Base # Sc RIAL SIGN +FDFD..FDFF ; Grapheme_Base # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE10..FE16 ; Grapheme_Base # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; Grapheme_Base # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Grapheme_Base # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; Grapheme_Base # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; Grapheme_Base # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; Grapheme_Base # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; Grapheme_Base # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; Grapheme_Base # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; Grapheme_Base # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; Grapheme_Base # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; Grapheme_Base # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Grapheme_Base # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; Grapheme_Base # Pd SMALL EM DASH +FE59 ; Grapheme_Base # Ps SMALL LEFT PARENTHESIS +FE5A ; Grapheme_Base # Pe SMALL RIGHT PARENTHESIS +FE5B ; Grapheme_Base # Ps SMALL LEFT CURLY BRACKET +FE5C ; Grapheme_Base # Pe SMALL RIGHT CURLY BRACKET +FE5D ; Grapheme_Base # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; Grapheme_Base # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; Grapheme_Base # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; Grapheme_Base # Sm SMALL PLUS SIGN +FE63 ; Grapheme_Base # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Grapheme_Base # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Grapheme_Base # Po SMALL REVERSE SOLIDUS +FE69 ; Grapheme_Base # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; Grapheme_Base # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FE70..FE74 ; Grapheme_Base # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Grapheme_Base # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +FF01..FF03 ; Grapheme_Base # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; Grapheme_Base # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; Grapheme_Base # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; Grapheme_Base # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; Grapheme_Base # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; Grapheme_Base # Po FULLWIDTH ASTERISK +FF0B ; Grapheme_Base # Sm FULLWIDTH PLUS SIGN +FF0C ; Grapheme_Base # Po FULLWIDTH COMMA +FF0D ; Grapheme_Base # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; Grapheme_Base # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; Grapheme_Base # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; Grapheme_Base # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; Grapheme_Base # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; Grapheme_Base # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF21..FF3A ; Grapheme_Base # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF3B ; Grapheme_Base # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; Grapheme_Base # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; Grapheme_Base # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; Grapheme_Base # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; Grapheme_Base # Pc FULLWIDTH LOW LINE +FF40 ; Grapheme_Base # Sk FULLWIDTH GRAVE ACCENT +FF41..FF5A ; Grapheme_Base # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +FF5B ; Grapheme_Base # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; Grapheme_Base # Sm FULLWIDTH VERTICAL LINE +FF5D ; Grapheme_Base # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; Grapheme_Base # Sm FULLWIDTH TILDE +FF5F ; Grapheme_Base # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; Grapheme_Base # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; Grapheme_Base # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Grapheme_Base # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Grapheme_Base # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Grapheme_Base # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF66..FF6F ; Grapheme_Base # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF70 ; Grapheme_Base # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF71..FF9D ; Grapheme_Base # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +FFA0..FFBE ; Grapheme_Base # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Grapheme_Base # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Grapheme_Base # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I +FFE0..FFE1 ; Grapheme_Base # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; Grapheme_Base # Sm FULLWIDTH NOT SIGN +FFE3 ; Grapheme_Base # Sk FULLWIDTH MACRON +FFE4 ; Grapheme_Base # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; Grapheme_Base # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; Grapheme_Base # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; Grapheme_Base # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; Grapheme_Base # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10000..1000B ; Grapheme_Base # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Grapheme_Base # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Grapheme_Base # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Grapheme_Base # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Grapheme_Base # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Grapheme_Base # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Grapheme_Base # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 +10100..10102 ; Grapheme_Base # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; Grapheme_Base # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Grapheme_Base # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10140..10174 ; Grapheme_Base # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; Grapheme_Base # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; Grapheme_Base # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; Grapheme_Base # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E ; Grapheme_Base # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +10190..1019C ; Grapheme_Base # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101A0 ; Grapheme_Base # So GREEK SYMBOL TAU RHO +101D0..101FC ; Grapheme_Base # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +10280..1029C ; Grapheme_Base # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X +102A0..102D0 ; Grapheme_Base # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 +102E1..102FB ; Grapheme_Base # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +10300..1031F ; Grapheme_Base # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; Grapheme_Base # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..10340 ; Grapheme_Base # Lo [20] OLD ITALIC LETTER YE..GOTHIC LETTER PAIRTHRA +10341 ; Grapheme_Base # Nl GOTHIC LETTER NINETY +10342..10349 ; Grapheme_Base # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Grapheme_Base # Nl GOTHIC LETTER NINE HUNDRED +10350..10375 ; Grapheme_Base # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10380..1039D ; Grapheme_Base # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; Grapheme_Base # Po UGARITIC WORD DIVIDER +103A0..103C3 ; Grapheme_Base # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; Grapheme_Base # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; Grapheme_Base # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; Grapheme_Base # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED +10400..1044F ; Grapheme_Base # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW +10450..1049D ; Grapheme_Base # Lo [78] SHAVIAN LETTER PEEP..OSMANYA LETTER OO +104A0..104A9 ; Grapheme_Base # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE +104B0..104D3 ; Grapheme_Base # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Grapheme_Base # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA +10500..10527 ; Grapheme_Base # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE +10530..10563 ; Grapheme_Base # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; Grapheme_Base # Po CAUCASIAN ALBANIAN CITATION MARK +10570..1057A ; Grapheme_Base # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Grapheme_Base # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Grapheme_Base # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Grapheme_Base # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Grapheme_Base # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Grapheme_Base # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Grapheme_Base # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Grapheme_Base # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE +10600..10736 ; Grapheme_Base # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Grapheme_Base # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Grapheme_Base # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 +10780..10785 ; Grapheme_Base # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Grapheme_Base # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Grapheme_Base # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10800..10805 ; Grapheme_Base # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Grapheme_Base # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Grapheme_Base # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Grapheme_Base # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Grapheme_Base # Lo CYPRIOT SYLLABLE ZA +1083F..10855 ; Grapheme_Base # Lo [23] CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW +10857 ; Grapheme_Base # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; Grapheme_Base # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND +10860..10876 ; Grapheme_Base # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; Grapheme_Base # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; Grapheme_Base # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY +10880..1089E ; Grapheme_Base # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; Grapheme_Base # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED +108E0..108F2 ; Grapheme_Base # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Grapheme_Base # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; Grapheme_Base # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED +10900..10915 ; Grapheme_Base # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; Grapheme_Base # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; Grapheme_Base # Po PHOENICIAN WORD SEPARATOR +10920..10939 ; Grapheme_Base # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; Grapheme_Base # Po LYDIAN TRIANGULAR MARK +10980..109B7 ; Grapheme_Base # Lo [56] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; Grapheme_Base # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; Grapheme_Base # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; Grapheme_Base # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; Grapheme_Base # No [46] MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS +10A00 ; Grapheme_Base # Lo KHAROSHTHI LETTER A +10A10..10A13 ; Grapheme_Base # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Grapheme_Base # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Grapheme_Base # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A40..10A48 ; Grapheme_Base # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58 ; Grapheme_Base # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES +10A60..10A7C ; Grapheme_Base # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; Grapheme_Base # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; Grapheme_Base # Po OLD SOUTH ARABIAN NUMERIC INDICATOR +10A80..10A9C ; Grapheme_Base # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; Grapheme_Base # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY +10AC0..10AC7 ; Grapheme_Base # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; Grapheme_Base # So MANICHAEAN SIGN UD +10AC9..10AE4 ; Grapheme_Base # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AEB..10AEF ; Grapheme_Base # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; Grapheme_Base # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER +10B00..10B35 ; Grapheme_Base # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE +10B39..10B3F ; Grapheme_Base # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B40..10B55 ; Grapheme_Base # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; Grapheme_Base # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND +10B60..10B72 ; Grapheme_Base # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; Grapheme_Base # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND +10B80..10B91 ; Grapheme_Base # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; Grapheme_Base # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; Grapheme_Base # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED +10C00..10C48 ; Grapheme_Base # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH +10C80..10CB2 ; Grapheme_Base # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Grapheme_Base # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; Grapheme_Base # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND +10D00..10D23 ; Grapheme_Base # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D30..10D39 ; Grapheme_Base # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE +10E60..10E7E ; Grapheme_Base # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +10E80..10EA9 ; Grapheme_Base # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAD ; Grapheme_Base # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; Grapheme_Base # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE +10F00..10F1C ; Grapheme_Base # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; Grapheme_Base # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; Grapheme_Base # Lo OLD SOGDIAN LIGATURE AYIN-DALETH +10F30..10F45 ; Grapheme_Base # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F51..10F54 ; Grapheme_Base # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; Grapheme_Base # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F70..10F81 ; Grapheme_Base # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR LETTER LESH +10F86..10F89 ; Grapheme_Base # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +10FB0..10FC4 ; Grapheme_Base # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; Grapheme_Base # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED +10FE0..10FF6 ; Grapheme_Base # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH +11000 ; Grapheme_Base # Mc BRAHMI SIGN CANDRABINDU +11002 ; Grapheme_Base # Mc BRAHMI SIGN VISARGA +11003..11037 ; Grapheme_Base # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11047..1104D ; Grapheme_Base # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065 ; Grapheme_Base # No [20] BRAHMI NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F ; Grapheme_Base # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11071..11072 ; Grapheme_Base # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11075 ; Grapheme_Base # Lo BRAHMI LETTER OLD TAMIL LLA +11082 ; Grapheme_Base # Mc KAITHI SIGN VISARGA +11083..110AF ; Grapheme_Base # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Grapheme_Base # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B7..110B8 ; Grapheme_Base # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110BB..110BC ; Grapheme_Base # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BE..110C1 ; Grapheme_Base # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110D0..110E8 ; Grapheme_Base # Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Grapheme_Base # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE +11103..11126 ; Grapheme_Base # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +1112C ; Grapheme_Base # Mc CHAKMA VOWEL SIGN E +11136..1113F ; Grapheme_Base # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Grapheme_Base # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; Grapheme_Base # Lo CHAKMA LETTER LHAA +11145..11146 ; Grapheme_Base # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; Grapheme_Base # Lo CHAKMA LETTER VAA +11150..11172 ; Grapheme_Base # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11174..11175 ; Grapheme_Base # Po [2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; Grapheme_Base # Lo MAHAJANI LIGATURE SHRI +11182 ; Grapheme_Base # Mc SHARADA SIGN VISARGA +11183..111B2 ; Grapheme_Base # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Grapheme_Base # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111BF..111C0 ; Grapheme_Base # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Grapheme_Base # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Grapheme_Base # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111CD ; Grapheme_Base # Po SHARADA SUTRA MARK +111CE ; Grapheme_Base # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111D0..111D9 ; Grapheme_Base # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; Grapheme_Base # Lo SHARADA EKAM +111DB ; Grapheme_Base # Po SHARADA SIGN SIDDHAM +111DC ; Grapheme_Base # Lo SHARADA HEADSTROKE +111DD..111DF ; Grapheme_Base # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 +111E1..111F4 ; Grapheme_Base # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND +11200..11211 ; Grapheme_Base # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Grapheme_Base # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; Grapheme_Base # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +11232..11233 ; Grapheme_Base # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11235 ; Grapheme_Base # Mc KHOJKI SIGN VIRAMA +11238..1123D ; Grapheme_Base # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +11280..11286 ; Grapheme_Base # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Grapheme_Base # Lo MULTANI LETTER GHA +1128A..1128D ; Grapheme_Base # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Grapheme_Base # Lo [15] MULTANI LETTER NYA..MULTANI LETTER BA +1129F..112A8 ; Grapheme_Base # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; Grapheme_Base # Po MULTANI SECTION MARK +112B0..112DE ; Grapheme_Base # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112E0..112E2 ; Grapheme_Base # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112F0..112F9 ; Grapheme_Base # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE +11302..11303 ; Grapheme_Base # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; Grapheme_Base # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Grapheme_Base # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Grapheme_Base # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Grapheme_Base # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Grapheme_Base # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; Grapheme_Base # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133D ; Grapheme_Base # Lo GRANTHA SIGN AVAGRAHA +1133F ; Grapheme_Base # Mc GRANTHA VOWEL SIGN I +11341..11344 ; Grapheme_Base # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; Grapheme_Base # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; Grapheme_Base # Lo GRANTHA OM +1135D..11361 ; Grapheme_Base # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; Grapheme_Base # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11400..11434 ; Grapheme_Base # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; Grapheme_Base # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11440..11441 ; Grapheme_Base # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11445 ; Grapheme_Base # Mc NEWA SIGN VISARGA +11447..1144A ; Grapheme_Base # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; Grapheme_Base # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; Grapheme_Base # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; Grapheme_Base # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; Grapheme_Base # Po NEWA INSERTION SIGN +1145F..11461 ; Grapheme_Base # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA +11480..114AF ; Grapheme_Base # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B1..114B2 ; Grapheme_Base # Mc [2] TIRHUTA VOWEL SIGN I..TIRHUTA VOWEL SIGN II +114B9 ; Grapheme_Base # Mc TIRHUTA VOWEL SIGN E +114BB..114BC ; Grapheme_Base # Mc [2] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN O +114BE ; Grapheme_Base # Mc TIRHUTA VOWEL SIGN AU +114C1 ; Grapheme_Base # Mc TIRHUTA SIGN VISARGA +114C4..114C5 ; Grapheme_Base # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; Grapheme_Base # Po TIRHUTA ABBREVIATION SIGN +114C7 ; Grapheme_Base # Lo TIRHUTA OM +114D0..114D9 ; Grapheme_Base # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE +11580..115AE ; Grapheme_Base # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115B0..115B1 ; Grapheme_Base # Mc [2] SIDDHAM VOWEL SIGN I..SIDDHAM VOWEL SIGN II +115B8..115BB ; Grapheme_Base # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BE ; Grapheme_Base # Mc SIDDHAM SIGN VISARGA +115C1..115D7 ; Grapheme_Base # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; Grapheme_Base # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +11600..1162F ; Grapheme_Base # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; Grapheme_Base # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +1163B..1163C ; Grapheme_Base # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163E ; Grapheme_Base # Mc MODI SIGN VISARGA +11641..11643 ; Grapheme_Base # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; Grapheme_Base # Lo MODI SIGN HUVA +11650..11659 ; Grapheme_Base # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE +11660..1166C ; Grapheme_Base # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT +11680..116AA ; Grapheme_Base # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AC ; Grapheme_Base # Mc TAKRI SIGN VISARGA +116AE..116AF ; Grapheme_Base # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B6 ; Grapheme_Base # Mc TAKRI SIGN VIRAMA +116B8 ; Grapheme_Base # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; Grapheme_Base # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; Grapheme_Base # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE +11700..1171A ; Grapheme_Base # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +11720..11721 ; Grapheme_Base # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11726 ; Grapheme_Base # Mc AHOM VOWEL SIGN E +11730..11739 ; Grapheme_Base # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; Grapheme_Base # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; Grapheme_Base # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; Grapheme_Base # So AHOM SYMBOL VI +11740..11746 ; Grapheme_Base # Lo [7] AHOM LETTER CA..AHOM LETTER LLA +11800..1182B ; Grapheme_Base # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; Grapheme_Base # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +11838 ; Grapheme_Base # Mc DOGRA SIGN VISARGA +1183B ; Grapheme_Base # Po DOGRA ABBREVIATION SIGN +118A0..118DF ; Grapheme_Base # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; Grapheme_Base # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; Grapheme_Base # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF..11906 ; Grapheme_Base # Lo [8] WARANG CITI OM..DIVES AKURU LETTER E +11909 ; Grapheme_Base # Lo DIVES AKURU LETTER O +1190C..11913 ; Grapheme_Base # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Grapheme_Base # Lo [2] DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Grapheme_Base # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11931..11935 ; Grapheme_Base # Mc [5] DIVES AKURU VOWEL SIGN I..DIVES AKURU VOWEL SIGN E +11937..11938 ; Grapheme_Base # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193D ; Grapheme_Base # Mc DIVES AKURU SIGN HALANTA +1193F ; Grapheme_Base # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; Grapheme_Base # Mc DIVES AKURU MEDIAL YA +11941 ; Grapheme_Base # Lo DIVES AKURU INITIAL RA +11942 ; Grapheme_Base # Mc DIVES AKURU MEDIAL RA +11944..11946 ; Grapheme_Base # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; Grapheme_Base # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE +119A0..119A7 ; Grapheme_Base # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Grapheme_Base # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; Grapheme_Base # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119DC..119DF ; Grapheme_Base # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E1 ; Grapheme_Base # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; Grapheme_Base # Po NANDINAGARI SIGN SIDDHAM +119E3 ; Grapheme_Base # Lo NANDINAGARI HEADSTROKE +119E4 ; Grapheme_Base # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A00 ; Grapheme_Base # Lo ZANABAZAR SQUARE LETTER A +11A0B..11A32 ; Grapheme_Base # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A39 ; Grapheme_Base # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; Grapheme_Base # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3F..11A46 ; Grapheme_Base # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A50 ; Grapheme_Base # Lo SOYOMBO LETTER A +11A57..11A58 ; Grapheme_Base # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A5C..11A89 ; Grapheme_Base # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A97 ; Grapheme_Base # Mc SOYOMBO SIGN VISARGA +11A9A..11A9C ; Grapheme_Base # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; Grapheme_Base # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; Grapheme_Base # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 +11AB0..11AF8 ; Grapheme_Base # Lo [73] CANADIAN SYLLABICS NATTILIK HI..PAU CIN HAU GLOTTAL STOP FINAL +11C00..11C08 ; Grapheme_Base # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Grapheme_Base # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; Grapheme_Base # Mc BHAIKSUKI VOWEL SIGN AA +11C3E ; Grapheme_Base # Mc BHAIKSUKI SIGN VISARGA +11C40 ; Grapheme_Base # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; Grapheme_Base # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; Grapheme_Base # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; Grapheme_Base # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK +11C70..11C71 ; Grapheme_Base # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; Grapheme_Base # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11CA9 ; Grapheme_Base # Mc MARCHEN SUBJOINED LETTER YA +11CB1 ; Grapheme_Base # Mc MARCHEN VOWEL SIGN I +11CB4 ; Grapheme_Base # Mc MARCHEN VOWEL SIGN O +11D00..11D06 ; Grapheme_Base # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Grapheme_Base # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Grapheme_Base # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D46 ; Grapheme_Base # Lo MASARAM GONDI REPHA +11D50..11D59 ; Grapheme_Base # Nd [10] MASARAM GONDI DIGIT ZERO..MASARAM GONDI DIGIT NINE +11D60..11D65 ; Grapheme_Base # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Grapheme_Base # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Grapheme_Base # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; Grapheme_Base # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D93..11D94 ; Grapheme_Base # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D96 ; Grapheme_Base # Mc GUNJALA GONDI SIGN VISARGA +11D98 ; Grapheme_Base # Lo GUNJALA GONDI OM +11DA0..11DA9 ; Grapheme_Base # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE +11EE0..11EF2 ; Grapheme_Base # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF5..11EF6 ; Grapheme_Base # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; Grapheme_Base # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +11FB0 ; Grapheme_Base # Lo LISU LETTER YHA +11FC0..11FD4 ; Grapheme_Base # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC ; Grapheme_Base # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0 ; Grapheme_Base # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1 ; Grapheme_Base # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF ; Grapheme_Base # Po TAMIL PUNCTUATION END OF TEXT +12000..12399 ; Grapheme_Base # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; Grapheme_Base # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; Grapheme_Base # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; Grapheme_Base # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU +12F90..12FF0 ; Grapheme_Base # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; Grapheme_Base # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 +13000..1342E ; Grapheme_Base # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +14400..14646 ; Grapheme_Base # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 +16800..16A38 ; Grapheme_Base # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ +16A40..16A5E ; Grapheme_Base # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; Grapheme_Base # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; Grapheme_Base # Po [2] MRO DANDA..MRO DOUBLE DANDA +16A70..16ABE ; Grapheme_Base # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; Grapheme_Base # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE +16AD0..16AED ; Grapheme_Base # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF5 ; Grapheme_Base # Po BASSA VAH FULL STOP +16B00..16B2F ; Grapheme_Base # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B37..16B3B ; Grapheme_Base # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; Grapheme_Base # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; Grapheme_Base # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; Grapheme_Base # Po PAHAWH HMONG SIGN XAUS +16B45 ; Grapheme_Base # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; Grapheme_Base # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; Grapheme_Base # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; Grapheme_Base # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Grapheme_Base # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ +16E40..16E7F ; Grapheme_Base # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; Grapheme_Base # No [23] MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; Grapheme_Base # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH +16F00..16F4A ; Grapheme_Base # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F50 ; Grapheme_Base # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; Grapheme_Base # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F93..16F9F ; Grapheme_Base # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FE0..16FE1 ; Grapheme_Base # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE2 ; Grapheme_Base # Po OLD CHINESE HOOK MARK +16FE3 ; Grapheme_Base # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; Grapheme_Base # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +17000..187F7 ; Grapheme_Base # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Grapheme_Base # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; Grapheme_Base # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1AFF0..1AFF3 ; Grapheme_Base # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Grapheme_Base # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Grapheme_Base # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000..1B122 ; Grapheme_Base # Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU +1B150..1B152 ; Grapheme_Base # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1B164..1B167 ; Grapheme_Base # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N +1B170..1B2FB ; Grapheme_Base # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +1BC00..1BC6A ; Grapheme_Base # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Grapheme_Base # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Grapheme_Base # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Grapheme_Base # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; Grapheme_Base # So DUPLOYAN SIGN O WITH CROSS +1BC9F ; Grapheme_Base # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1CF50..1CFC3 ; Grapheme_Base # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; Grapheme_Base # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; Grapheme_Base # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; Grapheme_Base # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE +1D166 ; Grapheme_Base # Mc MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; Grapheme_Base # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D ; Grapheme_Base # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT +1D183..1D184 ; Grapheme_Base # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; Grapheme_Base # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1EA ; Grapheme_Base # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D200..1D241 ; Grapheme_Base # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D245 ; Grapheme_Base # So GREEK MUSICAL LEIMMA +1D2E0..1D2F3 ; Grapheme_Base # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356 ; Grapheme_Base # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378 ; Grapheme_Base # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; Grapheme_Base # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Grapheme_Base # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Grapheme_Base # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Grapheme_Base # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Grapheme_Base # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Grapheme_Base # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Grapheme_Base # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Grapheme_Base # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Grapheme_Base # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Grapheme_Base # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Grapheme_Base # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Grapheme_Base # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Grapheme_Base # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Grapheme_Base # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Grapheme_Base # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Grapheme_Base # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Grapheme_Base # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Grapheme_Base # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Grapheme_Base # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Grapheme_Base # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Grapheme_Base # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Grapheme_Base # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Grapheme_Base # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Grapheme_Base # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Grapheme_Base # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Grapheme_Base # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Grapheme_Base # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Grapheme_Base # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Grapheme_Base # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Grapheme_Base # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Grapheme_Base # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Grapheme_Base # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Grapheme_Base # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Grapheme_Base # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Grapheme_Base # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Grapheme_Base # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Grapheme_Base # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Grapheme_Base # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1D800..1D9FF ; Grapheme_Base # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA37..1DA3A ; Grapheme_Base # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA6D..1DA74 ; Grapheme_Base # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA76..1DA83 ; Grapheme_Base # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA85..1DA86 ; Grapheme_Base # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; Grapheme_Base # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DF00..1DF09 ; Grapheme_Base # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK +1DF0A ; Grapheme_Base # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; Grapheme_Base # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL +1E100..1E12C ; Grapheme_Base # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E137..1E13D ; Grapheme_Base # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; Grapheme_Base # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; Grapheme_Base # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Grapheme_Base # So NYIAKENG PUACHUE HMONG CIRCLED CA +1E290..1E2AD ; Grapheme_Base # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2C0..1E2EB ; Grapheme_Base # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2F0..1E2F9 ; Grapheme_Base # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF ; Grapheme_Base # Sc WANCHO NGUN SIGN +1E7E0..1E7E6 ; Grapheme_Base # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Grapheme_Base # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Grapheme_Base # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Grapheme_Base # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE +1E800..1E8C4 ; Grapheme_Base # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; Grapheme_Base # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E900..1E943 ; Grapheme_Base # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E94B ; Grapheme_Base # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; Grapheme_Base # Nd [10] ADLAM DIGIT ZERO..ADLAM DIGIT NINE +1E95E..1E95F ; Grapheme_Base # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK +1EC71..1ECAB ; Grapheme_Base # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; Grapheme_Base # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; Grapheme_Base # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; Grapheme_Base # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; Grapheme_Base # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; Grapheme_Base # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; Grapheme_Base # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; Grapheme_Base # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1EE00..1EE03 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Grapheme_Base # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Grapheme_Base # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Grapheme_Base # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Grapheme_Base # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Grapheme_Base # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Grapheme_Base # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Grapheme_Base # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Grapheme_Base # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Grapheme_Base # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Grapheme_Base # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Grapheme_Base # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Grapheme_Base # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL +1F000..1F02B ; Grapheme_Base # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; Grapheme_Base # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; Grapheme_Base # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; Grapheme_Base # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; Grapheme_Base # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; Grapheme_Base # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10C ; Grapheme_Base # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F1AD ; Grapheme_Base # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL +1F1E6..1F202 ; Grapheme_Base # So [29] REGIONAL INDICATOR SYMBOL LETTER A..SQUARED KATAKANA SA +1F210..1F23B ; Grapheme_Base # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; Grapheme_Base # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; Grapheme_Base # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; Grapheme_Base # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; Grapheme_Base # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; Grapheme_Base # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; Grapheme_Base # So [728] RAT..ELEVATOR +1F6DD..1F6EC ; Grapheme_Base # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6F0..1F6FC ; Grapheme_Base # So [13] SATELLITE..ROLLER SKATE +1F700..1F773 ; Grapheme_Base # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F780..1F7D8 ; Grapheme_Base # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F7E0..1F7EB ; Grapheme_Base # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; Grapheme_Base # So HEAVY EQUALS SIGN +1F800..1F80B ; Grapheme_Base # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; Grapheme_Base # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; Grapheme_Base # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; Grapheme_Base # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; Grapheme_Base # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; Grapheme_Base # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1FA53 ; Grapheme_Base # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; Grapheme_Base # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA74 ; Grapheme_Base # So [5] BALLET SHOES..THONG SANDAL +1FA78..1FA7C ; Grapheme_Base # So [5] DROP OF BLOOD..CRUTCH +1FA80..1FA86 ; Grapheme_Base # So [7] YO-YO..NESTING DOLLS +1FA90..1FAAC ; Grapheme_Base # So [29] RINGED PLANET..HAMSA +1FAB0..1FABA ; Grapheme_Base # So [11] FLY..NEST WITH EGGS +1FAC0..1FAC5 ; Grapheme_Base # So [6] ANATOMICAL HEART..PERSON WITH CROWN +1FAD0..1FAD9 ; Grapheme_Base # So [10] BLUEBERRIES..JAR +1FAE0..1FAE7 ; Grapheme_Base # So [8] MELTING FACE..BUBBLES +1FAF0..1FAF6 ; Grapheme_Base # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FB00..1FB92 ; Grapheme_Base # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; Grapheme_Base # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBF0..1FBF9 ; Grapheme_Base # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +20000..2A6DF ; Grapheme_Base # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Grapheme_Base # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; Grapheme_Base # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Grapheme_Base # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Grapheme_Base # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Grapheme_Base # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 142539 + +# ================================================ + +# Derived Property: Grapheme_Link (deprecated) +# Generated from: Canonical_Combining_Class=Virama +# Use Canonical_Combining_Class=Virama directly instead + +094D ; Grapheme_Link # Mn DEVANAGARI SIGN VIRAMA +09CD ; Grapheme_Link # Mn BENGALI SIGN VIRAMA +0A4D ; Grapheme_Link # Mn GURMUKHI SIGN VIRAMA +0ACD ; Grapheme_Link # Mn GUJARATI SIGN VIRAMA +0B4D ; Grapheme_Link # Mn ORIYA SIGN VIRAMA +0BCD ; Grapheme_Link # Mn TAMIL SIGN VIRAMA +0C4D ; Grapheme_Link # Mn TELUGU SIGN VIRAMA +0CCD ; Grapheme_Link # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Grapheme_Link # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Grapheme_Link # Mn MALAYALAM SIGN VIRAMA +0DCA ; Grapheme_Link # Mn SINHALA SIGN AL-LAKUNA +0E3A ; Grapheme_Link # Mn THAI CHARACTER PHINTHU +0EBA ; Grapheme_Link # Mn LAO SIGN PALI VIRAMA +0F84 ; Grapheme_Link # Mn TIBETAN MARK HALANTA +1039..103A ; Grapheme_Link # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1714 ; Grapheme_Link # Mn TAGALOG SIGN VIRAMA +1715 ; Grapheme_Link # Mc TAGALOG SIGN PAMUDPOD +1734 ; Grapheme_Link # Mc HANUNOO SIGN PAMUDPOD +17D2 ; Grapheme_Link # Mn KHMER SIGN COENG +1A60 ; Grapheme_Link # Mn TAI THAM SIGN SAKOT +1B44 ; Grapheme_Link # Mc BALINESE ADEG ADEG +1BAA ; Grapheme_Link # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Grapheme_Link # Mn SUNDANESE SIGN VIRAMA +1BF2..1BF3 ; Grapheme_Link # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +2D7F ; Grapheme_Link # Mn TIFINAGH CONSONANT JOINER +A806 ; Grapheme_Link # Mn SYLOTI NAGRI SIGN HASANTA +A82C ; Grapheme_Link # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA +A8C4 ; Grapheme_Link # Mn SAURASHTRA SIGN VIRAMA +A953 ; Grapheme_Link # Mc REJANG VIRAMA +A9C0 ; Grapheme_Link # Mc JAVANESE PANGKON +AAF6 ; Grapheme_Link # Mn MEETEI MAYEK VIRAMA +ABED ; Grapheme_Link # Mn MEETEI MAYEK APUN IYEK +10A3F ; Grapheme_Link # Mn KHAROSHTHI VIRAMA +11046 ; Grapheme_Link # Mn BRAHMI VIRAMA +11070 ; Grapheme_Link # Mn BRAHMI SIGN OLD TAMIL VIRAMA +1107F ; Grapheme_Link # Mn BRAHMI NUMBER JOINER +110B9 ; Grapheme_Link # Mn KAITHI SIGN VIRAMA +11133..11134 ; Grapheme_Link # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +111C0 ; Grapheme_Link # Mc SHARADA SIGN VIRAMA +11235 ; Grapheme_Link # Mc KHOJKI SIGN VIRAMA +112EA ; Grapheme_Link # Mn KHUDAWADI SIGN VIRAMA +1134D ; Grapheme_Link # Mc GRANTHA SIGN VIRAMA +11442 ; Grapheme_Link # Mn NEWA SIGN VIRAMA +114C2 ; Grapheme_Link # Mn TIRHUTA SIGN VIRAMA +115BF ; Grapheme_Link # Mn SIDDHAM SIGN VIRAMA +1163F ; Grapheme_Link # Mn MODI SIGN VIRAMA +116B6 ; Grapheme_Link # Mc TAKRI SIGN VIRAMA +1172B ; Grapheme_Link # Mn AHOM SIGN KILLER +11839 ; Grapheme_Link # Mn DOGRA SIGN VIRAMA +1193D ; Grapheme_Link # Mc DIVES AKURU SIGN HALANTA +1193E ; Grapheme_Link # Mn DIVES AKURU VIRAMA +119E0 ; Grapheme_Link # Mn NANDINAGARI SIGN VIRAMA +11A34 ; Grapheme_Link # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Grapheme_Link # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Grapheme_Link # Mn SOYOMBO SUBJOINER +11C3F ; Grapheme_Link # Mn BHAIKSUKI SIGN VIRAMA +11D44..11D45 ; Grapheme_Link # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +11D97 ; Grapheme_Link # Mn GUNJALA GONDI VIRAMA + +# Total code points: 63 + +# EOF diff --git a/pcre2/maint/Unicode.tables/PropList.txt b/pcre2/maint/Unicode.tables/PropList.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a5a9346828fa9134446c5d702f3f2ebc5e35e3b --- /dev/null +++ b/pcre2/maint/Unicode.tables/PropList.txt @@ -0,0 +1,1743 @@ +# PropList-14.0.0.txt +# Date: 2021-08-12, 23:13:05 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ + +# ================================================ + +0009..000D ; White_Space # Cc [5] .. +0020 ; White_Space # Zs SPACE +0085 ; White_Space # Cc +00A0 ; White_Space # Zs NO-BREAK SPACE +1680 ; White_Space # Zs OGHAM SPACE MARK +2000..200A ; White_Space # Zs [11] EN QUAD..HAIR SPACE +2028 ; White_Space # Zl LINE SEPARATOR +2029 ; White_Space # Zp PARAGRAPH SEPARATOR +202F ; White_Space # Zs NARROW NO-BREAK SPACE +205F ; White_Space # Zs MEDIUM MATHEMATICAL SPACE +3000 ; White_Space # Zs IDEOGRAPHIC SPACE + +# Total code points: 25 + +# ================================================ + +061C ; Bidi_Control # Cf ARABIC LETTER MARK +200E..200F ; Bidi_Control # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +202A..202E ; Bidi_Control # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +2066..2069 ; Bidi_Control # Cf [4] LEFT-TO-RIGHT ISOLATE..POP DIRECTIONAL ISOLATE + +# Total code points: 12 + +# ================================================ + +200C..200D ; Join_Control # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER + +# Total code points: 2 + +# ================================================ + +002D ; Dash # Pd HYPHEN-MINUS +058A ; Dash # Pd ARMENIAN HYPHEN +05BE ; Dash # Pd HEBREW PUNCTUATION MAQAF +1400 ; Dash # Pd CANADIAN SYLLABICS HYPHEN +1806 ; Dash # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2015 ; Dash # Pd [6] HYPHEN..HORIZONTAL BAR +2053 ; Dash # Po SWUNG DASH +207B ; Dash # Sm SUPERSCRIPT MINUS +208B ; Dash # Sm SUBSCRIPT MINUS +2212 ; Dash # Sm MINUS SIGN +2E17 ; Dash # Pd DOUBLE OBLIQUE HYPHEN +2E1A ; Dash # Pd HYPHEN WITH DIAERESIS +2E3A..2E3B ; Dash # Pd [2] TWO-EM DASH..THREE-EM DASH +2E40 ; Dash # Pd DOUBLE HYPHEN +2E5D ; Dash # Pd OBLIQUE HYPHEN +301C ; Dash # Pd WAVE DASH +3030 ; Dash # Pd WAVY DASH +30A0 ; Dash # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +FE31..FE32 ; Dash # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE58 ; Dash # Pd SMALL EM DASH +FE63 ; Dash # Pd SMALL HYPHEN-MINUS +FF0D ; Dash # Pd FULLWIDTH HYPHEN-MINUS +10EAD ; Dash # Pd YEZIDI HYPHENATION MARK + +# Total code points: 30 + +# ================================================ + +002D ; Hyphen # Pd HYPHEN-MINUS +00AD ; Hyphen # Cf SOFT HYPHEN +058A ; Hyphen # Pd ARMENIAN HYPHEN +1806 ; Hyphen # Pd MONGOLIAN TODO SOFT HYPHEN +2010..2011 ; Hyphen # Pd [2] HYPHEN..NON-BREAKING HYPHEN +2E17 ; Hyphen # Pd DOUBLE OBLIQUE HYPHEN +30FB ; Hyphen # Po KATAKANA MIDDLE DOT +FE63 ; Hyphen # Pd SMALL HYPHEN-MINUS +FF0D ; Hyphen # Pd FULLWIDTH HYPHEN-MINUS +FF65 ; Hyphen # Po HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 11 + +# ================================================ + +0022 ; Quotation_Mark # Po QUOTATION MARK +0027 ; Quotation_Mark # Po APOSTROPHE +00AB ; Quotation_Mark # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00BB ; Quotation_Mark # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +2018 ; Quotation_Mark # Pi LEFT SINGLE QUOTATION MARK +2019 ; Quotation_Mark # Pf RIGHT SINGLE QUOTATION MARK +201A ; Quotation_Mark # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Quotation_Mark # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Quotation_Mark # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Quotation_Mark # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Quotation_Mark # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2039 ; Quotation_Mark # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Quotation_Mark # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +2E42 ; Quotation_Mark # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +300C ; Quotation_Mark # Ps LEFT CORNER BRACKET +300D ; Quotation_Mark # Pe RIGHT CORNER BRACKET +300E ; Quotation_Mark # Ps LEFT WHITE CORNER BRACKET +300F ; Quotation_Mark # Pe RIGHT WHITE CORNER BRACKET +301D ; Quotation_Mark # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Quotation_Mark # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +FE41 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Quotation_Mark # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Quotation_Mark # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FF02 ; Quotation_Mark # Po FULLWIDTH QUOTATION MARK +FF07 ; Quotation_Mark # Po FULLWIDTH APOSTROPHE +FF62 ; Quotation_Mark # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET + +# Total code points: 30 + +# ================================================ + +0021 ; Terminal_Punctuation # Po EXCLAMATION MARK +002C ; Terminal_Punctuation # Po COMMA +002E ; Terminal_Punctuation # Po FULL STOP +003A..003B ; Terminal_Punctuation # Po [2] COLON..SEMICOLON +003F ; Terminal_Punctuation # Po QUESTION MARK +037E ; Terminal_Punctuation # Po GREEK QUESTION MARK +0387 ; Terminal_Punctuation # Po GREEK ANO TELEIA +0589 ; Terminal_Punctuation # Po ARMENIAN FULL STOP +05C3 ; Terminal_Punctuation # Po HEBREW PUNCTUATION SOF PASUQ +060C ; Terminal_Punctuation # Po ARABIC COMMA +061B ; Terminal_Punctuation # Po ARABIC SEMICOLON +061D..061F ; Terminal_Punctuation # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +06D4 ; Terminal_Punctuation # Po ARABIC FULL STOP +0700..070A ; Terminal_Punctuation # Po [11] SYRIAC END OF PARAGRAPH..SYRIAC CONTRACTION +070C ; Terminal_Punctuation # Po SYRIAC HARKLEAN METOBELUS +07F8..07F9 ; Terminal_Punctuation # Po [2] NKO COMMA..NKO EXCLAMATION MARK +0830..083E ; Terminal_Punctuation # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU +085E ; Terminal_Punctuation # Po MANDAIC PUNCTUATION +0964..0965 ; Terminal_Punctuation # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E5A..0E5B ; Terminal_Punctuation # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT +0F08 ; Terminal_Punctuation # Po TIBETAN MARK SBRUL SHAD +0F0D..0F12 ; Terminal_Punctuation # Po [6] TIBETAN MARK SHAD..TIBETAN MARK RGYA GRAM SHAD +104A..104B ; Terminal_Punctuation # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1361..1368 ; Terminal_Punctuation # Po [8] ETHIOPIC WORDSPACE..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Terminal_Punctuation # Po CANADIAN SYLLABICS FULL STOP +16EB..16ED ; Terminal_Punctuation # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Terminal_Punctuation # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +17D4..17D6 ; Terminal_Punctuation # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17DA ; Terminal_Punctuation # Po KHMER SIGN KOOMUUT +1802..1805 ; Terminal_Punctuation # Po [4] MONGOLIAN COMMA..MONGOLIAN FOUR DOTS +1808..1809 ; Terminal_Punctuation # Po [2] MONGOLIAN MANCHU COMMA..MONGOLIAN MANCHU FULL STOP +1944..1945 ; Terminal_Punctuation # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Terminal_Punctuation # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Terminal_Punctuation # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5D..1B5F ; Terminal_Punctuation # Po [3] BALINESE CARIK PAMUNGKAH..BALINESE CARIK PAREREN +1B7D..1B7E ; Terminal_Punctuation # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1C3B..1C3F ; Terminal_Punctuation # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C7E..1C7F ; Terminal_Punctuation # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK +2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP +2E41 ; Terminal_Punctuation # Po REVERSED COMMA +2E4C ; Terminal_Punctuation # Po MEDIEVAL COMMA +2E4E..2E4F ; Terminal_Punctuation # Po [2] PUNCTUS ELEVATUS MARK..CORNISH VERSE DIVIDER +2E53..2E54 ; Terminal_Punctuation # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK +3001..3002 ; Terminal_Punctuation # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +A4FE..A4FF ; Terminal_Punctuation # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +A60D..A60F ; Terminal_Punctuation # Po [3] VAI COMMA..VAI QUESTION MARK +A6F3..A6F7 ; Terminal_Punctuation # Po [5] BAMUM FULL STOP..BAMUM QUESTION MARK +A876..A877 ; Terminal_Punctuation # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Terminal_Punctuation # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Terminal_Punctuation # Po KAYAH LI SIGN SHYA +A9C7..A9C9 ; Terminal_Punctuation # Po [3] JAVANESE PADA PANGKAT..JAVANESE PADA LUNGSI +AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI +AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK +FF0C ; Terminal_Punctuation # Po FULLWIDTH COMMA +FF0E ; Terminal_Punctuation # Po FULLWIDTH FULL STOP +FF1A..FF1B ; Terminal_Punctuation # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1F ; Terminal_Punctuation # Po FULLWIDTH QUESTION MARK +FF61 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA +1039F ; Terminal_Punctuation # Po UGARITIC WORD DIVIDER +103D0 ; Terminal_Punctuation # Po OLD PERSIAN WORD DIVIDER +10857 ; Terminal_Punctuation # Po IMPERIAL ARAMAIC SECTION SIGN +1091F ; Terminal_Punctuation # Po PHOENICIAN WORD SEPARATOR +10A56..10A57 ; Terminal_Punctuation # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10AF0..10AF5 ; Terminal_Punctuation # Po [6] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION TWO DOTS +10B3A..10B3F ; Terminal_Punctuation # Po [6] TINY TWO DOTS OVER ONE DOT PUNCTUATION..LARGE ONE RING OVER TWO RINGS PUNCTUATION +10B99..10B9C ; Terminal_Punctuation # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10F55..10F59 ; Terminal_Punctuation # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Terminal_Punctuation # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +11047..1104D ; Terminal_Punctuation # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +110BE..110C1 ; Terminal_Punctuation # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Terminal_Punctuation # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Terminal_Punctuation # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Terminal_Punctuation # Po SHARADA SUTRA MARK +111DE..111DF ; Terminal_Punctuation # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..1123C ; Terminal_Punctuation # Po [5] KHOJKI DANDA..KHOJKI DOUBLE SECTION MARK +112A9 ; Terminal_Punctuation # Po MULTANI SECTION MARK +1144B..1144D ; Terminal_Punctuation # Po [3] NEWA DANDA..NEWA COMMA +1145A..1145B ; Terminal_Punctuation # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +115C2..115C5 ; Terminal_Punctuation # Po [4] SIDDHAM DANDA..SIDDHAM SEPARATOR BAR +115C9..115D7 ; Terminal_Punctuation # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Terminal_Punctuation # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Terminal_Punctuation # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11944 ; Terminal_Punctuation # Po DIVES AKURU DOUBLE DANDA +11946 ; Terminal_Punctuation # Po DIVES AKURU END OF TEXT MARK +11A42..11A43 ; Terminal_Punctuation # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Terminal_Punctuation # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11AA1..11AA2 ; Terminal_Punctuation # Po [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 +11C41..11C43 ; Terminal_Punctuation # Po [3] BHAIKSUKI DANDA..BHAIKSUKI WORD SEPARATOR +11C71 ; Terminal_Punctuation # Po MARCHEN MARK SHAD +11EF7..11EF8 ; Terminal_Punctuation # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +12470..12474 ; Terminal_Punctuation # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +16A6E..16A6F ; Terminal_Punctuation # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Terminal_Punctuation # Po BASSA VAH FULL STOP +16B37..16B39 ; Terminal_Punctuation # Po [3] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN CIM CHEEM +16B44 ; Terminal_Punctuation # Po PAHAWH HMONG SIGN XAUS +16E97..16E98 ; Terminal_Punctuation # Po [2] MEDEFAIDRIN COMMA..MEDEFAIDRIN FULL STOP +1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON + +# Total code points: 276 + +# ================================================ + +005E ; Other_Math # Sk CIRCUMFLEX ACCENT +03D0..03D2 ; Other_Math # L& [3] GREEK BETA SYMBOL..GREEK UPSILON WITH HOOK SYMBOL +03D5 ; Other_Math # L& GREEK PHI SYMBOL +03F0..03F1 ; Other_Math # L& [2] GREEK KAPPA SYMBOL..GREEK RHO SYMBOL +03F4..03F5 ; Other_Math # L& [2] GREEK CAPITAL THETA SYMBOL..GREEK LUNATE EPSILON SYMBOL +2016 ; Other_Math # Po DOUBLE VERTICAL LINE +2032..2034 ; Other_Math # Po [3] PRIME..TRIPLE PRIME +2040 ; Other_Math # Pc CHARACTER TIE +2061..2064 ; Other_Math # Cf [4] FUNCTION APPLICATION..INVISIBLE PLUS +207D ; Other_Math # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Other_Math # Pe SUPERSCRIPT RIGHT PARENTHESIS +208D ; Other_Math # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Other_Math # Pe SUBSCRIPT RIGHT PARENTHESIS +20D0..20DC ; Other_Math # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20E1 ; Other_Math # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E5..20E6 ; Other_Math # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING DOUBLE VERTICAL STROKE OVERLAY +20EB..20EF ; Other_Math # Mn [5] COMBINING LONG DOUBLE SOLIDUS OVERLAY..COMBINING RIGHT ARROW BELOW +2102 ; Other_Math # L& DOUBLE-STRUCK CAPITAL C +2107 ; Other_Math # L& EULER CONSTANT +210A..2113 ; Other_Math # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2115 ; Other_Math # L& DOUBLE-STRUCK CAPITAL N +2119..211D ; Other_Math # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +2124 ; Other_Math # L& DOUBLE-STRUCK CAPITAL Z +2128 ; Other_Math # L& BLACK-LETTER CAPITAL Z +2129 ; Other_Math # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Other_Math # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212F..2131 ; Other_Math # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Other_Math # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Other_Math # Lo [4] ALEF SYMBOL..DALET SYMBOL +213C..213F ; Other_Math # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2145..2149 ; Other_Math # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +2195..2199 ; Other_Math # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219C..219F ; Other_Math # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A1..21A2 ; Other_Math # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A4..21A5 ; Other_Math # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A7 ; Other_Math # So DOWNWARDS ARROW FROM BAR +21A9..21AD ; Other_Math # So [5] LEFTWARDS ARROW WITH HOOK..LEFT RIGHT WAVE ARROW +21B0..21B1 ; Other_Math # So [2] UPWARDS ARROW WITH TIP LEFTWARDS..UPWARDS ARROW WITH TIP RIGHTWARDS +21B6..21B7 ; Other_Math # So [2] ANTICLOCKWISE TOP SEMICIRCLE ARROW..CLOCKWISE TOP SEMICIRCLE ARROW +21BC..21CD ; Other_Math # So [18] LEFTWARDS HARPOON WITH BARB UPWARDS..LEFTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Other_Math # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D3 ; Other_Math # So DOWNWARDS DOUBLE ARROW +21D5..21DB ; Other_Math # So [7] UP DOWN DOUBLE ARROW..RIGHTWARDS TRIPLE ARROW +21DD ; Other_Math # So RIGHTWARDS SQUIGGLE ARROW +21E4..21E5 ; Other_Math # So [2] LEFTWARDS ARROW TO BAR..RIGHTWARDS ARROW TO BAR +2308 ; Other_Math # Ps LEFT CEILING +2309 ; Other_Math # Pe RIGHT CEILING +230A ; Other_Math # Ps LEFT FLOOR +230B ; Other_Math # Pe RIGHT FLOOR +23B4..23B5 ; Other_Math # So [2] TOP SQUARE BRACKET..BOTTOM SQUARE BRACKET +23B7 ; Other_Math # So RADICAL SYMBOL BOTTOM +23D0 ; Other_Math # So VERTICAL LINE EXTENSION +23E2 ; Other_Math # So WHITE TRAPEZIUM +25A0..25A1 ; Other_Math # So [2] BLACK SQUARE..WHITE SQUARE +25AE..25B6 ; Other_Math # So [9] BLACK VERTICAL RECTANGLE..BLACK RIGHT-POINTING TRIANGLE +25BC..25C0 ; Other_Math # So [5] BLACK DOWN-POINTING TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C6..25C7 ; Other_Math # So [2] BLACK DIAMOND..WHITE DIAMOND +25CA..25CB ; Other_Math # So [2] LOZENGE..WHITE CIRCLE +25CF..25D3 ; Other_Math # So [5] BLACK CIRCLE..CIRCLE WITH UPPER HALF BLACK +25E2 ; Other_Math # So BLACK LOWER RIGHT TRIANGLE +25E4 ; Other_Math # So BLACK UPPER LEFT TRIANGLE +25E7..25EC ; Other_Math # So [6] SQUARE WITH LEFT HALF BLACK..WHITE UP-POINTING TRIANGLE WITH DOT +2605..2606 ; Other_Math # So [2] BLACK STAR..WHITE STAR +2640 ; Other_Math # So FEMALE SIGN +2642 ; Other_Math # So MALE SIGN +2660..2663 ; Other_Math # So [4] BLACK SPADE SUIT..BLACK CLUB SUIT +266D..266E ; Other_Math # So [2] MUSIC FLAT SIGN..MUSIC NATURAL SIGN +27C5 ; Other_Math # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Other_Math # Pe RIGHT S-SHAPED BAG DELIMITER +27E6 ; Other_Math # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Other_Math # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Other_Math # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Other_Math # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Other_Math # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Other_Math # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Other_Math # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Other_Math # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Other_Math # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Other_Math # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +2983 ; Other_Math # Ps LEFT WHITE CURLY BRACKET +2984 ; Other_Math # Pe RIGHT WHITE CURLY BRACKET +2985 ; Other_Math # Ps LEFT WHITE PARENTHESIS +2986 ; Other_Math # Pe RIGHT WHITE PARENTHESIS +2987 ; Other_Math # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Other_Math # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Other_Math # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Other_Math # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Other_Math # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Other_Math # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Other_Math # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Other_Math # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Other_Math # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Other_Math # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Other_Math # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Other_Math # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Other_Math # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Other_Math # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Other_Math # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Other_Math # Pe RIGHT BLACK TORTOISE SHELL BRACKET +29D8 ; Other_Math # Ps LEFT WIGGLY FENCE +29D9 ; Other_Math # Pe RIGHT WIGGLY FENCE +29DA ; Other_Math # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Other_Math # Pe RIGHT DOUBLE WIGGLY FENCE +29FC ; Other_Math # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Other_Math # Pe RIGHT-POINTING CURVED ANGLE BRACKET +FE61 ; Other_Math # Po SMALL ASTERISK +FE63 ; Other_Math # Pd SMALL HYPHEN-MINUS +FE68 ; Other_Math # Po SMALL REVERSE SOLIDUS +FF3C ; Other_Math # Po FULLWIDTH REVERSE SOLIDUS +FF3E ; Other_Math # Sk FULLWIDTH CIRCUMFLEX ACCENT +1D400..1D454 ; Other_Math # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Other_Math # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Other_Math # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Other_Math # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Other_Math # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Other_Math # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Other_Math # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Other_Math # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Other_Math # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Other_Math # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Other_Math # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Other_Math # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y +1D51E..1D539 ; Other_Math # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Other_Math # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Other_Math # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Other_Math # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Other_Math # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Other_Math # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Other_Math # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C2..1D6DA ; Other_Math # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DC..1D6FA ; Other_Math # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FC..1D714 ; Other_Math # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D716..1D734 ; Other_Math # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D736..1D74E ; Other_Math # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D750..1D76E ; Other_Math # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D770..1D788 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D78A..1D7A8 ; Other_Math # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7AA..1D7C2 ; Other_Math # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C4..1D7CB ; Other_Math # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Other_Math # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EE00..1EE03 ; Other_Math # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Other_Math # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Other_Math # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Other_Math # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Other_Math # Lo [4] ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Other_Math # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Other_Math # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Other_Math # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Other_Math # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Other_Math # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Other_Math # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Other_Math # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Other_Math # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Other_Math # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Other_Math # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Other_Math # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Other_Math # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Other_Math # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Other_Math # Lo [17] ARABIC MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN + +# Total code points: 1362 + +# ================================================ + +0030..0039 ; Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F +FF10..FF19 ; Hex_Digit # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF21..FF26 ; Hex_Digit # L& [6] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER F +FF41..FF46 ; Hex_Digit # L& [6] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER F + +# Total code points: 44 + +# ================================================ + +0030..0039 ; ASCII_Hex_Digit # Nd [10] DIGIT ZERO..DIGIT NINE +0041..0046 ; ASCII_Hex_Digit # L& [6] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER F +0061..0066 ; ASCII_Hex_Digit # L& [6] LATIN SMALL LETTER A..LATIN SMALL LETTER F + +# Total code points: 22 + +# ================================================ + +0345 ; Other_Alphabetic # Mn COMBINING GREEK YPOGEGRAMMENI +05B0..05BD ; Other_Alphabetic # Mn [14] HEBREW POINT SHEVA..HEBREW POINT METEG +05BF ; Other_Alphabetic # Mn HEBREW POINT RAFE +05C1..05C2 ; Other_Alphabetic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4..05C5 ; Other_Alphabetic # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C7 ; Other_Alphabetic # Mn HEBREW POINT QAMATS QATAN +0610..061A ; Other_Alphabetic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +064B..0657 ; Other_Alphabetic # Mn [13] ARABIC FATHATAN..ARABIC INVERTED DAMMA +0659..065F ; Other_Alphabetic # Mn [7] ARABIC ZWARAKAY..ARABIC WAVY HAMZA BELOW +0670 ; Other_Alphabetic # Mn ARABIC LETTER SUPERSCRIPT ALEF +06D6..06DC ; Other_Alphabetic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06E1..06E4 ; Other_Alphabetic # Mn [4] ARABIC SMALL HIGH DOTLESS HEAD OF KHAH..ARABIC SMALL HIGH MADDA +06E7..06E8 ; Other_Alphabetic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06ED ; Other_Alphabetic # Mn ARABIC SMALL LOW MEEM +0711 ; Other_Alphabetic # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0730..073F ; Other_Alphabetic # Mn [16] SYRIAC PTHAHA ABOVE..SYRIAC RWAHA +07A6..07B0 ; Other_Alphabetic # Mn [11] THAANA ABAFILI..THAANA SUKUN +0816..0817 ; Other_Alphabetic # Mn [2] SAMARITAN MARK IN..SAMARITAN MARK IN-ALAF +081B..0823 ; Other_Alphabetic # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0825..0827 ; Other_Alphabetic # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0829..082C ; Other_Alphabetic # Mn [4] SAMARITAN VOWEL SIGN LONG I..SAMARITAN VOWEL SIGN SUKUN +08D4..08DF ; Other_Alphabetic # Mn [12] ARABIC SMALL HIGH WORD AR-RUB..ARABIC SMALL HIGH WORD WAQFA +08E3..08E9 ; Other_Alphabetic # Mn [7] ARABIC TURNED DAMMA BELOW..ARABIC CURLY KASRATAN +08F0..0902 ; Other_Alphabetic # Mn [19] ARABIC OPEN FATHATAN..DEVANAGARI SIGN ANUSVARA +0903 ; Other_Alphabetic # Mc DEVANAGARI SIGN VISARGA +093A ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN OE +093B ; Other_Alphabetic # Mc DEVANAGARI VOWEL SIGN OOE +093E..0940 ; Other_Alphabetic # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Other_Alphabetic # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Other_Alphabetic # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094E..094F ; Other_Alphabetic # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0955..0957 ; Other_Alphabetic # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0962..0963 ; Other_Alphabetic # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0981 ; Other_Alphabetic # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Other_Alphabetic # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +09BE..09C0 ; Other_Alphabetic # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Other_Alphabetic # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Other_Alphabetic # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09D7 ; Other_Alphabetic # Mc BENGALI AU LENGTH MARK +09E2..09E3 ; Other_Alphabetic # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +0A01..0A02 ; Other_Alphabetic # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Other_Alphabetic # Mc GURMUKHI SIGN VISARGA +0A3E..0A40 ; Other_Alphabetic # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4C ; Other_Alphabetic # Mn [2] GURMUKHI VOWEL SIGN OO..GURMUKHI VOWEL SIGN AU +0A51 ; Other_Alphabetic # Mn GURMUKHI SIGN UDAAT +0A70..0A71 ; Other_Alphabetic # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A75 ; Other_Alphabetic # Mn GURMUKHI SIGN YAKASH +0A81..0A82 ; Other_Alphabetic # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Other_Alphabetic # Mc GUJARATI SIGN VISARGA +0ABE..0AC0 ; Other_Alphabetic # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Other_Alphabetic # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Other_Alphabetic # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Other_Alphabetic # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0AE2..0AE3 ; Other_Alphabetic # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AFA..0AFC ; Other_Alphabetic # Mn [3] GUJARATI SIGN SUKUN..GUJARATI SIGN MADDAH +0B01 ; Other_Alphabetic # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Other_Alphabetic # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B3E ; Other_Alphabetic # Mc ORIYA VOWEL SIGN AA +0B3F ; Other_Alphabetic # Mn ORIYA VOWEL SIGN I +0B40 ; Other_Alphabetic # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Other_Alphabetic # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Other_Alphabetic # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B56 ; Other_Alphabetic # Mn ORIYA AI LENGTH MARK +0B57 ; Other_Alphabetic # Mc ORIYA AU LENGTH MARK +0B62..0B63 ; Other_Alphabetic # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B82 ; Other_Alphabetic # Mn TAMIL SIGN ANUSVARA +0BBE..0BBF ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Other_Alphabetic # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Other_Alphabetic # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Other_Alphabetic # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BD7 ; Other_Alphabetic # Mc TAMIL AU LENGTH MARK +0C00 ; Other_Alphabetic # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Other_Alphabetic # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C3E..0C40 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Other_Alphabetic # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4C ; Other_Alphabetic # Mn [3] TELUGU VOWEL SIGN O..TELUGU VOWEL SIGN AU +0C55..0C56 ; Other_Alphabetic # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C62..0C63 ; Other_Alphabetic # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C81 ; Other_Alphabetic # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Other_Alphabetic # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0CBE ; Other_Alphabetic # Mc KANNADA VOWEL SIGN AA +0CBF ; Other_Alphabetic # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Other_Alphabetic # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC RR +0CC6 ; Other_Alphabetic # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Other_Alphabetic # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC ; Other_Alphabetic # Mn KANNADA VOWEL SIGN AU +0CD5..0CD6 ; Other_Alphabetic # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CE2..0CE3 ; Other_Alphabetic # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0D00..0D01 ; Other_Alphabetic # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Other_Alphabetic # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D3E..0D40 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Other_Alphabetic # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Other_Alphabetic # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D57 ; Other_Alphabetic # Mc MALAYALAM AU LENGTH MARK +0D62..0D63 ; Other_Alphabetic # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D81 ; Other_Alphabetic # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Other_Alphabetic # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0DCF..0DD1 ; Other_Alphabetic # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Other_Alphabetic # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Other_Alphabetic # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Other_Alphabetic # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DF2..0DF3 ; Other_Alphabetic # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0E31 ; Other_Alphabetic # Mn THAI CHARACTER MAI HAN-AKAT +0E34..0E3A ; Other_Alphabetic # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E4D ; Other_Alphabetic # Mn THAI CHARACTER NIKHAHIT +0EB1 ; Other_Alphabetic # Mn LAO VOWEL SIGN MAI KAN +0EB4..0EB9 ; Other_Alphabetic # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU +0EBB..0EBC ; Other_Alphabetic # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO +0ECD ; Other_Alphabetic # Mn LAO NIGGAHITA +0F71..0F7E ; Other_Alphabetic # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Other_Alphabetic # Mc TIBETAN SIGN RNAM BCAD +0F80..0F81 ; Other_Alphabetic # Mn [2] TIBETAN VOWEL SIGN REVERSED I..TIBETAN VOWEL SIGN REVERSED II +0F8D..0F97 ; Other_Alphabetic # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Other_Alphabetic # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +102B..102C ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Other_Alphabetic # Mc MYANMAR VOWEL SIGN E +1032..1036 ; Other_Alphabetic # Mn [5] MYANMAR VOWEL SIGN AI..MYANMAR SIGN ANUSVARA +1038 ; Other_Alphabetic # Mc MYANMAR SIGN VISARGA +103B..103C ; Other_Alphabetic # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Other_Alphabetic # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +1056..1057 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105E..1060 ; Other_Alphabetic # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1062..1064 ; Other_Alphabetic # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1067..106D ; Other_Alphabetic # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1071..1074 ; Other_Alphabetic # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1082 ; Other_Alphabetic # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Other_Alphabetic # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Other_Alphabetic # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Other_Alphabetic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Other_Alphabetic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Other_Alphabetic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109C ; Other_Alphabetic # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Other_Alphabetic # Mn MYANMAR VOWEL SIGN AITON AI +1712..1713 ; Other_Alphabetic # Mn [2] TAGALOG VOWEL SIGN I..TAGALOG VOWEL SIGN U +1732..1733 ; Other_Alphabetic # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1752..1753 ; Other_Alphabetic # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U +1772..1773 ; Other_Alphabetic # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U +17B6 ; Other_Alphabetic # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Other_Alphabetic # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Other_Alphabetic # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Other_Alphabetic # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Other_Alphabetic # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +1885..1886 ; Other_Alphabetic # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +18A9 ; Other_Alphabetic # Mn MONGOLIAN LETTER ALI GALI DAGALGA +1920..1922 ; Other_Alphabetic # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Other_Alphabetic # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Other_Alphabetic # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Other_Alphabetic # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Other_Alphabetic # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Other_Alphabetic # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Other_Alphabetic # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1A17..1A18 ; Other_Alphabetic # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Other_Alphabetic # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Other_Alphabetic # Mn BUGINESE VOWEL SIGN AE +1A55 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Other_Alphabetic # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Other_Alphabetic # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Other_Alphabetic # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA +1A61 ; Other_Alphabetic # Mc TAI THAM VOWEL SIGN A +1A62 ; Other_Alphabetic # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Other_Alphabetic # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Other_Alphabetic # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Other_Alphabetic # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A74 ; Other_Alphabetic # Mn [2] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN MAI KANG +1ABF..1AC0 ; Other_Alphabetic # Mn [2] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER TURNED W BELOW +1ACC..1ACE ; Other_Alphabetic # Mn [3] COMBINING LATIN SMALL LETTER INSULAR G..COMBINING LATIN SMALL LETTER INSULAR T +1B00..1B03 ; Other_Alphabetic # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Other_Alphabetic # Mc BALINESE SIGN BISAH +1B35 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Other_Alphabetic # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Other_Alphabetic # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Other_Alphabetic # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Other_Alphabetic # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Other_Alphabetic # Mn BALINESE VOWEL SIGN PEPET +1B43 ; Other_Alphabetic # Mc BALINESE VOWEL SIGN PEPET TEDUNG +1B80..1B81 ; Other_Alphabetic # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Other_Alphabetic # Mc SUNDANESE SIGN PANGWISAD +1BA1 ; Other_Alphabetic # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Other_Alphabetic # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Other_Alphabetic # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Other_Alphabetic # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAC..1BAD ; Other_Alphabetic # Mn [2] SUNDANESE CONSONANT SIGN PASANGAN MA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BE7 ; Other_Alphabetic # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Other_Alphabetic # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Other_Alphabetic # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Other_Alphabetic # Mn BATAK VOWEL SIGN KARO O +1BEE ; Other_Alphabetic # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Other_Alphabetic # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1C24..1C2B ; Other_Alphabetic # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Other_Alphabetic # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Other_Alphabetic # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36 ; Other_Alphabetic # Mn LEPCHA SIGN RAN +1DE7..1DF4 ; Other_Alphabetic # Mn [14] COMBINING LATIN SMALL LETTER ALPHA..COMBINING LATIN SMALL LETTER U WITH DIAERESIS +24B6..24E9 ; Other_Alphabetic # So [52] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN SMALL LETTER Z +2DE0..2DFF ; Other_Alphabetic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A674..A67B ; Other_Alphabetic # Mn [8] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC LETTER OMEGA +A69E..A69F ; Other_Alphabetic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +A802 ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN DVISVARA +A80B ; Other_Alphabetic # Mn SYLOTI NAGRI SIGN ANUSVARA +A823..A824 ; Other_Alphabetic # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Other_Alphabetic # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Other_Alphabetic # Mc SYLOTI NAGRI VOWEL SIGN OO +A880..A881 ; Other_Alphabetic # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A8B4..A8C3 ; Other_Alphabetic # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C5 ; Other_Alphabetic # Mn SAURASHTRA SIGN CANDRABINDU +A8FF ; Other_Alphabetic # Mn DEVANAGARI VOWEL SIGN AY +A926..A92A ; Other_Alphabetic # Mn [5] KAYAH LI VOWEL UE..KAYAH LI VOWEL O +A947..A951 ; Other_Alphabetic # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952 ; Other_Alphabetic # Mc REJANG CONSONANT SIGN H +A980..A982 ; Other_Alphabetic # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Other_Alphabetic # Mc JAVANESE SIGN WIGNYAN +A9B4..A9B5 ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Other_Alphabetic # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Other_Alphabetic # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Other_Alphabetic # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9BF ; Other_Alphabetic # Mc [2] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE CONSONANT SIGN CAKRA +A9E5 ; Other_Alphabetic # Mn MYANMAR SIGN SHAN SAW +AA29..AA2E ; Other_Alphabetic # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Other_Alphabetic # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Other_Alphabetic # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Other_Alphabetic # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Other_Alphabetic # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA43 ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL NG +AA4C ; Other_Alphabetic # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Other_Alphabetic # Mc CHAM CONSONANT SIGN FINAL H +AA7B ; Other_Alphabetic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Other_Alphabetic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Other_Alphabetic # Mc MYANMAR SIGN TAI LAING TONE-5 +AAB0 ; Other_Alphabetic # Mn TAI VIET MAI KANG +AAB2..AAB4 ; Other_Alphabetic # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB7..AAB8 ; Other_Alphabetic # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AABE ; Other_Alphabetic # Mn TAI VIET VOWEL AM +AAEB ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Other_Alphabetic # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF5 ; Other_Alphabetic # Mc MEETEI MAYEK VOWEL SIGN VISARGA +ABE3..ABE4 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Other_Alphabetic # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Other_Alphabetic # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +FB1E ; Other_Alphabetic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +10376..1037A ; Other_Alphabetic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII +10A01..10A03 ; Other_Alphabetic # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Other_Alphabetic # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Other_Alphabetic # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10D24..10D27 ; Other_Alphabetic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10EAB..10EAC ; Other_Alphabetic # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +11000 ; Other_Alphabetic # Mc BRAHMI SIGN CANDRABINDU +11001 ; Other_Alphabetic # Mn BRAHMI SIGN ANUSVARA +11002 ; Other_Alphabetic # Mc BRAHMI SIGN VISARGA +11038..11045 ; Other_Alphabetic # Mn [14] BRAHMI VOWEL SIGN AA..BRAHMI VOWEL SIGN AU +11073..11074 ; Other_Alphabetic # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11082 ; Other_Alphabetic # Mc KAITHI SIGN VISARGA +110B0..110B2 ; Other_Alphabetic # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Other_Alphabetic # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Other_Alphabetic # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110C2 ; Other_Alphabetic # Mn KAITHI VOWEL SIGN VOCALIC R +11100..11102 ; Other_Alphabetic # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11127..1112B ; Other_Alphabetic # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Other_Alphabetic # Mc CHAKMA VOWEL SIGN E +1112D..11132 ; Other_Alphabetic # Mn [6] CHAKMA VOWEL SIGN AI..CHAKMA AU MARK +11145..11146 ; Other_Alphabetic # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11180..11181 ; Other_Alphabetic # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Other_Alphabetic # Mc SHARADA SIGN VISARGA +111B3..111B5 ; Other_Alphabetic # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Other_Alphabetic # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF ; Other_Alphabetic # Mc SHARADA VOWEL SIGN AU +111CE ; Other_Alphabetic # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; Other_Alphabetic # Mn SHARADA SIGN INVERTED CANDRABINDU +1122C..1122E ; Other_Alphabetic # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Other_Alphabetic # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Other_Alphabetic # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Other_Alphabetic # Mn KHOJKI SIGN ANUSVARA +11237 ; Other_Alphabetic # Mn KHOJKI SIGN SHADDA +1123E ; Other_Alphabetic # Mn KHOJKI SIGN SUKUN +112DF ; Other_Alphabetic # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Other_Alphabetic # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112E8 ; Other_Alphabetic # Mn [6] KHUDAWADI VOWEL SIGN U..KHUDAWADI VOWEL SIGN AU +11300..11301 ; Other_Alphabetic # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Other_Alphabetic # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +1133E..1133F ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Other_Alphabetic # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Other_Alphabetic # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134C ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN OO..GRANTHA VOWEL SIGN AU +11357 ; Other_Alphabetic # Mc GRANTHA AU LENGTH MARK +11362..11363 ; Other_Alphabetic # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11435..11437 ; Other_Alphabetic # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Other_Alphabetic # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Other_Alphabetic # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11443..11444 ; Other_Alphabetic # Mn [2] NEWA SIGN CANDRABINDU..NEWA SIGN ANUSVARA +11445 ; Other_Alphabetic # Mc NEWA SIGN VISARGA +114B0..114B2 ; Other_Alphabetic # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Other_Alphabetic # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Other_Alphabetic # Mc TIRHUTA VOWEL SIGN E +114BA ; Other_Alphabetic # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Other_Alphabetic # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Other_Alphabetic # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Other_Alphabetic # Mc TIRHUTA SIGN VISARGA +115AF..115B1 ; Other_Alphabetic # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Other_Alphabetic # Mn [4] SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Other_Alphabetic # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Other_Alphabetic # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Other_Alphabetic # Mc SIDDHAM SIGN VISARGA +115DC..115DD ; Other_Alphabetic # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU +11630..11632 ; Other_Alphabetic # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Other_Alphabetic # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Other_Alphabetic # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Other_Alphabetic # Mn MODI SIGN ANUSVARA +1163E ; Other_Alphabetic # Mc MODI SIGN VISARGA +11640 ; Other_Alphabetic # Mn MODI SIGN ARDHACANDRA +116AB ; Other_Alphabetic # Mn TAKRI SIGN ANUSVARA +116AC ; Other_Alphabetic # Mc TAKRI SIGN VISARGA +116AD ; Other_Alphabetic # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Other_Alphabetic # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Other_Alphabetic # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +1171D..1171F ; Other_Alphabetic # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Other_Alphabetic # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Other_Alphabetic # Mc AHOM VOWEL SIGN E +11727..1172A ; Other_Alphabetic # Mn [4] AHOM VOWEL SIGN AW..AHOM VOWEL SIGN AM +1182C..1182E ; Other_Alphabetic # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Other_Alphabetic # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Other_Alphabetic # Mc DOGRA SIGN VISARGA +11930..11935 ; Other_Alphabetic # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Other_Alphabetic # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Other_Alphabetic # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +11940 ; Other_Alphabetic # Mc DIVES AKURU MEDIAL YA +11942 ; Other_Alphabetic # Mc DIVES AKURU MEDIAL RA +119D1..119D3 ; Other_Alphabetic # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Other_Alphabetic # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Other_Alphabetic # Mn [2] NANDINAGARI VOWEL SIGN E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Other_Alphabetic # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E4 ; Other_Alphabetic # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E +11A01..11A0A ; Other_Alphabetic # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A35..11A38 ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE SIGN CANDRABINDU..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Other_Alphabetic # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3B..11A3E ; Other_Alphabetic # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A51..11A56 ; Other_Alphabetic # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Other_Alphabetic # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Other_Alphabetic # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A8A..11A96 ; Other_Alphabetic # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Other_Alphabetic # Mc SOYOMBO SIGN VISARGA +11C2F ; Other_Alphabetic # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Other_Alphabetic # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Other_Alphabetic # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Other_Alphabetic # Mc BHAIKSUKI SIGN VISARGA +11C92..11CA7 ; Other_Alphabetic # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Other_Alphabetic # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Other_Alphabetic # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Other_Alphabetic # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Other_Alphabetic # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Other_Alphabetic # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU +11D31..11D36 ; Other_Alphabetic # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Other_Alphabetic # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Other_Alphabetic # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D41 ; Other_Alphabetic # Mn [3] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI SIGN VISARGA +11D43 ; Other_Alphabetic # Mn MASARAM GONDI SIGN CANDRA +11D47 ; Other_Alphabetic # Mn MASARAM GONDI RA-KARA +11D8A..11D8E ; Other_Alphabetic # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Other_Alphabetic # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Other_Alphabetic # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Other_Alphabetic # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Other_Alphabetic # Mc GUNJALA GONDI SIGN VISARGA +11EF3..11EF4 ; Other_Alphabetic # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Other_Alphabetic # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +16F4F ; Other_Alphabetic # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F51..16F87 ; Other_Alphabetic # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Other_Alphabetic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16FF0..16FF1 ; Other_Alphabetic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1BC9E ; Other_Alphabetic # Mn DUPLOYAN DOUBLE MARK +1E000..1E006 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Other_Alphabetic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Other_Alphabetic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Other_Alphabetic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Other_Alphabetic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA +1E947 ; Other_Alphabetic # Mn ADLAM HAMZA +1F130..1F149 ; Other_Alphabetic # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Alphabetic # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Alphabetic # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 1404 + +# ================================================ + +3006 ; Ideographic # Lo IDEOGRAPHIC CLOSING MARK +3007 ; Ideographic # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Ideographic # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Ideographic # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +3400..4DBF ; Ideographic # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..9FFF ; Ideographic # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +F900..FA6D ; Ideographic # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Ideographic # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +16FE4 ; Ideographic # Mn KHITAN SMALL SCRIPT FILLER +17000..187F7 ; Ideographic # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18CD5 ; Ideographic # Lo [1238] TANGUT COMPONENT-001..KHITAN SMALL SCRIPT CHARACTER-18CD5 +18D00..18D08 ; Ideographic # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 +1B170..1B2FB ; Ideographic # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB +20000..2A6DF ; Ideographic # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Ideographic # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; Ideographic # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Ideographic # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Ideographic # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Ideographic # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Ideographic # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 101661 + +# ================================================ + +005E ; Diacritic # Sk CIRCUMFLEX ACCENT +0060 ; Diacritic # Sk GRAVE ACCENT +00A8 ; Diacritic # Sk DIAERESIS +00AF ; Diacritic # Sk MACRON +00B4 ; Diacritic # Sk ACUTE ACCENT +00B7 ; Diacritic # Po MIDDLE DOT +00B8 ; Diacritic # Sk CEDILLA +02B0..02C1 ; Diacritic # Lm [18] MODIFIER LETTER SMALL H..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Diacritic # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Diacritic # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Diacritic # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E0..02E4 ; Diacritic # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +02E5..02EB ; Diacritic # Sk [7] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER YANG DEPARTING TONE MARK +02EC ; Diacritic # Lm MODIFIER LETTER VOICING +02ED ; Diacritic # Sk MODIFIER LETTER UNASPIRATED +02EE ; Diacritic # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Diacritic # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0300..034E ; Diacritic # Mn [79] COMBINING GRAVE ACCENT..COMBINING UPWARDS ARROW BELOW +0350..0357 ; Diacritic # Mn [8] COMBINING RIGHT ARROWHEAD ABOVE..COMBINING RIGHT HALF RING ABOVE +035D..0362 ; Diacritic # Mn [6] COMBINING DOUBLE BREVE..COMBINING DOUBLE RIGHTWARDS ARROW BELOW +0374 ; Diacritic # Lm GREEK NUMERAL SIGN +0375 ; Diacritic # Sk GREEK LOWER NUMERAL SIGN +037A ; Diacritic # Lm GREEK YPOGEGRAMMENI +0384..0385 ; Diacritic # Sk [2] GREEK TONOS..GREEK DIALYTIKA TONOS +0483..0487 ; Diacritic # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE +0559 ; Diacritic # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +0591..05A1 ; Diacritic # Mn [17] HEBREW ACCENT ETNAHTA..HEBREW ACCENT PAZER +05A3..05BD ; Diacritic # Mn [27] HEBREW ACCENT MUNAH..HEBREW POINT METEG +05BF ; Diacritic # Mn HEBREW POINT RAFE +05C1..05C2 ; Diacritic # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C4 ; Diacritic # Mn HEBREW MARK UPPER DOT +064B..0652 ; Diacritic # Mn [8] ARABIC FATHATAN..ARABIC SUKUN +0657..0658 ; Diacritic # Mn [2] ARABIC INVERTED DAMMA..ARABIC MARK NOON GHUNNA +06DF..06E0 ; Diacritic # Mn [2] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH UPRIGHT RECTANGULAR ZERO +06E5..06E6 ; Diacritic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06EA..06EC ; Diacritic # Mn [3] ARABIC EMPTY CENTRE LOW STOP..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE +0730..074A ; Diacritic # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +07A6..07B0 ; Diacritic # Mn [11] THAANA ABAFILI..THAANA SUKUN +07EB..07F3 ; Diacritic # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Diacritic # Lm [2] NKO HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +0818..0819 ; Diacritic # Mn [2] SAMARITAN MARK OCCLUSION..SAMARITAN MARK DAGESH +0898..089F ; Diacritic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08C9 ; Diacritic # Lm ARABIC SMALL FARSI YEH +08CA..08D2 ; Diacritic # Mn [9] ARABIC SMALL HIGH FARSI YEH..ARABIC LARGE ROUND DOT INSIDE CIRCLE BELOW +08E3..08FE ; Diacritic # Mn [28] ARABIC TURNED DAMMA BELOW..ARABIC DAMMA WITH DOT +093C ; Diacritic # Mn DEVANAGARI SIGN NUKTA +094D ; Diacritic # Mn DEVANAGARI SIGN VIRAMA +0951..0954 ; Diacritic # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +0971 ; Diacritic # Lm DEVANAGARI SIGN HIGH SPACING DOT +09BC ; Diacritic # Mn BENGALI SIGN NUKTA +09CD ; Diacritic # Mn BENGALI SIGN VIRAMA +0A3C ; Diacritic # Mn GURMUKHI SIGN NUKTA +0A4D ; Diacritic # Mn GURMUKHI SIGN VIRAMA +0ABC ; Diacritic # Mn GUJARATI SIGN NUKTA +0ACD ; Diacritic # Mn GUJARATI SIGN VIRAMA +0AFD..0AFF ; Diacritic # Mn [3] GUJARATI SIGN THREE-DOT NUKTA ABOVE..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE +0B3C ; Diacritic # Mn ORIYA SIGN NUKTA +0B4D ; Diacritic # Mn ORIYA SIGN VIRAMA +0B55 ; Diacritic # Mn ORIYA SIGN OVERLINE +0BCD ; Diacritic # Mn TAMIL SIGN VIRAMA +0C3C ; Diacritic # Mn TELUGU SIGN NUKTA +0C4D ; Diacritic # Mn TELUGU SIGN VIRAMA +0CBC ; Diacritic # Mn KANNADA SIGN NUKTA +0CCD ; Diacritic # Mn KANNADA SIGN VIRAMA +0D3B..0D3C ; Diacritic # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D4D ; Diacritic # Mn MALAYALAM SIGN VIRAMA +0DCA ; Diacritic # Mn SINHALA SIGN AL-LAKUNA +0E47..0E4C ; Diacritic # Mn [6] THAI CHARACTER MAITAIKHU..THAI CHARACTER THANTHAKHAT +0E4E ; Diacritic # Mn THAI CHARACTER YAMAKKAN +0EBA ; Diacritic # Mn LAO SIGN PALI VIRAMA +0EC8..0ECC ; Diacritic # Mn [5] LAO TONE MAI EK..LAO CANCELLATION MARK +0F18..0F19 ; Diacritic # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F35 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F37 ; Diacritic # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F39 ; Diacritic # Mn TIBETAN MARK TSA -PHRU +0F3E..0F3F ; Diacritic # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F82..0F84 ; Diacritic # Mn [3] TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA +0F86..0F87 ; Diacritic # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0FC6 ; Diacritic # Mn TIBETAN SYMBOL PADMA GDAN +1037 ; Diacritic # Mn MYANMAR SIGN DOT BELOW +1039..103A ; Diacritic # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +1063..1064 ; Diacritic # Mc [2] MYANMAR TONE MARK SGAW KAREN HATHI..MYANMAR TONE MARK SGAW KAREN KE PHO +1069..106D ; Diacritic # Mc [5] MYANMAR SIGN WESTERN PWO KAREN TONE-1..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +1087..108C ; Diacritic # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Diacritic # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108F ; Diacritic # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +109A..109B ; Diacritic # Mc [2] MYANMAR SIGN KHAMTI TONE-1..MYANMAR SIGN KHAMTI TONE-3 +135D..135F ; Diacritic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1714 ; Diacritic # Mn TAGALOG SIGN VIRAMA +1715 ; Diacritic # Mc TAGALOG SIGN PAMUDPOD +17C9..17D3 ; Diacritic # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17DD ; Diacritic # Mn KHMER SIGN ATTHACAN +1939..193B ; Diacritic # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1A75..1A7C ; Diacritic # Mn [8] TAI THAM SIGN TONE-1..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Diacritic # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1AB0..1ABD ; Diacritic # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Diacritic # Me COMBINING PARENTHESES OVERLAY +1AC1..1ACB ; Diacritic # Mn [11] COMBINING LEFT PARENTHESIS ABOVE LEFT..COMBINING TRIPLE ACUTE ACCENT +1B34 ; Diacritic # Mn BALINESE SIGN REREKAN +1B44 ; Diacritic # Mc BALINESE ADEG ADEG +1B6B..1B73 ; Diacritic # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1BAA ; Diacritic # Mc SUNDANESE SIGN PAMAAEH +1BAB ; Diacritic # Mn SUNDANESE SIGN VIRAMA +1C36..1C37 ; Diacritic # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C78..1C7D ; Diacritic # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1CD0..1CD2 ; Diacritic # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD3 ; Diacritic # Po VEDIC SIGN NIHSHVASA +1CD4..1CE0 ; Diacritic # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE1 ; Diacritic # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE2..1CE8 ; Diacritic # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Diacritic # Mn VEDIC SIGN TIRYAK +1CF4 ; Diacritic # Mn VEDIC TONE CANDRA ABOVE +1CF7 ; Diacritic # Mc VEDIC SIGN ATIKRAMA +1CF8..1CF9 ; Diacritic # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE +1D2C..1D6A ; Diacritic # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1DC4..1DCF ; Diacritic # Mn [12] COMBINING MACRON-ACUTE..COMBINING ZIGZAG BELOW +1DF5..1DFF ; Diacritic # Mn [11] COMBINING UP TACK ABOVE..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +1FBD ; Diacritic # Sk GREEK KORONIS +1FBF..1FC1 ; Diacritic # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FCD..1FCF ; Diacritic # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FDD..1FDF ; Diacritic # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FED..1FEF ; Diacritic # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FFD..1FFE ; Diacritic # Sk [2] GREEK OXIA..GREEK DASIA +2CEF..2CF1 ; Diacritic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2E2F ; Diacritic # Lm VERTICAL TILDE +302A..302D ; Diacritic # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +302E..302F ; Diacritic # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3099..309A ; Diacritic # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Diacritic # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30FC ; Diacritic # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +A66F ; Diacritic # Mn COMBINING CYRILLIC VZMET +A67C..A67D ; Diacritic # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK +A67F ; Diacritic # Lm CYRILLIC PAYEROK +A69C..A69D ; Diacritic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A6F0..A6F1 ; Diacritic # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A700..A716 ; Diacritic # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Diacritic # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Diacritic # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Diacritic # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Diacritic # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A7F8..A7F9 ; Diacritic # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A8C4 ; Diacritic # Mn SAURASHTRA SIGN VIRAMA +A8E0..A8F1 ; Diacritic # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A92B..A92D ; Diacritic # Mn [3] KAYAH LI TONE PLOPHU..KAYAH LI TONE CALYA PLOPHU +A92E ; Diacritic # Po KAYAH LI SIGN CWI +A953 ; Diacritic # Mc REJANG VIRAMA +A9B3 ; Diacritic # Mn JAVANESE SIGN CECAK TELU +A9C0 ; Diacritic # Mc JAVANESE PANGKON +A9E5 ; Diacritic # Mn MYANMAR SIGN SHAN SAW +AA7B ; Diacritic # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Diacritic # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Diacritic # Mc MYANMAR SIGN TAI LAING TONE-5 +AABF ; Diacritic # Mn TAI VIET TONE MAI EK +AAC0 ; Diacritic # Lo TAI VIET TONE MAI NUENG +AAC1 ; Diacritic # Mn TAI VIET TONE MAI THO +AAC2 ; Diacritic # Lo TAI VIET TONE MAI SONG +AAF6 ; Diacritic # Mn MEETEI MAYEK VIRAMA +AB5B ; Diacritic # Sk MODIFIER BREVE WITH INVERTED BREVE +AB5C..AB5F ; Diacritic # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB69 ; Diacritic # Lm MODIFIER LETTER SMALL TURNED W +AB6A..AB6B ; Diacritic # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +ABEC ; Diacritic # Mc MEETEI MAYEK LUM IYEK +ABED ; Diacritic # Mn MEETEI MAYEK APUN IYEK +FB1E ; Diacritic # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FE20..FE2F ; Diacritic # Mn [16] COMBINING LIGATURE LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF +FF3E ; Diacritic # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF40 ; Diacritic # Sk FULLWIDTH GRAVE ACCENT +FF70 ; Diacritic # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Diacritic # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE3 ; Diacritic # Sk FULLWIDTH MACRON +102E0 ; Diacritic # Mn COPTIC EPACT THOUSANDS MARK +10780..10785 ; Diacritic # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Diacritic # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Diacritic # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +10AE5..10AE6 ; Diacritic # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10D22..10D23 ; Diacritic # Lo [2] HANIFI ROHINGYA MARK SAKIN..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Diacritic # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10F46..10F50 ; Diacritic # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F82..10F85 ; Diacritic # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +11046 ; Diacritic # Mn BRAHMI VIRAMA +11070 ; Diacritic # Mn BRAHMI SIGN OLD TAMIL VIRAMA +110B9..110BA ; Diacritic # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +11133..11134 ; Diacritic # Mn [2] CHAKMA VIRAMA..CHAKMA MAAYYAA +11173 ; Diacritic # Mn MAHAJANI SIGN NUKTA +111C0 ; Diacritic # Mc SHARADA SIGN VIRAMA +111CA..111CC ; Diacritic # Mn [3] SHARADA SIGN NUKTA..SHARADA EXTRA SHORT VOWEL MARK +11235 ; Diacritic # Mc KHOJKI SIGN VIRAMA +11236 ; Diacritic # Mn KHOJKI SIGN NUKTA +112E9..112EA ; Diacritic # Mn [2] KHUDAWADI SIGN NUKTA..KHUDAWADI SIGN VIRAMA +1133C ; Diacritic # Mn GRANTHA SIGN NUKTA +1134D ; Diacritic # Mc GRANTHA SIGN VIRAMA +11366..1136C ; Diacritic # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Diacritic # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA +11442 ; Diacritic # Mn NEWA SIGN VIRAMA +11446 ; Diacritic # Mn NEWA SIGN NUKTA +114C2..114C3 ; Diacritic # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +115BF..115C0 ; Diacritic # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +1163F ; Diacritic # Mn MODI SIGN VIRAMA +116B6 ; Diacritic # Mc TAKRI SIGN VIRAMA +116B7 ; Diacritic # Mn TAKRI SIGN NUKTA +1172B ; Diacritic # Mn AHOM SIGN KILLER +11839..1183A ; Diacritic # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1193D ; Diacritic # Mc DIVES AKURU SIGN HALANTA +1193E ; Diacritic # Mn DIVES AKURU VIRAMA +11943 ; Diacritic # Mn DIVES AKURU SIGN NUKTA +119E0 ; Diacritic # Mn NANDINAGARI SIGN VIRAMA +11A34 ; Diacritic # Mn ZANABAZAR SQUARE SIGN VIRAMA +11A47 ; Diacritic # Mn ZANABAZAR SQUARE SUBJOINER +11A99 ; Diacritic # Mn SOYOMBO SUBJOINER +11C3F ; Diacritic # Mn BHAIKSUKI SIGN VIRAMA +11D42 ; Diacritic # Mn MASARAM GONDI SIGN NUKTA +11D44..11D45 ; Diacritic # Mn [2] MASARAM GONDI SIGN HALANTA..MASARAM GONDI VIRAMA +11D97 ; Diacritic # Mn GUNJALA GONDI VIRAMA +16AF0..16AF4 ; Diacritic # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16B30..16B36 ; Diacritic # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16F8F..16F92 ; Diacritic # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Diacritic # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 +16FF0..16FF1 ; Diacritic # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +1AFF0..1AFF3 ; Diacritic # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Diacritic # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Diacritic # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1CF00..1CF2D ; Diacritic # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Diacritic # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Diacritic # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D16D..1D172 ; Diacritic # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE +1E2EC..1E2EF ; Diacritic # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E8D0..1E8D6 ; Diacritic # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS +1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK +1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA + +# Total code points: 1064 + +# ================================================ + +00B7 ; Extender # Po MIDDLE DOT +02D0..02D1 ; Extender # Lm [2] MODIFIER LETTER TRIANGULAR COLON..MODIFIER LETTER HALF TRIANGULAR COLON +0640 ; Extender # Lm ARABIC TATWEEL +07FA ; Extender # Lm NKO LAJANYALAN +0B55 ; Extender # Mn ORIYA SIGN OVERLINE +0E46 ; Extender # Lm THAI CHARACTER MAIYAMOK +0EC6 ; Extender # Lm LAO KO LA +180A ; Extender # Po MONGOLIAN NIRUGU +1843 ; Extender # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1AA7 ; Extender # Lm TAI THAM SIGN MAI YAMOK +1C36 ; Extender # Mn LEPCHA SIGN RAN +1C7B ; Extender # Lm OL CHIKI RELAA +3005 ; Extender # Lm IDEOGRAPHIC ITERATION MARK +3031..3035 ; Extender # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +309D..309E ; Extender # Lm [2] HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK +30FC..30FE ; Extender # Lm [3] KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK +A015 ; Extender # Lm YI SYLLABLE WU +A60C ; Extender # Lm VAI SYLLABLE LENGTHENER +A9CF ; Extender # Lm JAVANESE PANGRANGKEP +A9E6 ; Extender # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +AA70 ; Extender # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AADD ; Extender # Lm TAI VIET SYMBOL SAM +AAF3..AAF4 ; Extender # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +FF70 ; Extender # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +10781..10782 ; Extender # Lm [2] MODIFIER LETTER SUPERSCRIPT TRIANGULAR COLON..MODIFIER LETTER SUPERSCRIPT HALF TRIANGULAR COLON +1135D ; Extender # Lo GRANTHA SIGN PLUTA +115C6..115C8 ; Extender # Po [3] SIDDHAM REPETITION MARK-1..SIDDHAM REPETITION MARK-3 +11A98 ; Extender # Mn SOYOMBO GEMINATION MARK +16B42..16B43 ; Extender # Lm [2] PAHAWH HMONG SIGN VOS NRUA..PAHAWH HMONG SIGN IB YAM +16FE0..16FE1 ; Extender # Lm [2] TANGUT ITERATION MARK..NUSHU ITERATION MARK +16FE3 ; Extender # Lm OLD CHINESE ITERATION MARK +1E13C..1E13D ; Extender # Lm [2] NYIAKENG PUACHUE HMONG SIGN XW XW..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E944..1E946 ; Extender # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK + +# Total code points: 50 + +# ================================================ + +00AA ; Other_Lowercase # Lo FEMININE ORDINAL INDICATOR +00BA ; Other_Lowercase # Lo MASCULINE ORDINAL INDICATOR +02B0..02B8 ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02C0..02C1 ; Other_Lowercase # Lm [2] MODIFIER LETTER GLOTTAL STOP..MODIFIER LETTER REVERSED GLOTTAL STOP +02E0..02E4 ; Other_Lowercase # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +0345 ; Other_Lowercase # Mn COMBINING GREEK YPOGEGRAMMENI +037A ; Other_Lowercase # Lm GREEK YPOGEGRAMMENI +1D2C..1D6A ; Other_Lowercase # Lm [63] MODIFIER LETTER CAPITAL A..GREEK SUBSCRIPT SMALL LETTER CHI +1D78 ; Other_Lowercase # Lm MODIFIER LETTER CYRILLIC EN +1D9B..1DBF ; Other_Lowercase # Lm [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA +2071 ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Other_Lowercase # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Other_Lowercase # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +2170..217F ; Other_Lowercase # Nl [16] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL ONE THOUSAND +24D0..24E9 ; Other_Lowercase # So [26] CIRCLED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +2C7C..2C7D ; Other_Lowercase # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +A69C..A69D ; Other_Lowercase # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A770 ; Other_Lowercase # Lm MODIFIER LETTER US +A7F8..A7F9 ; Other_Lowercase # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +AB5C..AB5F ; Other_Lowercase # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +10780 ; Other_Lowercase # Lm MODIFIER LETTER SMALL CAPITAL AA +10783..10785 ; Other_Lowercase # Lm [3] MODIFIER LETTER SMALL AE..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Other_Lowercase # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Other_Lowercase # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL + +# Total code points: 244 + +# ================================================ + +2160..216F ; Other_Uppercase # Nl [16] ROMAN NUMERAL ONE..ROMAN NUMERAL ONE THOUSAND +24B6..24CF ; Other_Uppercase # So [26] CIRCLED LATIN CAPITAL LETTER A..CIRCLED LATIN CAPITAL LETTER Z +1F130..1F149 ; Other_Uppercase # So [26] SQUARED LATIN CAPITAL LETTER A..SQUARED LATIN CAPITAL LETTER Z +1F150..1F169 ; Other_Uppercase # So [26] NEGATIVE CIRCLED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z +1F170..1F189 ; Other_Uppercase # So [26] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED LATIN CAPITAL LETTER Z + +# Total code points: 120 + +# ================================================ + +FDD0..FDEF ; Noncharacter_Code_Point # Cn [32] .. +FFFE..FFFF ; Noncharacter_Code_Point # Cn [2] .. +1FFFE..1FFFF ; Noncharacter_Code_Point # Cn [2] .. +2FFFE..2FFFF ; Noncharacter_Code_Point # Cn [2] .. +3FFFE..3FFFF ; Noncharacter_Code_Point # Cn [2] .. +4FFFE..4FFFF ; Noncharacter_Code_Point # Cn [2] .. +5FFFE..5FFFF ; Noncharacter_Code_Point # Cn [2] .. +6FFFE..6FFFF ; Noncharacter_Code_Point # Cn [2] .. +7FFFE..7FFFF ; Noncharacter_Code_Point # Cn [2] .. +8FFFE..8FFFF ; Noncharacter_Code_Point # Cn [2] .. +9FFFE..9FFFF ; Noncharacter_Code_Point # Cn [2] .. +AFFFE..AFFFF ; Noncharacter_Code_Point # Cn [2] .. +BFFFE..BFFFF ; Noncharacter_Code_Point # Cn [2] .. +CFFFE..CFFFF ; Noncharacter_Code_Point # Cn [2] .. +DFFFE..DFFFF ; Noncharacter_Code_Point # Cn [2] .. +EFFFE..EFFFF ; Noncharacter_Code_Point # Cn [2] .. +FFFFE..FFFFF ; Noncharacter_Code_Point # Cn [2] .. +10FFFE..10FFFF; Noncharacter_Code_Point # Cn [2] .. + +# Total code points: 66 + +# ================================================ + +09BE ; Other_Grapheme_Extend # Mc BENGALI VOWEL SIGN AA +09D7 ; Other_Grapheme_Extend # Mc BENGALI AU LENGTH MARK +0B3E ; Other_Grapheme_Extend # Mc ORIYA VOWEL SIGN AA +0B57 ; Other_Grapheme_Extend # Mc ORIYA AU LENGTH MARK +0BBE ; Other_Grapheme_Extend # Mc TAMIL VOWEL SIGN AA +0BD7 ; Other_Grapheme_Extend # Mc TAMIL AU LENGTH MARK +0CC2 ; Other_Grapheme_Extend # Mc KANNADA VOWEL SIGN UU +0CD5..0CD6 ; Other_Grapheme_Extend # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0D3E ; Other_Grapheme_Extend # Mc MALAYALAM VOWEL SIGN AA +0D57 ; Other_Grapheme_Extend # Mc MALAYALAM AU LENGTH MARK +0DCF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN AELA-PILLA +0DDF ; Other_Grapheme_Extend # Mc SINHALA VOWEL SIGN GAYANUKITTA +1B35 ; Other_Grapheme_Extend # Mc BALINESE VOWEL SIGN TEDUNG +200C ; Other_Grapheme_Extend # Cf ZERO WIDTH NON-JOINER +302E..302F ; Other_Grapheme_Extend # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +1133E ; Other_Grapheme_Extend # Mc GRANTHA VOWEL SIGN AA +11357 ; Other_Grapheme_Extend # Mc GRANTHA AU LENGTH MARK +114B0 ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN AA +114BD ; Other_Grapheme_Extend # Mc TIRHUTA VOWEL SIGN SHORT O +115AF ; Other_Grapheme_Extend # Mc SIDDHAM VOWEL SIGN AA +11930 ; Other_Grapheme_Extend # Mc DIVES AKURU VOWEL SIGN AA +1D165 ; Other_Grapheme_Extend # Mc MUSICAL SYMBOL COMBINING STEM +1D16E..1D172 ; Other_Grapheme_Extend # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 127 + +# ================================================ + +2FF0..2FF1 ; IDS_Binary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW +2FF4..2FFB ; IDS_Binary_Operator # So [8] IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID + +# Total code points: 10 + +# ================================================ + +2FF2..2FF3 ; IDS_Trinary_Operator # So [2] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW + +# Total code points: 2 + +# ================================================ + +2E80..2E99 ; Radical # So [26] CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Radical # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Radical # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE + +# Total code points: 329 + +# ================================================ + +3400..4DBF ; Unified_Ideograph # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..9FFF ; Unified_Ideograph # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +FA0E..FA0F ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F +FA11 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA11 +FA13..FA14 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14 +FA1F ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA1F +FA21 ; Unified_Ideograph # Lo CJK COMPATIBILITY IDEOGRAPH-FA21 +FA23..FA24 ; Unified_Ideograph # Lo [2] CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24 +FA27..FA29 ; Unified_Ideograph # Lo [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29 +20000..2A6DF ; Unified_Ideograph # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Unified_Ideograph # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; Unified_Ideograph # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Unified_Ideograph # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Unified_Ideograph # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +30000..3134A ; Unified_Ideograph # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 92865 + +# ================================================ + +034F ; Other_Default_Ignorable_Code_Point # Mn COMBINING GRAPHEME JOINER +115F..1160 ; Other_Default_Ignorable_Code_Point # Lo [2] HANGUL CHOSEONG FILLER..HANGUL JUNGSEONG FILLER +17B4..17B5 ; Other_Default_Ignorable_Code_Point # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +2065 ; Other_Default_Ignorable_Code_Point # Cn +3164 ; Other_Default_Ignorable_Code_Point # Lo HANGUL FILLER +FFA0 ; Other_Default_Ignorable_Code_Point # Lo HALFWIDTH HANGUL FILLER +FFF0..FFF8 ; Other_Default_Ignorable_Code_Point # Cn [9] .. +E0000 ; Other_Default_Ignorable_Code_Point # Cn +E0002..E001F ; Other_Default_Ignorable_Code_Point # Cn [30] .. +E0080..E00FF ; Other_Default_Ignorable_Code_Point # Cn [128] .. +E01F0..E0FFF ; Other_Default_Ignorable_Code_Point # Cn [3600] .. + +# Total code points: 3776 + +# ================================================ + +0149 ; Deprecated # L& LATIN SMALL LETTER N PRECEDED BY APOSTROPHE +0673 ; Deprecated # Lo ARABIC LETTER ALEF WITH WAVY HAMZA BELOW +0F77 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC RR +0F79 ; Deprecated # Mn TIBETAN VOWEL SIGN VOCALIC LL +17A3..17A4 ; Deprecated # Lo [2] KHMER INDEPENDENT VOWEL QAQ..KHMER INDEPENDENT VOWEL QAA +206A..206F ; Deprecated # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES +2329 ; Deprecated # Ps LEFT-POINTING ANGLE BRACKET +232A ; Deprecated # Pe RIGHT-POINTING ANGLE BRACKET +E0001 ; Deprecated # Cf LANGUAGE TAG + +# Total code points: 15 + +# ================================================ + +0069..006A ; Soft_Dotted # L& [2] LATIN SMALL LETTER I..LATIN SMALL LETTER J +012F ; Soft_Dotted # L& LATIN SMALL LETTER I WITH OGONEK +0249 ; Soft_Dotted # L& LATIN SMALL LETTER J WITH STROKE +0268 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE +029D ; Soft_Dotted # L& LATIN SMALL LETTER J WITH CROSSED-TAIL +02B2 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J +03F3 ; Soft_Dotted # L& GREEK LETTER YOT +0456 ; Soft_Dotted # L& CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I +0458 ; Soft_Dotted # L& CYRILLIC SMALL LETTER JE +1D62 ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER I +1D96 ; Soft_Dotted # L& LATIN SMALL LETTER I WITH RETROFLEX HOOK +1DA4 ; Soft_Dotted # Lm MODIFIER LETTER SMALL I WITH STROKE +1DA8 ; Soft_Dotted # Lm MODIFIER LETTER SMALL J WITH CROSSED-TAIL +1E2D ; Soft_Dotted # L& LATIN SMALL LETTER I WITH TILDE BELOW +1ECB ; Soft_Dotted # L& LATIN SMALL LETTER I WITH DOT BELOW +2071 ; Soft_Dotted # Lm SUPERSCRIPT LATIN SMALL LETTER I +2148..2149 ; Soft_Dotted # L& [2] DOUBLE-STRUCK ITALIC SMALL I..DOUBLE-STRUCK ITALIC SMALL J +2C7C ; Soft_Dotted # Lm LATIN SUBSCRIPT SMALL LETTER J +1D422..1D423 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SMALL I..MATHEMATICAL BOLD SMALL J +1D456..1D457 ; Soft_Dotted # L& [2] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL ITALIC SMALL J +1D48A..1D48B ; Soft_Dotted # L& [2] MATHEMATICAL BOLD ITALIC SMALL I..MATHEMATICAL BOLD ITALIC SMALL J +1D4BE..1D4BF ; Soft_Dotted # L& [2] MATHEMATICAL SCRIPT SMALL I..MATHEMATICAL SCRIPT SMALL J +1D4F2..1D4F3 ; Soft_Dotted # L& [2] MATHEMATICAL BOLD SCRIPT SMALL I..MATHEMATICAL BOLD SCRIPT SMALL J +1D526..1D527 ; Soft_Dotted # L& [2] MATHEMATICAL FRAKTUR SMALL I..MATHEMATICAL FRAKTUR SMALL J +1D55A..1D55B ; Soft_Dotted # L& [2] MATHEMATICAL DOUBLE-STRUCK SMALL I..MATHEMATICAL DOUBLE-STRUCK SMALL J +1D58E..1D58F ; Soft_Dotted # L& [2] MATHEMATICAL BOLD FRAKTUR SMALL I..MATHEMATICAL BOLD FRAKTUR SMALL J +1D5C2..1D5C3 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF SMALL I..MATHEMATICAL SANS-SERIF SMALL J +1D5F6..1D5F7 ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD SMALL I..MATHEMATICAL SANS-SERIF BOLD SMALL J +1D62A..1D62B ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF ITALIC SMALL I..MATHEMATICAL SANS-SERIF ITALIC SMALL J +1D65E..1D65F ; Soft_Dotted # L& [2] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL I..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL J +1D692..1D693 ; Soft_Dotted # L& [2] MATHEMATICAL MONOSPACE SMALL I..MATHEMATICAL MONOSPACE SMALL J +1DF1A ; Soft_Dotted # L& LATIN SMALL LETTER I WITH STROKE AND RETROFLEX HOOK + +# Total code points: 47 + +# ================================================ + +0E40..0E44 ; Logical_Order_Exception # Lo [5] THAI CHARACTER SARA E..THAI CHARACTER SARA AI MAIMALAI +0EC0..0EC4 ; Logical_Order_Exception # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +19B5..19B7 ; Logical_Order_Exception # Lo [3] NEW TAI LUE VOWEL SIGN E..NEW TAI LUE VOWEL SIGN O +19BA ; Logical_Order_Exception # Lo NEW TAI LUE VOWEL SIGN AY +AAB5..AAB6 ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB9 ; Logical_Order_Exception # Lo TAI VIET VOWEL UEA +AABB..AABC ; Logical_Order_Exception # Lo [2] TAI VIET VOWEL AUE..TAI VIET VOWEL AY + +# Total code points: 19 + +# ================================================ + +1885..1886 ; Other_ID_Start # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +2118 ; Other_ID_Start # Sm SCRIPT CAPITAL P +212E ; Other_ID_Start # So ESTIMATED SYMBOL +309B..309C ; Other_ID_Start # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK + +# Total code points: 6 + +# ================================================ + +00B7 ; Other_ID_Continue # Po MIDDLE DOT +0387 ; Other_ID_Continue # Po GREEK ANO TELEIA +1369..1371 ; Other_ID_Continue # No [9] ETHIOPIC DIGIT ONE..ETHIOPIC DIGIT NINE +19DA ; Other_ID_Continue # No NEW TAI LUE THAM DIGIT ONE + +# Total code points: 12 + +# ================================================ + +0021 ; Sentence_Terminal # Po EXCLAMATION MARK +002E ; Sentence_Terminal # Po FULL STOP +003F ; Sentence_Terminal # Po QUESTION MARK +0589 ; Sentence_Terminal # Po ARMENIAN FULL STOP +061D..061F ; Sentence_Terminal # Po [3] ARABIC END OF TEXT MARK..ARABIC QUESTION MARK +06D4 ; Sentence_Terminal # Po ARABIC FULL STOP +0700..0702 ; Sentence_Terminal # Po [3] SYRIAC END OF PARAGRAPH..SYRIAC SUBLINEAR FULL STOP +07F9 ; Sentence_Terminal # Po NKO EXCLAMATION MARK +0837 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION MELODIC QITSA +0839 ; Sentence_Terminal # Po SAMARITAN PUNCTUATION QITSA +083D..083E ; Sentence_Terminal # Po [2] SAMARITAN PUNCTUATION SOF MASHFAAT..SAMARITAN PUNCTUATION ANNAAU +0964..0965 ; Sentence_Terminal # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +104A..104B ; Sentence_Terminal # Po [2] MYANMAR SIGN LITTLE SECTION..MYANMAR SIGN SECTION +1362 ; Sentence_Terminal # Po ETHIOPIC FULL STOP +1367..1368 ; Sentence_Terminal # Po [2] ETHIOPIC QUESTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +166E ; Sentence_Terminal # Po CANADIAN SYLLABICS FULL STOP +1735..1736 ; Sentence_Terminal # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1803 ; Sentence_Terminal # Po MONGOLIAN FULL STOP +1809 ; Sentence_Terminal # Po MONGOLIAN MANCHU FULL STOP +1944..1945 ; Sentence_Terminal # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1AA8..1AAB ; Sentence_Terminal # Po [4] TAI THAM SIGN KAAN..TAI THAM SIGN SATKAANKUU +1B5A..1B5B ; Sentence_Terminal # Po [2] BALINESE PANTI..BALINESE PAMADA +1B5E..1B5F ; Sentence_Terminal # Po [2] BALINESE CARIK SIKI..BALINESE CARIK PAREREN +1B7D..1B7E ; Sentence_Terminal # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG +1C3B..1C3C ; Sentence_Terminal # Po [2] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION NYET THYOOM TA-ROL +1C7E..1C7F ; Sentence_Terminal # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD +203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG +2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK +2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP +2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK +3002 ; Sentence_Terminal # Po IDEOGRAPHIC FULL STOP +A4FF ; Sentence_Terminal # Po LISU PUNCTUATION FULL STOP +A60E..A60F ; Sentence_Terminal # Po [2] VAI FULL STOP..VAI QUESTION MARK +A6F3 ; Sentence_Terminal # Po BAMUM FULL STOP +A6F7 ; Sentence_Terminal # Po BAMUM QUESTION MARK +A876..A877 ; Sentence_Terminal # Po [2] PHAGS-PA MARK SHAD..PHAGS-PA MARK DOUBLE SHAD +A8CE..A8CF ; Sentence_Terminal # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A92F ; Sentence_Terminal # Po KAYAH LI SIGN SHYA +A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI +AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA +AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI +FE52 ; Sentence_Terminal # Po SMALL FULL STOP +FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK +FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK +FF0E ; Sentence_Terminal # Po FULLWIDTH FULL STOP +FF1F ; Sentence_Terminal # Po FULLWIDTH QUESTION MARK +FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP +10A56..10A57 ; Sentence_Terminal # Po [2] KHAROSHTHI PUNCTUATION DANDA..KHAROSHTHI PUNCTUATION DOUBLE DANDA +10F55..10F59 ; Sentence_Terminal # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT +10F86..10F89 ; Sentence_Terminal # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS +11047..11048 ; Sentence_Terminal # Po [2] BRAHMI DANDA..BRAHMI DOUBLE DANDA +110BE..110C1 ; Sentence_Terminal # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +11141..11143 ; Sentence_Terminal # Po [3] CHAKMA DANDA..CHAKMA QUESTION MARK +111C5..111C6 ; Sentence_Terminal # Po [2] SHARADA DANDA..SHARADA DOUBLE DANDA +111CD ; Sentence_Terminal # Po SHARADA SUTRA MARK +111DE..111DF ; Sentence_Terminal # Po [2] SHARADA SECTION MARK-1..SHARADA SECTION MARK-2 +11238..11239 ; Sentence_Terminal # Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA +1123B..1123C ; Sentence_Terminal # Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK +112A9 ; Sentence_Terminal # Po MULTANI SECTION MARK +1144B..1144C ; Sentence_Terminal # Po [2] NEWA DANDA..NEWA DOUBLE DANDA +115C2..115C3 ; Sentence_Terminal # Po [2] SIDDHAM DANDA..SIDDHAM DOUBLE DANDA +115C9..115D7 ; Sentence_Terminal # Po [15] SIDDHAM END OF TEXT MARK..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +11641..11642 ; Sentence_Terminal # Po [2] MODI DANDA..MODI DOUBLE DANDA +1173C..1173E ; Sentence_Terminal # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +11944 ; Sentence_Terminal # Po DIVES AKURU DOUBLE DANDA +11946 ; Sentence_Terminal # Po DIVES AKURU END OF TEXT MARK +11A42..11A43 ; Sentence_Terminal # Po [2] ZANABAZAR SQUARE MARK SHAD..ZANABAZAR SQUARE MARK DOUBLE SHAD +11A9B..11A9C ; Sentence_Terminal # Po [2] SOYOMBO MARK SHAD..SOYOMBO MARK DOUBLE SHAD +11C41..11C42 ; Sentence_Terminal # Po [2] BHAIKSUKI DANDA..BHAIKSUKI DOUBLE DANDA +11EF7..11EF8 ; Sentence_Terminal # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION +16A6E..16A6F ; Sentence_Terminal # Po [2] MRO DANDA..MRO DOUBLE DANDA +16AF5 ; Sentence_Terminal # Po BASSA VAH FULL STOP +16B37..16B38 ; Sentence_Terminal # Po [2] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS TSHAB CEEB +16B44 ; Sentence_Terminal # Po PAHAWH HMONG SIGN XAUS +16E98 ; Sentence_Terminal # Po MEDEFAIDRIN FULL STOP +1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP +1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP + +# Total code points: 152 + +# ================================================ + +180B..180D ; Variation_Selector # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180F ; Variation_Selector # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +FE00..FE0F ; Variation_Selector # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +E0100..E01EF ; Variation_Selector # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 260 + +# ================================================ + +0009..000D ; Pattern_White_Space # Cc [5] .. +0020 ; Pattern_White_Space # Zs SPACE +0085 ; Pattern_White_Space # Cc +200E..200F ; Pattern_White_Space # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2028 ; Pattern_White_Space # Zl LINE SEPARATOR +2029 ; Pattern_White_Space # Zp PARAGRAPH SEPARATOR + +# Total code points: 11 + +# ================================================ + +0021..0023 ; Pattern_Syntax # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Pattern_Syntax # Sc DOLLAR SIGN +0025..0027 ; Pattern_Syntax # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Pattern_Syntax # Ps LEFT PARENTHESIS +0029 ; Pattern_Syntax # Pe RIGHT PARENTHESIS +002A ; Pattern_Syntax # Po ASTERISK +002B ; Pattern_Syntax # Sm PLUS SIGN +002C ; Pattern_Syntax # Po COMMA +002D ; Pattern_Syntax # Pd HYPHEN-MINUS +002E..002F ; Pattern_Syntax # Po [2] FULL STOP..SOLIDUS +003A..003B ; Pattern_Syntax # Po [2] COLON..SEMICOLON +003C..003E ; Pattern_Syntax # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Pattern_Syntax # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET +005C ; Pattern_Syntax # Po REVERSE SOLIDUS +005D ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET +005E ; Pattern_Syntax # Sk CIRCUMFLEX ACCENT +0060 ; Pattern_Syntax # Sk GRAVE ACCENT +007B ; Pattern_Syntax # Ps LEFT CURLY BRACKET +007C ; Pattern_Syntax # Sm VERTICAL LINE +007D ; Pattern_Syntax # Pe RIGHT CURLY BRACKET +007E ; Pattern_Syntax # Sm TILDE +00A1 ; Pattern_Syntax # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Pattern_Syntax # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Pattern_Syntax # So BROKEN BAR +00A7 ; Pattern_Syntax # Po SECTION SIGN +00A9 ; Pattern_Syntax # So COPYRIGHT SIGN +00AB ; Pattern_Syntax # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Pattern_Syntax # Sm NOT SIGN +00AE ; Pattern_Syntax # So REGISTERED SIGN +00B0 ; Pattern_Syntax # So DEGREE SIGN +00B1 ; Pattern_Syntax # Sm PLUS-MINUS SIGN +00B6 ; Pattern_Syntax # Po PILCROW SIGN +00BB ; Pattern_Syntax # Pf RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BF ; Pattern_Syntax # Po INVERTED QUESTION MARK +00D7 ; Pattern_Syntax # Sm MULTIPLICATION SIGN +00F7 ; Pattern_Syntax # Sm DIVISION SIGN +2010..2015 ; Pattern_Syntax # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Pattern_Syntax # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Pattern_Syntax # Pi LEFT SINGLE QUOTATION MARK +2019 ; Pattern_Syntax # Pf RIGHT SINGLE QUOTATION MARK +201A ; Pattern_Syntax # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Pattern_Syntax # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Pattern_Syntax # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Pattern_Syntax # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Pattern_Syntax # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Pattern_Syntax # Po [8] DAGGER..HYPHENATION POINT +2030..2038 ; Pattern_Syntax # Po [9] PER MILLE SIGN..CARET +2039 ; Pattern_Syntax # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Pattern_Syntax # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Pattern_Syntax # Po [4] REFERENCE MARK..OVERLINE +2041..2043 ; Pattern_Syntax # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Pattern_Syntax # Sm FRACTION SLASH +2045 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Pattern_Syntax # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Pattern_Syntax # Sm COMMERCIAL MINUS SIGN +2053 ; Pattern_Syntax # Po SWUNG DASH +2055..205E ; Pattern_Syntax # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +2190..2194 ; Pattern_Syntax # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Pattern_Syntax # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Pattern_Syntax # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Pattern_Syntax # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Pattern_Syntax # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Pattern_Syntax # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Pattern_Syntax # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Pattern_Syntax # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Pattern_Syntax # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Pattern_Syntax # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Pattern_Syntax # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Pattern_Syntax # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Pattern_Syntax # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Pattern_Syntax # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Pattern_Syntax # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Pattern_Syntax # So DOWNWARDS DOUBLE ARROW +21D4 ; Pattern_Syntax # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Pattern_Syntax # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Pattern_Syntax # Sm [268] RIGHT ARROW WITH SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Pattern_Syntax # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Pattern_Syntax # Ps LEFT CEILING +2309 ; Pattern_Syntax # Pe RIGHT CEILING +230A ; Pattern_Syntax # Ps LEFT FLOOR +230B ; Pattern_Syntax # Pe RIGHT FLOOR +230C..231F ; Pattern_Syntax # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Pattern_Syntax # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Pattern_Syntax # So [7] FROWN..KEYBOARD +2329 ; Pattern_Syntax # Ps LEFT-POINTING ANGLE BRACKET +232A ; Pattern_Syntax # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Pattern_Syntax # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Pattern_Syntax # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Pattern_Syntax # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Pattern_Syntax # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Pattern_Syntax # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Pattern_Syntax # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; Pattern_Syntax # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2427..243F ; Pattern_Syntax # Cn [25] .. +2440..244A ; Pattern_Syntax # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +244B..245F ; Pattern_Syntax # Cn [21] .. +2500..25B6 ; Pattern_Syntax # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Pattern_Syntax # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Pattern_Syntax # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Pattern_Syntax # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Pattern_Syntax # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Pattern_Syntax # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Pattern_Syntax # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Pattern_Syntax # Sm MUSIC SHARP SIGN +2670..2767 ; Pattern_Syntax # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Pattern_Syntax # Ps MEDIUM LEFT PARENTHESIS ORNAMENT +2769 ; Pattern_Syntax # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Pattern_Syntax # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Pattern_Syntax # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Pattern_Syntax # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Pattern_Syntax # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Pattern_Syntax # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Pattern_Syntax # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Pattern_Syntax # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Pattern_Syntax # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Pattern_Syntax # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Pattern_Syntax # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2794..27BF ; Pattern_Syntax # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Pattern_Syntax # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Pattern_Syntax # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Pattern_Syntax # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Pattern_Syntax # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Pattern_Syntax # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Pattern_Syntax # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Pattern_Syntax # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Pattern_Syntax # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Pattern_Syntax # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Pattern_Syntax # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Pattern_Syntax # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Pattern_Syntax # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Pattern_Syntax # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2800..28FF ; Pattern_Syntax # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 +2900..2982 ; Pattern_Syntax # Sm [131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Pattern_Syntax # Ps LEFT WHITE CURLY BRACKET +2984 ; Pattern_Syntax # Pe RIGHT WHITE CURLY BRACKET +2985 ; Pattern_Syntax # Ps LEFT WHITE PARENTHESIS +2986 ; Pattern_Syntax # Pe RIGHT WHITE PARENTHESIS +2987 ; Pattern_Syntax # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Pattern_Syntax # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Pattern_Syntax # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Pattern_Syntax # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Pattern_Syntax # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Pattern_Syntax # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Pattern_Syntax # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Pattern_Syntax # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Pattern_Syntax # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Pattern_Syntax # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Pattern_Syntax # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Pattern_Syntax # Ps LEFT WIGGLY FENCE +29D9 ; Pattern_Syntax # Pe RIGHT WIGGLY FENCE +29DA ; Pattern_Syntax # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Pattern_Syntax # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Pattern_Syntax # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Pattern_Syntax # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Pattern_Syntax # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Pattern_Syntax # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Pattern_Syntax # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Pattern_Syntax # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Pattern_Syntax # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Pattern_Syntax # Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Pattern_Syntax # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B74..2B75 ; Pattern_Syntax # Cn [2] .. +2B76..2B95 ; Pattern_Syntax # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B96 ; Pattern_Syntax # Cn +2B97..2BFF ; Pattern_Syntax # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2E00..2E01 ; Pattern_Syntax # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Pattern_Syntax # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Pattern_Syntax # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Pattern_Syntax # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Pattern_Syntax # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Pattern_Syntax # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Pattern_Syntax # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Pattern_Syntax # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Pattern_Syntax # Po RAISED SQUARE +2E0C ; Pattern_Syntax # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Pattern_Syntax # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Pattern_Syntax # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Pattern_Syntax # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Pattern_Syntax # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Pattern_Syntax # Pd HYPHEN WITH DIAERESIS +2E1B ; Pattern_Syntax # Po TILDE WITH RING ABOVE +2E1C ; Pattern_Syntax # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Pattern_Syntax # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Pattern_Syntax # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Pattern_Syntax # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Pattern_Syntax # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Pattern_Syntax # Ps TOP LEFT HALF BRACKET +2E23 ; Pattern_Syntax # Pe TOP RIGHT HALF BRACKET +2E24 ; Pattern_Syntax # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Pattern_Syntax # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Pattern_Syntax # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Pattern_Syntax # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Pattern_Syntax # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Pattern_Syntax # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Pattern_Syntax # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; Pattern_Syntax # Lm VERTICAL TILDE +2E30..2E39 ; Pattern_Syntax # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Pattern_Syntax # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Pattern_Syntax # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Pattern_Syntax # Pd DOUBLE HYPHEN +2E41 ; Pattern_Syntax # Po REVERSED COMMA +2E42 ; Pattern_Syntax # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Pattern_Syntax # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; Pattern_Syntax # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; Pattern_Syntax # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Pattern_Syntax # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Pattern_Syntax # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Pattern_Syntax # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Pattern_Syntax # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Pattern_Syntax # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Pattern_Syntax # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Pattern_Syntax # Pd OBLIQUE HYPHEN +2E5E..2E7F ; Pattern_Syntax # Cn [34] .. +3001..3003 ; Pattern_Syntax # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3008 ; Pattern_Syntax # Ps LEFT ANGLE BRACKET +3009 ; Pattern_Syntax # Pe RIGHT ANGLE BRACKET +300A ; Pattern_Syntax # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Pattern_Syntax # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Pattern_Syntax # Ps LEFT CORNER BRACKET +300D ; Pattern_Syntax # Pe RIGHT CORNER BRACKET +300E ; Pattern_Syntax # Ps LEFT WHITE CORNER BRACKET +300F ; Pattern_Syntax # Pe RIGHT WHITE CORNER BRACKET +3010 ; Pattern_Syntax # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Pattern_Syntax # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Pattern_Syntax # So [2] POSTAL MARK..GETA MARK +3014 ; Pattern_Syntax # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Pattern_Syntax # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Pattern_Syntax # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Pattern_Syntax # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Pattern_Syntax # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Pattern_Syntax # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Pattern_Syntax # Ps LEFT WHITE SQUARE BRACKET +301B ; Pattern_Syntax # Pe RIGHT WHITE SQUARE BRACKET +301C ; Pattern_Syntax # Pd WAVE DASH +301D ; Pattern_Syntax # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Pattern_Syntax # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Pattern_Syntax # So POSTAL MARK FACE +3030 ; Pattern_Syntax # Pd WAVY DASH +FD3E ; Pattern_Syntax # Pe ORNATE LEFT PARENTHESIS +FD3F ; Pattern_Syntax # Ps ORNATE RIGHT PARENTHESIS +FE45..FE46 ; Pattern_Syntax # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 2760 + +# ================================================ + +0600..0605 ; Prepended_Concatenation_Mark # Cf [6] ARABIC NUMBER SIGN..ARABIC NUMBER MARK ABOVE +06DD ; Prepended_Concatenation_Mark # Cf ARABIC END OF AYAH +070F ; Prepended_Concatenation_Mark # Cf SYRIAC ABBREVIATION MARK +0890..0891 ; Prepended_Concatenation_Mark # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +08E2 ; Prepended_Concatenation_Mark # Cf ARABIC DISPUTED END OF AYAH +110BD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN +110CD ; Prepended_Concatenation_Mark # Cf KAITHI NUMBER SIGN ABOVE + +# Total code points: 13 + +# ================================================ + +1F1E6..1F1FF ; Regional_Indicator # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z + +# Total code points: 26 + +# EOF diff --git a/pcre2/maint/Unicode.tables/PropertyAliases.txt b/pcre2/maint/Unicode.tables/PropertyAliases.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e4b429e455ec74d1bd3102a93ec13969e8fec03 --- /dev/null +++ b/pcre2/maint/Unicode.tables/PropertyAliases.txt @@ -0,0 +1,212 @@ +# PropertyAliases-14.0.0.txt +# Date: 2021-03-08, 19:35:48 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# This file contains aliases for properties used in the UCD. +# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. +# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line has two or more fields, separated by semicolons. +# +# First Field: The first field is the short name for the property. +# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the second field. +# For Unihan database tags, the short name is actually a longer string than +# the tag specified in the second field. +# +# Second Field: The second field is the long name for the property, +# typically the formal name used in documentation about the property. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, and '_' are ignored. For Numeric Property +# values, numeric equivalencies are applied: thus "01.00" is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. +# ================================================ + + +# ================================================ +# Numeric Properties +# ================================================ +cjkAccountingNumeric ; kAccountingNumeric +cjkOtherNumeric ; kOtherNumeric +cjkPrimaryNumeric ; kPrimaryNumeric +nv ; Numeric_Value + +# ================================================ +# String Properties +# ================================================ +cf ; Case_Folding +cjkCompatibilityVariant ; kCompatibilityVariant +dm ; Decomposition_Mapping +FC_NFKC ; FC_NFKC_Closure +lc ; Lowercase_Mapping +NFKC_CF ; NFKC_Casefold +scf ; Simple_Case_Folding ; sfc +slc ; Simple_Lowercase_Mapping +stc ; Simple_Titlecase_Mapping +suc ; Simple_Uppercase_Mapping +tc ; Titlecase_Mapping +uc ; Uppercase_Mapping + +# ================================================ +# Miscellaneous Properties +# ================================================ +bmg ; Bidi_Mirroring_Glyph +bpb ; Bidi_Paired_Bracket +cjkIICore ; kIICore +cjkIRG_GSource ; kIRG_GSource +cjkIRG_HSource ; kIRG_HSource +cjkIRG_JSource ; kIRG_JSource +cjkIRG_KPSource ; kIRG_KPSource +cjkIRG_KSource ; kIRG_KSource +cjkIRG_MSource ; kIRG_MSource +cjkIRG_SSource ; kIRG_SSource +cjkIRG_TSource ; kIRG_TSource +cjkIRG_UKSource ; kIRG_UKSource +cjkIRG_USource ; kIRG_USource +cjkIRG_VSource ; kIRG_VSource +cjkRSUnicode ; kRSUnicode ; Unicode_Radical_Stroke; URS +EqUIdeo ; Equivalent_Unified_Ideograph +isc ; ISO_Comment +JSN ; Jamo_Short_Name +na ; Name +na1 ; Unicode_1_Name +Name_Alias ; Name_Alias +scx ; Script_Extensions + +# ================================================ +# Catalog Properties +# ================================================ +age ; Age +blk ; Block +sc ; Script + +# ================================================ +# Enumerated Properties +# ================================================ +bc ; Bidi_Class +bpt ; Bidi_Paired_Bracket_Type +ccc ; Canonical_Combining_Class +dt ; Decomposition_Type +ea ; East_Asian_Width +gc ; General_Category +GCB ; Grapheme_Cluster_Break +hst ; Hangul_Syllable_Type +InPC ; Indic_Positional_Category +InSC ; Indic_Syllabic_Category +jg ; Joining_Group +jt ; Joining_Type +lb ; Line_Break +NFC_QC ; NFC_Quick_Check +NFD_QC ; NFD_Quick_Check +NFKC_QC ; NFKC_Quick_Check +NFKD_QC ; NFKD_Quick_Check +nt ; Numeric_Type +SB ; Sentence_Break +vo ; Vertical_Orientation +WB ; Word_Break + +# ================================================ +# Binary Properties +# ================================================ +AHex ; ASCII_Hex_Digit +Alpha ; Alphabetic +Bidi_C ; Bidi_Control +Bidi_M ; Bidi_Mirrored +Cased ; Cased +CE ; Composition_Exclusion +CI ; Case_Ignorable +Comp_Ex ; Full_Composition_Exclusion +CWCF ; Changes_When_Casefolded +CWCM ; Changes_When_Casemapped +CWKCF ; Changes_When_NFKC_Casefolded +CWL ; Changes_When_Lowercased +CWT ; Changes_When_Titlecased +CWU ; Changes_When_Uppercased +Dash ; Dash +Dep ; Deprecated +DI ; Default_Ignorable_Code_Point +Dia ; Diacritic +EBase ; Emoji_Modifier_Base +EComp ; Emoji_Component +EMod ; Emoji_Modifier +Emoji ; Emoji +EPres ; Emoji_Presentation +Ext ; Extender +ExtPict ; Extended_Pictographic +Gr_Base ; Grapheme_Base +Gr_Ext ; Grapheme_Extend +Gr_Link ; Grapheme_Link +Hex ; Hex_Digit +Hyphen ; Hyphen +IDC ; ID_Continue +Ideo ; Ideographic +IDS ; ID_Start +IDSB ; IDS_Binary_Operator +IDST ; IDS_Trinary_Operator +Join_C ; Join_Control +LOE ; Logical_Order_Exception +Lower ; Lowercase +Math ; Math +NChar ; Noncharacter_Code_Point +OAlpha ; Other_Alphabetic +ODI ; Other_Default_Ignorable_Code_Point +OGr_Ext ; Other_Grapheme_Extend +OIDC ; Other_ID_Continue +OIDS ; Other_ID_Start +OLower ; Other_Lowercase +OMath ; Other_Math +OUpper ; Other_Uppercase +Pat_Syn ; Pattern_Syntax +Pat_WS ; Pattern_White_Space +PCM ; Prepended_Concatenation_Mark +QMark ; Quotation_Mark +Radical ; Radical +RI ; Regional_Indicator +SD ; Soft_Dotted +STerm ; Sentence_Terminal +Term ; Terminal_Punctuation +UIdeo ; Unified_Ideograph +Upper ; Uppercase +VS ; Variation_Selector +WSpace ; White_Space ; space +XIDC ; XID_Continue +XIDS ; XID_Start +XO_NFC ; Expands_On_NFC +XO_NFD ; Expands_On_NFD +XO_NFKC ; Expands_On_NFKC +XO_NFKD ; Expands_On_NFKD + +# ================================================ +# Total: 129 + +# EOF diff --git a/pcre2/maint/Unicode.tables/PropertyValueAliases.txt b/pcre2/maint/Unicode.tables/PropertyValueAliases.txt new file mode 100644 index 0000000000000000000000000000000000000000..f0cb26bdab84eca2ccc81f1e3a3565ac244fda20 --- /dev/null +++ b/pcre2/maint/Unicode.tables/PropertyValueAliases.txt @@ -0,0 +1,1615 @@ +# PropertyValueAliases-14.0.0.txt +# Date: 2021-05-10, 21:08:53 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# This file contains aliases for property values used in the UCD. +# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. +# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line describes a property value name. +# This consists of three or more fields, separated by semicolons. +# +# First Field: The first field describes the property for which that +# property value name is used. +# +# Second Field: The second field is the short name for the property value. +# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the third field. +# +# Third Field: The third field is the long name for the property value, +# typically the formal name used in documentation about the property value. +# +# In the case of Canonical_Combining_Class (ccc), there are 4 fields: +# The second field is numeric, the third is the short name, and the fourth is the long name. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, hyphens, and '_' are ignored. +# For Numeric Property values, numeric equivalence is applied: thus "01.00" +# is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Canonical_Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. +# ================================================ + + +# ASCII_Hex_Digit (AHex) + +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True + +# Age (age) + +age; 1.1 ; V1_1 +age; 2.0 ; V2_0 +age; 2.1 ; V2_1 +age; 3.0 ; V3_0 +age; 3.1 ; V3_1 +age; 3.2 ; V3_2 +age; 4.0 ; V4_0 +age; 4.1 ; V4_1 +age; 5.0 ; V5_0 +age; 5.1 ; V5_1 +age; 5.2 ; V5_2 +age; 6.0 ; V6_0 +age; 6.1 ; V6_1 +age; 6.2 ; V6_2 +age; 6.3 ; V6_3 +age; 7.0 ; V7_0 +age; 8.0 ; V8_0 +age; 9.0 ; V9_0 +age; 10.0 ; V10_0 +age; 11.0 ; V11_0 +age; 12.0 ; V12_0 +age; 12.1 ; V12_1 +age; 13.0 ; V13_0 +age; 14.0 ; V14_0 +age; NA ; Unassigned + +# Alphabetic (Alpha) + +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True + +# Bidi_Class (bc) + +bc ; AL ; Arabic_Letter +bc ; AN ; Arabic_Number +bc ; B ; Paragraph_Separator +bc ; BN ; Boundary_Neutral +bc ; CS ; Common_Separator +bc ; EN ; European_Number +bc ; ES ; European_Separator +bc ; ET ; European_Terminator +bc ; FSI ; First_Strong_Isolate +bc ; L ; Left_To_Right +bc ; LRE ; Left_To_Right_Embedding +bc ; LRI ; Left_To_Right_Isolate +bc ; LRO ; Left_To_Right_Override +bc ; NSM ; Nonspacing_Mark +bc ; ON ; Other_Neutral +bc ; PDF ; Pop_Directional_Format +bc ; PDI ; Pop_Directional_Isolate +bc ; R ; Right_To_Left +bc ; RLE ; Right_To_Left_Embedding +bc ; RLI ; Right_To_Left_Isolate +bc ; RLO ; Right_To_Left_Override +bc ; S ; Segment_Separator +bc ; WS ; White_Space + +# Bidi_Control (Bidi_C) + +Bidi_C; N ; No ; F ; False +Bidi_C; Y ; Yes ; T ; True + +# Bidi_Mirrored (Bidi_M) + +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True + +# Bidi_Mirroring_Glyph (bmg) + +# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; + +# Bidi_Paired_Bracket (bpb) + +# @missing: 0000..10FFFF; Bidi_Paired_Bracket; + +# Bidi_Paired_Bracket_Type (bpt) + +bpt; c ; Close +bpt; n ; None +bpt; o ; Open +# @missing: 0000..10FFFF; Bidi_Paired_Bracket_Type; n + +# Block (blk) + +blk; Adlam ; Adlam +blk; Aegean_Numbers ; Aegean_Numbers +blk; Ahom ; Ahom +blk; Alchemical ; Alchemical_Symbols +blk; Alphabetic_PF ; Alphabetic_Presentation_Forms +blk; Anatolian_Hieroglyphs ; Anatolian_Hieroglyphs +blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation +blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers +blk; Ancient_Symbols ; Ancient_Symbols +blk; Arabic ; Arabic +blk; Arabic_Ext_A ; Arabic_Extended_A +blk; Arabic_Ext_B ; Arabic_Extended_B +blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols +blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; Arabic_PF_B ; Arabic_Presentation_Forms_B +blk; Arabic_Sup ; Arabic_Supplement +blk; Armenian ; Armenian +blk; Arrows ; Arrows +blk; ASCII ; Basic_Latin +blk; Avestan ; Avestan +blk; Balinese ; Balinese +blk; Bamum ; Bamum +blk; Bamum_Sup ; Bamum_Supplement +blk; Bassa_Vah ; Bassa_Vah +blk; Batak ; Batak +blk; Bengali ; Bengali +blk; Bhaiksuki ; Bhaiksuki +blk; Block_Elements ; Block_Elements +blk; Bopomofo ; Bopomofo +blk; Bopomofo_Ext ; Bopomofo_Extended +blk; Box_Drawing ; Box_Drawing +blk; Brahmi ; Brahmi +blk; Braille ; Braille_Patterns +blk; Buginese ; Buginese +blk; Buhid ; Buhid +blk; Byzantine_Music ; Byzantine_Musical_Symbols +blk; Carian ; Carian +blk; Caucasian_Albanian ; Caucasian_Albanian +blk; Chakma ; Chakma +blk; Cham ; Cham +blk; Cherokee ; Cherokee +blk; Cherokee_Sup ; Cherokee_Supplement +blk; Chess_Symbols ; Chess_Symbols +blk; Chorasmian ; Chorasmian +blk; CJK ; CJK_Unified_Ideographs +blk; CJK_Compat ; CJK_Compatibility +blk; CJK_Compat_Forms ; CJK_Compatibility_Forms +blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs +blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement +blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A +blk; CJK_Ext_B ; CJK_Unified_Ideographs_Extension_B +blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C +blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D +blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E +blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F +blk; CJK_Ext_G ; CJK_Unified_Ideographs_Extension_G +blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement +blk; CJK_Strokes ; CJK_Strokes +blk; CJK_Symbols ; CJK_Symbols_And_Punctuation +blk; Compat_Jamo ; Hangul_Compatibility_Jamo +blk; Control_Pictures ; Control_Pictures +blk; Coptic ; Coptic +blk; Coptic_Epact_Numbers ; Coptic_Epact_Numbers +blk; Counting_Rod ; Counting_Rod_Numerals +blk; Cuneiform ; Cuneiform +blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation +blk; Currency_Symbols ; Currency_Symbols +blk; Cypriot_Syllabary ; Cypriot_Syllabary +blk; Cypro_Minoan ; Cypro_Minoan +blk; Cyrillic ; Cyrillic +blk; Cyrillic_Ext_A ; Cyrillic_Extended_A +blk; Cyrillic_Ext_B ; Cyrillic_Extended_B +blk; Cyrillic_Ext_C ; Cyrillic_Extended_C +blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary +blk; Deseret ; Deseret +blk; Devanagari ; Devanagari +blk; Devanagari_Ext ; Devanagari_Extended +blk; Diacriticals ; Combining_Diacritical_Marks +blk; Diacriticals_Ext ; Combining_Diacritical_Marks_Extended +blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols +blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement +blk; Dingbats ; Dingbats +blk; Dives_Akuru ; Dives_Akuru +blk; Dogra ; Dogra +blk; Domino ; Domino_Tiles +blk; Duployan ; Duployan +blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform +blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls +blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Elbasan ; Elbasan +blk; Elymaic ; Elymaic +blk; Emoticons ; Emoticons +blk; Enclosed_Alphanum ; Enclosed_Alphanumerics +blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement +blk; Enclosed_CJK ; Enclosed_CJK_Letters_And_Months +blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement +blk; Ethiopic ; Ethiopic +blk; Ethiopic_Ext ; Ethiopic_Extended +blk; Ethiopic_Ext_A ; Ethiopic_Extended_A +blk; Ethiopic_Ext_B ; Ethiopic_Extended_B +blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; Geometric_Shapes ; Geometric_Shapes +blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended +blk; Georgian ; Georgian +blk; Georgian_Ext ; Georgian_Extended +blk; Georgian_Sup ; Georgian_Supplement +blk; Glagolitic ; Glagolitic +blk; Glagolitic_Sup ; Glagolitic_Supplement +blk; Gothic ; Gothic +blk; Grantha ; Grantha +blk; Greek ; Greek_And_Coptic +blk; Greek_Ext ; Greek_Extended +blk; Gujarati ; Gujarati +blk; Gunjala_Gondi ; Gunjala_Gondi +blk; Gurmukhi ; Gurmukhi +blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms +blk; Half_Marks ; Combining_Half_Marks +blk; Hangul ; Hangul_Syllables +blk; Hanifi_Rohingya ; Hanifi_Rohingya +blk; Hanunoo ; Hanunoo +blk; Hatran ; Hatran +blk; Hebrew ; Hebrew +blk; High_PU_Surrogates ; High_Private_Use_Surrogates +blk; High_Surrogates ; High_Surrogates +blk; Hiragana ; Hiragana +blk; IDC ; Ideographic_Description_Characters +blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation +blk; Imperial_Aramaic ; Imperial_Aramaic +blk; Indic_Number_Forms ; Common_Indic_Number_Forms +blk; Indic_Siyaq_Numbers ; Indic_Siyaq_Numbers +blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi +blk; Inscriptional_Parthian ; Inscriptional_Parthian +blk; IPA_Ext ; IPA_Extensions +blk; Jamo ; Hangul_Jamo +blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A +blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B +blk; Javanese ; Javanese +blk; Kaithi ; Kaithi +blk; Kana_Ext_A ; Kana_Extended_A +blk; Kana_Ext_B ; Kana_Extended_B +blk; Kana_Sup ; Kana_Supplement +blk; Kanbun ; Kanbun +blk; Kangxi ; Kangxi_Radicals +blk; Kannada ; Kannada +blk; Katakana ; Katakana +blk; Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kayah_Li ; Kayah_Li +blk; Kharoshthi ; Kharoshthi +blk; Khitan_Small_Script ; Khitan_Small_Script +blk; Khmer ; Khmer +blk; Khmer_Symbols ; Khmer_Symbols +blk; Khojki ; Khojki +blk; Khudawadi ; Khudawadi +blk; Lao ; Lao +blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 +blk; Latin_Ext_A ; Latin_Extended_A +blk; Latin_Ext_Additional ; Latin_Extended_Additional +blk; Latin_Ext_B ; Latin_Extended_B +blk; Latin_Ext_C ; Latin_Extended_C +blk; Latin_Ext_D ; Latin_Extended_D +blk; Latin_Ext_E ; Latin_Extended_E +blk; Latin_Ext_F ; Latin_Extended_F +blk; Latin_Ext_G ; Latin_Extended_G +blk; Lepcha ; Lepcha +blk; Letterlike_Symbols ; Letterlike_Symbols +blk; Limbu ; Limbu +blk; Linear_A ; Linear_A +blk; Linear_B_Ideograms ; Linear_B_Ideograms +blk; Linear_B_Syllabary ; Linear_B_Syllabary +blk; Lisu ; Lisu +blk; Lisu_Sup ; Lisu_Supplement +blk; Low_Surrogates ; Low_Surrogates +blk; Lycian ; Lycian +blk; Lydian ; Lydian +blk; Mahajani ; Mahajani +blk; Mahjong ; Mahjong_Tiles +blk; Makasar ; Makasar +blk; Malayalam ; Malayalam +blk; Mandaic ; Mandaic +blk; Manichaean ; Manichaean +blk; Marchen ; Marchen +blk; Masaram_Gondi ; Masaram_Gondi +blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols +blk; Math_Operators ; Mathematical_Operators +blk; Mayan_Numerals ; Mayan_Numerals +blk; Medefaidrin ; Medefaidrin +blk; Meetei_Mayek ; Meetei_Mayek +blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions +blk; Mende_Kikakui ; Mende_Kikakui +blk; Meroitic_Cursive ; Meroitic_Cursive +blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs +blk; Miao ; Miao +blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows +blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A +blk; Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B +blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs +blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Technical ; Miscellaneous_Technical +blk; Modi ; Modi +blk; Modifier_Letters ; Spacing_Modifier_Letters +blk; Modifier_Tone_Letters ; Modifier_Tone_Letters +blk; Mongolian ; Mongolian +blk; Mongolian_Sup ; Mongolian_Supplement +blk; Mro ; Mro +blk; Multani ; Multani +blk; Music ; Musical_Symbols +blk; Myanmar ; Myanmar +blk; Myanmar_Ext_A ; Myanmar_Extended_A +blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Nabataean ; Nabataean +blk; Nandinagari ; Nandinagari +blk; NB ; No_Block +blk; New_Tai_Lue ; New_Tai_Lue +blk; Newa ; Newa +blk; NKo ; NKo +blk; Number_Forms ; Number_Forms +blk; Nushu ; Nushu +blk; Nyiakeng_Puachue_Hmong ; Nyiakeng_Puachue_Hmong +blk; OCR ; Optical_Character_Recognition +blk; Ogham ; Ogham +blk; Ol_Chiki ; Ol_Chiki +blk; Old_Hungarian ; Old_Hungarian +blk; Old_Italic ; Old_Italic +blk; Old_North_Arabian ; Old_North_Arabian +blk; Old_Permic ; Old_Permic +blk; Old_Persian ; Old_Persian +blk; Old_Sogdian ; Old_Sogdian +blk; Old_South_Arabian ; Old_South_Arabian +blk; Old_Turkic ; Old_Turkic +blk; Old_Uyghur ; Old_Uyghur +blk; Oriya ; Oriya +blk; Ornamental_Dingbats ; Ornamental_Dingbats +blk; Osage ; Osage +blk; Osmanya ; Osmanya +blk; Ottoman_Siyaq_Numbers ; Ottoman_Siyaq_Numbers +blk; Pahawh_Hmong ; Pahawh_Hmong +blk; Palmyrene ; Palmyrene +blk; Pau_Cin_Hau ; Pau_Cin_Hau +blk; Phags_Pa ; Phags_Pa +blk; Phaistos ; Phaistos_Disc +blk; Phoenician ; Phoenician +blk; Phonetic_Ext ; Phonetic_Extensions +blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement +blk; Playing_Cards ; Playing_Cards +blk; Psalter_Pahlavi ; Psalter_Pahlavi +blk; PUA ; Private_Use_Area ; Private_Use +blk; Punctuation ; General_Punctuation +blk; Rejang ; Rejang +blk; Rumi ; Rumi_Numeral_Symbols +blk; Runic ; Runic +blk; Samaritan ; Samaritan +blk; Saurashtra ; Saurashtra +blk; Sharada ; Sharada +blk; Shavian ; Shavian +blk; Shorthand_Format_Controls ; Shorthand_Format_Controls +blk; Siddham ; Siddham +blk; Sinhala ; Sinhala +blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers +blk; Small_Forms ; Small_Form_Variants +blk; Small_Kana_Ext ; Small_Kana_Extension +blk; Sogdian ; Sogdian +blk; Sora_Sompeng ; Sora_Sompeng +blk; Soyombo ; Soyombo +blk; Specials ; Specials +blk; Sundanese ; Sundanese +blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sup_Arrows_A ; Supplemental_Arrows_A +blk; Sup_Arrows_B ; Supplemental_Arrows_B +blk; Sup_Arrows_C ; Supplemental_Arrows_C +blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators +blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A +blk; Sup_PUA_B ; Supplementary_Private_Use_Area_B +blk; Sup_Punctuation ; Supplemental_Punctuation +blk; Sup_Symbols_And_Pictographs ; Supplemental_Symbols_And_Pictographs +blk; Super_And_Sub ; Superscripts_And_Subscripts +blk; Sutton_SignWriting ; Sutton_SignWriting +blk; Syloti_Nagri ; Syloti_Nagri +blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A +blk; Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +blk; Syriac ; Syriac +blk; Syriac_Sup ; Syriac_Supplement +blk; Tagalog ; Tagalog +blk; Tagbanwa ; Tagbanwa +blk; Tags ; Tags +blk; Tai_Le ; Tai_Le +blk; Tai_Tham ; Tai_Tham +blk; Tai_Viet ; Tai_Viet +blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Takri ; Takri +blk; Tamil ; Tamil +blk; Tamil_Sup ; Tamil_Supplement +blk; Tangsa ; Tangsa +blk; Tangut ; Tangut +blk; Tangut_Components ; Tangut_Components +blk; Tangut_Sup ; Tangut_Supplement +blk; Telugu ; Telugu +blk; Thaana ; Thaana +blk; Thai ; Thai +blk; Tibetan ; Tibetan +blk; Tifinagh ; Tifinagh +blk; Tirhuta ; Tirhuta +blk; Toto ; Toto +blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended +blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A +blk; Ugaritic ; Ugaritic +blk; Vai ; Vai +blk; Vedic_Ext ; Vedic_Extensions +blk; Vertical_Forms ; Vertical_Forms +blk; Vithkuqi ; Vithkuqi +blk; VS ; Variation_Selectors +blk; VS_Sup ; Variation_Selectors_Supplement +blk; Wancho ; Wancho +blk; Warang_Citi ; Warang_Citi +blk; Yezidi ; Yezidi +blk; Yi_Radicals ; Yi_Radicals +blk; Yi_Syllables ; Yi_Syllables +blk; Yijing ; Yijing_Hexagram_Symbols +blk; Zanabazar_Square ; Zanabazar_Square +blk; Znamenny_Music ; Znamenny_Musical_Notation + +# Canonical_Combining_Class (ccc) + +ccc; 0; NR ; Not_Reordered +ccc; 1; OV ; Overlay +ccc; 6; HANR ; Han_Reading +ccc; 7; NK ; Nukta +ccc; 8; KV ; Kana_Voicing +ccc; 9; VR ; Virama +ccc; 10; CCC10 ; CCC10 +ccc; 11; CCC11 ; CCC11 +ccc; 12; CCC12 ; CCC12 +ccc; 13; CCC13 ; CCC13 +ccc; 14; CCC14 ; CCC14 +ccc; 15; CCC15 ; CCC15 +ccc; 16; CCC16 ; CCC16 +ccc; 17; CCC17 ; CCC17 +ccc; 18; CCC18 ; CCC18 +ccc; 19; CCC19 ; CCC19 +ccc; 20; CCC20 ; CCC20 +ccc; 21; CCC21 ; CCC21 +ccc; 22; CCC22 ; CCC22 +ccc; 23; CCC23 ; CCC23 +ccc; 24; CCC24 ; CCC24 +ccc; 25; CCC25 ; CCC25 +ccc; 26; CCC26 ; CCC26 +ccc; 27; CCC27 ; CCC27 +ccc; 28; CCC28 ; CCC28 +ccc; 29; CCC29 ; CCC29 +ccc; 30; CCC30 ; CCC30 +ccc; 31; CCC31 ; CCC31 +ccc; 32; CCC32 ; CCC32 +ccc; 33; CCC33 ; CCC33 +ccc; 34; CCC34 ; CCC34 +ccc; 35; CCC35 ; CCC35 +ccc; 36; CCC36 ; CCC36 +ccc; 84; CCC84 ; CCC84 +ccc; 91; CCC91 ; CCC91 +ccc; 103; CCC103 ; CCC103 +ccc; 107; CCC107 ; CCC107 +ccc; 118; CCC118 ; CCC118 +ccc; 122; CCC122 ; CCC122 +ccc; 129; CCC129 ; CCC129 +ccc; 130; CCC130 ; CCC130 +ccc; 132; CCC132 ; CCC132 +ccc; 133; CCC133 ; CCC133 # RESERVED +ccc; 200; ATBL ; Attached_Below_Left +ccc; 202; ATB ; Attached_Below +ccc; 214; ATA ; Attached_Above +ccc; 216; ATAR ; Attached_Above_Right +ccc; 218; BL ; Below_Left +ccc; 220; B ; Below +ccc; 222; BR ; Below_Right +ccc; 224; L ; Left +ccc; 226; R ; Right +ccc; 228; AL ; Above_Left +ccc; 230; A ; Above +ccc; 232; AR ; Above_Right +ccc; 233; DB ; Double_Below +ccc; 234; DA ; Double_Above +ccc; 240; IS ; Iota_Subscript + +# Case_Folding (cf) + +# @missing: 0000..10FFFF; Case_Folding; + +# Case_Ignorable (CI) + +CI ; N ; No ; F ; False +CI ; Y ; Yes ; T ; True + +# Cased (Cased) + +Cased; N ; No ; F ; False +Cased; Y ; Yes ; T ; True + +# Changes_When_Casefolded (CWCF) + +CWCF; N ; No ; F ; False +CWCF; Y ; Yes ; T ; True + +# Changes_When_Casemapped (CWCM) + +CWCM; N ; No ; F ; False +CWCM; Y ; Yes ; T ; True + +# Changes_When_Lowercased (CWL) + +CWL; N ; No ; F ; False +CWL; Y ; Yes ; T ; True + +# Changes_When_NFKC_Casefolded (CWKCF) + +CWKCF; N ; No ; F ; False +CWKCF; Y ; Yes ; T ; True + +# Changes_When_Titlecased (CWT) + +CWT; N ; No ; F ; False +CWT; Y ; Yes ; T ; True + +# Changes_When_Uppercased (CWU) + +CWU; N ; No ; F ; False +CWU; Y ; Yes ; T ; True + +# Composition_Exclusion (CE) + +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True + +# Dash (Dash) + +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True + +# Decomposition_Mapping (dm) + +# @missing: 0000..10FFFF; Decomposition_Mapping; + +# Decomposition_Type (dt) + +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; Wide ; wide + +# Default_Ignorable_Code_Point (DI) + +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True + +# Deprecated (Dep) + +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True + +# Diacritic (Dia) + +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True + +# East_Asian_Width (ea) + +ea ; A ; Ambiguous +ea ; F ; Fullwidth +ea ; H ; Halfwidth +ea ; N ; Neutral +ea ; Na ; Narrow +ea ; W ; Wide + +# Emoji (Emoji) + +Emoji; N ; No ; F ; False +Emoji; Y ; Yes ; T ; True + +# Emoji_Component (EComp) + +EComp; N ; No ; F ; False +EComp; Y ; Yes ; T ; True + +# Emoji_Modifier (EMod) + +EMod; N ; No ; F ; False +EMod; Y ; Yes ; T ; True + +# Emoji_Modifier_Base (EBase) + +EBase; N ; No ; F ; False +EBase; Y ; Yes ; T ; True + +# Emoji_Presentation (EPres) + +EPres; N ; No ; F ; False +EPres; Y ; Yes ; T ; True + +# Equivalent_Unified_Ideograph (EqUIdeo) + +# @missing: 0000..10FFFF; Equivalent_Unified_Ideograph; + +# Expands_On_NFC (XO_NFC) + +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True + +# Expands_On_NFD (XO_NFD) + +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True + +# Expands_On_NFKC (XO_NFKC) + +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True + +# Expands_On_NFKD (XO_NFKD) + +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True + +# Extended_Pictographic (ExtPict) + +ExtPict; N ; No ; F ; False +ExtPict; Y ; Yes ; T ; True + +# Extender (Ext) + +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True + +# FC_NFKC_Closure (FC_NFKC) + +# @missing: 0000..10FFFF; FC_NFKC_Closure; + +# Full_Composition_Exclusion (Comp_Ex) + +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True + +# General_Category (gc) + +gc ; C ; Other # Cc | Cf | Cn | Co | Cs +gc ; Cc ; Control ; cntrl +gc ; Cf ; Format +gc ; Cn ; Unassigned +gc ; Co ; Private_Use +gc ; Cs ; Surrogate +gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu +gc ; LC ; Cased_Letter # Ll | Lt | Lu +gc ; Ll ; Lowercase_Letter +gc ; Lm ; Modifier_Letter +gc ; Lo ; Other_Letter +gc ; Lt ; Titlecase_Letter +gc ; Lu ; Uppercase_Letter +gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn +gc ; Mc ; Spacing_Mark +gc ; Me ; Enclosing_Mark +gc ; Mn ; Nonspacing_Mark +gc ; N ; Number # Nd | Nl | No +gc ; Nd ; Decimal_Number ; digit +gc ; Nl ; Letter_Number +gc ; No ; Other_Number +gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps +gc ; Pc ; Connector_Punctuation +gc ; Pd ; Dash_Punctuation +gc ; Pe ; Close_Punctuation +gc ; Pf ; Final_Punctuation +gc ; Pi ; Initial_Punctuation +gc ; Po ; Other_Punctuation +gc ; Ps ; Open_Punctuation +gc ; S ; Symbol # Sc | Sk | Sm | So +gc ; Sc ; Currency_Symbol +gc ; Sk ; Modifier_Symbol +gc ; Sm ; Math_Symbol +gc ; So ; Other_Symbol +gc ; Z ; Separator # Zl | Zp | Zs +gc ; Zl ; Line_Separator +gc ; Zp ; Paragraph_Separator +gc ; Zs ; Space_Separator +# @missing: 0000..10FFFF; General_Category; Unassigned + +# Grapheme_Base (Gr_Base) + +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True + +# Grapheme_Cluster_Break (GCB) + +GCB; CN ; Control +GCB; CR ; CR +GCB; EB ; E_Base +GCB; EBG ; E_Base_GAZ +GCB; EM ; E_Modifier +GCB; EX ; Extend +GCB; GAZ ; Glue_After_Zwj +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; PP ; Prepend +GCB; RI ; Regional_Indicator +GCB; SM ; SpacingMark +GCB; T ; T +GCB; V ; V +GCB; XX ; Other +GCB; ZWJ ; ZWJ + +# Grapheme_Extend (Gr_Ext) + +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True + +# Grapheme_Link (Gr_Link) + +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True + +# Hangul_Syllable_Type (hst) + +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo +hst; V ; Vowel_Jamo + +# Hex_Digit (Hex) + +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True + +# Hyphen (Hyphen) + +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True + +# IDS_Binary_Operator (IDSB) + +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True + +# IDS_Trinary_Operator (IDST) + +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True + +# ID_Continue (IDC) + +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True + +# ID_Start (IDS) + +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True + +# ISO_Comment (isc) + +# @missing: 0000..10FFFF; ISO_Comment; + +# Ideographic (Ideo) + +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Indic_Positional_Category (InPC) + +InPC; Bottom ; Bottom +InPC; Bottom_And_Left ; Bottom_And_Left +InPC; Bottom_And_Right ; Bottom_And_Right +InPC; Left ; Left +InPC; Left_And_Right ; Left_And_Right +InPC; NA ; NA +InPC; Overstruck ; Overstruck +InPC; Right ; Right +InPC; Top ; Top +InPC; Top_And_Bottom ; Top_And_Bottom +InPC; Top_And_Bottom_And_Left ; Top_And_Bottom_And_Left +InPC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right +InPC; Top_And_Left ; Top_And_Left +InPC; Top_And_Left_And_Right ; Top_And_Left_And_Right +InPC; Top_And_Right ; Top_And_Right +InPC; Visual_Order_Left ; Visual_Order_Left + +# Indic_Syllabic_Category (InSC) + +InSC; Avagraha ; Avagraha +InSC; Bindu ; Bindu +InSC; Brahmi_Joining_Number ; Brahmi_Joining_Number +InSC; Cantillation_Mark ; Cantillation_Mark +InSC; Consonant ; Consonant +InSC; Consonant_Dead ; Consonant_Dead +InSC; Consonant_Final ; Consonant_Final +InSC; Consonant_Head_Letter ; Consonant_Head_Letter +InSC; Consonant_Initial_Postfixed ; Consonant_Initial_Postfixed +InSC; Consonant_Killer ; Consonant_Killer +InSC; Consonant_Medial ; Consonant_Medial +InSC; Consonant_Placeholder ; Consonant_Placeholder +InSC; Consonant_Preceding_Repha ; Consonant_Preceding_Repha +InSC; Consonant_Prefixed ; Consonant_Prefixed +InSC; Consonant_Subjoined ; Consonant_Subjoined +InSC; Consonant_Succeeding_Repha ; Consonant_Succeeding_Repha +InSC; Consonant_With_Stacker ; Consonant_With_Stacker +InSC; Gemination_Mark ; Gemination_Mark +InSC; Invisible_Stacker ; Invisible_Stacker +InSC; Joiner ; Joiner +InSC; Modifying_Letter ; Modifying_Letter +InSC; Non_Joiner ; Non_Joiner +InSC; Nukta ; Nukta +InSC; Number ; Number +InSC; Number_Joiner ; Number_Joiner +InSC; Other ; Other +InSC; Pure_Killer ; Pure_Killer +InSC; Register_Shifter ; Register_Shifter +InSC; Syllable_Modifier ; Syllable_Modifier +InSC; Tone_Letter ; Tone_Letter +InSC; Tone_Mark ; Tone_Mark +InSC; Virama ; Virama +InSC; Visarga ; Visarga +InSC; Vowel ; Vowel +InSC; Vowel_Dependent ; Vowel_Dependent +InSC; Vowel_Independent ; Vowel_Independent + +# Jamo_Short_Name (JSN) + +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU +# @missing: 0000..10FFFF; Jamo_Short_Name; + +# Join_Control (Join_C) + +Join_C; N ; No ; F ; False +Join_C; Y ; Yes ; T ; True + +# Joining_Group (jg) + +jg ; African_Feh ; African_Feh +jg ; African_Noon ; African_Noon +jg ; African_Qaf ; African_Qaf +jg ; Ain ; Ain +jg ; Alaph ; Alaph +jg ; Alef ; Alef +jg ; Beh ; Beh +jg ; Beth ; Beth +jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Dal ; Dal +jg ; Dalath_Rish ; Dalath_Rish +jg ; E ; E +jg ; Farsi_Yeh ; Farsi_Yeh +jg ; Fe ; Fe +jg ; Feh ; Feh +jg ; Final_Semkath ; Final_Semkath +jg ; Gaf ; Gaf +jg ; Gamal ; Gamal +jg ; Hah ; Hah +jg ; Hanifi_Rohingya_Kinna_Ya ; Hanifi_Rohingya_Kinna_Ya +jg ; Hanifi_Rohingya_Pa ; Hanifi_Rohingya_Pa +jg ; He ; He +jg ; Heh ; Heh +jg ; Heh_Goal ; Heh_Goal +jg ; Heth ; Heth +jg ; Kaf ; Kaf +jg ; Kaph ; Kaph +jg ; Khaph ; Khaph +jg ; Knotted_Heh ; Knotted_Heh +jg ; Lam ; Lam +jg ; Lamadh ; Lamadh +jg ; Malayalam_Bha ; Malayalam_Bha +jg ; Malayalam_Ja ; Malayalam_Ja +jg ; Malayalam_Lla ; Malayalam_Lla +jg ; Malayalam_Llla ; Malayalam_Llla +jg ; Malayalam_Nga ; Malayalam_Nga +jg ; Malayalam_Nna ; Malayalam_Nna +jg ; Malayalam_Nnna ; Malayalam_Nnna +jg ; Malayalam_Nya ; Malayalam_Nya +jg ; Malayalam_Ra ; Malayalam_Ra +jg ; Malayalam_Ssa ; Malayalam_Ssa +jg ; Malayalam_Tta ; Malayalam_Tta +jg ; Manichaean_Aleph ; Manichaean_Aleph +jg ; Manichaean_Ayin ; Manichaean_Ayin +jg ; Manichaean_Beth ; Manichaean_Beth +jg ; Manichaean_Daleth ; Manichaean_Daleth +jg ; Manichaean_Dhamedh ; Manichaean_Dhamedh +jg ; Manichaean_Five ; Manichaean_Five +jg ; Manichaean_Gimel ; Manichaean_Gimel +jg ; Manichaean_Heth ; Manichaean_Heth +jg ; Manichaean_Hundred ; Manichaean_Hundred +jg ; Manichaean_Kaph ; Manichaean_Kaph +jg ; Manichaean_Lamedh ; Manichaean_Lamedh +jg ; Manichaean_Mem ; Manichaean_Mem +jg ; Manichaean_Nun ; Manichaean_Nun +jg ; Manichaean_One ; Manichaean_One +jg ; Manichaean_Pe ; Manichaean_Pe +jg ; Manichaean_Qoph ; Manichaean_Qoph +jg ; Manichaean_Resh ; Manichaean_Resh +jg ; Manichaean_Sadhe ; Manichaean_Sadhe +jg ; Manichaean_Samekh ; Manichaean_Samekh +jg ; Manichaean_Taw ; Manichaean_Taw +jg ; Manichaean_Ten ; Manichaean_Ten +jg ; Manichaean_Teth ; Manichaean_Teth +jg ; Manichaean_Thamedh ; Manichaean_Thamedh +jg ; Manichaean_Twenty ; Manichaean_Twenty +jg ; Manichaean_Waw ; Manichaean_Waw +jg ; Manichaean_Yodh ; Manichaean_Yodh +jg ; Manichaean_Zayin ; Manichaean_Zayin +jg ; Meem ; Meem +jg ; Mim ; Mim +jg ; No_Joining_Group ; No_Joining_Group +jg ; Noon ; Noon +jg ; Nun ; Nun +jg ; Nya ; Nya +jg ; Pe ; Pe +jg ; Qaf ; Qaf +jg ; Qaph ; Qaph +jg ; Reh ; Reh +jg ; Reversed_Pe ; Reversed_Pe +jg ; Rohingya_Yeh ; Rohingya_Yeh +jg ; Sad ; Sad +jg ; Sadhe ; Sadhe +jg ; Seen ; Seen +jg ; Semkath ; Semkath +jg ; Shin ; Shin +jg ; Straight_Waw ; Straight_Waw +jg ; Swash_Kaf ; Swash_Kaf +jg ; Syriac_Waw ; Syriac_Waw +jg ; Tah ; Tah +jg ; Taw ; Taw +jg ; Teh_Marbuta ; Teh_Marbuta +jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teth ; Teth +jg ; Thin_Yeh ; Thin_Yeh +jg ; Vertical_Tail ; Vertical_Tail +jg ; Waw ; Waw +jg ; Yeh ; Yeh +jg ; Yeh_Barree ; Yeh_Barree +jg ; Yeh_With_Tail ; Yeh_With_Tail +jg ; Yudh ; Yudh +jg ; Yudh_He ; Yudh_He +jg ; Zain ; Zain +jg ; Zhain ; Zhain + +# Joining_Type (jt) + +jt ; C ; Join_Causing +jt ; D ; Dual_Joining +jt ; L ; Left_Joining +jt ; R ; Right_Joining +jt ; T ; Transparent +jt ; U ; Non_Joining + +# Line_Break (lb) + +lb ; AI ; Ambiguous +lb ; AL ; Alphabetic +lb ; B2 ; Break_Both +lb ; BA ; Break_After +lb ; BB ; Break_Before +lb ; BK ; Mandatory_Break +lb ; CB ; Contingent_Break +lb ; CJ ; Conditional_Japanese_Starter +lb ; CL ; Close_Punctuation +lb ; CM ; Combining_Mark +lb ; CP ; Close_Parenthesis +lb ; CR ; Carriage_Return +lb ; EB ; E_Base +lb ; EM ; E_Modifier +lb ; EX ; Exclamation +lb ; GL ; Glue +lb ; H2 ; H2 +lb ; H3 ; H3 +lb ; HL ; Hebrew_Letter +lb ; HY ; Hyphen +lb ; ID ; Ideographic +lb ; IN ; Inseparable ; Inseperable +lb ; IS ; Infix_Numeric +lb ; JL ; JL +lb ; JT ; JT +lb ; JV ; JV +lb ; LF ; Line_Feed +lb ; NL ; Next_Line +lb ; NS ; Nonstarter +lb ; NU ; Numeric +lb ; OP ; Open_Punctuation +lb ; PO ; Postfix_Numeric +lb ; PR ; Prefix_Numeric +lb ; QU ; Quotation +lb ; RI ; Regional_Indicator +lb ; SA ; Complex_Context +lb ; SG ; Surrogate +lb ; SP ; Space +lb ; SY ; Break_Symbols +lb ; WJ ; Word_Joiner +lb ; XX ; Unknown +lb ; ZW ; ZWSpace +lb ; ZWJ ; ZWJ + +# Logical_Order_Exception (LOE) + +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True + +# Lowercase (Lower) + +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True + +# Lowercase_Mapping (lc) + +# @missing: 0000..10FFFF; Lowercase_Mapping; + +# Math (Math) + +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True + +# NFC_Quick_Check (NFC_QC) + +NFC_QC; M ; Maybe +NFC_QC; N ; No +NFC_QC; Y ; Yes + +# NFD_Quick_Check (NFD_QC) + +NFD_QC; N ; No +NFD_QC; Y ; Yes + +# NFKC_Casefold (NFKC_CF) + +# @missing: 0000..10FFFF; NFKC_Casefold; + +# NFKC_Quick_Check (NFKC_QC) + +NFKC_QC; M ; Maybe +NFKC_QC; N ; No +NFKC_QC; Y ; Yes + +# NFKD_Quick_Check (NFKD_QC) + +NFKD_QC; N ; No +NFKD_QC; Y ; Yes + +# Name (na) + +# @missing: 0000..10FFFF; Name; + +# Name_Alias (Name_Alias) + +# @missing: 0000..10FFFF; Name_Alias; + +# Noncharacter_Code_Point (NChar) + +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True + +# Numeric_Type (nt) + +nt ; De ; Decimal +nt ; Di ; Digit +nt ; None ; None +nt ; Nu ; Numeric + +# Numeric_Value (nv) + +# @missing: 0000..10FFFF; Numeric_Value; NaN + +# Other_Alphabetic (OAlpha) + +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True + +# Other_Default_Ignorable_Code_Point (ODI) + +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True + +# Other_Grapheme_Extend (OGr_Ext) + +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True + +# Other_ID_Continue (OIDC) + +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True + +# Other_ID_Start (OIDS) + +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True + +# Other_Lowercase (OLower) + +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True + +# Other_Math (OMath) + +OMath; N ; No ; F ; False +OMath; Y ; Yes ; T ; True + +# Other_Uppercase (OUpper) + +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True + +# Pattern_Syntax (Pat_Syn) + +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True + +# Pattern_White_Space (Pat_WS) + +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True + +# Prepended_Concatenation_Mark (PCM) + +PCM; N ; No ; F ; False +PCM; Y ; Yes ; T ; True + +# Quotation_Mark (QMark) + +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True + +# Radical (Radical) + +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True + +# Regional_Indicator (RI) + +RI ; N ; No ; F ; False +RI ; Y ; Yes ; T ; True + +# Script (sc) + +sc ; Adlm ; Adlam +sc ; Aghb ; Caucasian_Albanian +sc ; Ahom ; Ahom +sc ; Arab ; Arabic +sc ; Armi ; Imperial_Aramaic +sc ; Armn ; Armenian +sc ; Avst ; Avestan +sc ; Bali ; Balinese +sc ; Bamu ; Bamum +sc ; Bass ; Bassa_Vah +sc ; Batk ; Batak +sc ; Beng ; Bengali +sc ; Bhks ; Bhaiksuki +sc ; Bopo ; Bopomofo +sc ; Brah ; Brahmi +sc ; Brai ; Braille +sc ; Bugi ; Buginese +sc ; Buhd ; Buhid +sc ; Cakm ; Chakma +sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; Cham ; Cham +sc ; Cher ; Cherokee +sc ; Chrs ; Chorasmian +sc ; Copt ; Coptic ; Qaac +sc ; Cpmn ; Cypro_Minoan +sc ; Cprt ; Cypriot +sc ; Cyrl ; Cyrillic +sc ; Deva ; Devanagari +sc ; Diak ; Dives_Akuru +sc ; Dogr ; Dogra +sc ; Dsrt ; Deseret +sc ; Dupl ; Duployan +sc ; Egyp ; Egyptian_Hieroglyphs +sc ; Elba ; Elbasan +sc ; Elym ; Elymaic +sc ; Ethi ; Ethiopic +sc ; Geor ; Georgian +sc ; Glag ; Glagolitic +sc ; Gong ; Gunjala_Gondi +sc ; Gonm ; Masaram_Gondi +sc ; Goth ; Gothic +sc ; Gran ; Grantha +sc ; Grek ; Greek +sc ; Gujr ; Gujarati +sc ; Guru ; Gurmukhi +sc ; Hang ; Hangul +sc ; Hani ; Han +sc ; Hano ; Hanunoo +sc ; Hatr ; Hatran +sc ; Hebr ; Hebrew +sc ; Hira ; Hiragana +sc ; Hluw ; Anatolian_Hieroglyphs +sc ; Hmng ; Pahawh_Hmong +sc ; Hmnp ; Nyiakeng_Puachue_Hmong +sc ; Hrkt ; Katakana_Or_Hiragana +sc ; Hung ; Old_Hungarian +sc ; Ital ; Old_Italic +sc ; Java ; Javanese +sc ; Kali ; Kayah_Li +sc ; Kana ; Katakana +sc ; Khar ; Kharoshthi +sc ; Khmr ; Khmer +sc ; Khoj ; Khojki +sc ; Kits ; Khitan_Small_Script +sc ; Knda ; Kannada +sc ; Kthi ; Kaithi +sc ; Lana ; Tai_Tham +sc ; Laoo ; Lao +sc ; Latn ; Latin +sc ; Lepc ; Lepcha +sc ; Limb ; Limbu +sc ; Lina ; Linear_A +sc ; Linb ; Linear_B +sc ; Lisu ; Lisu +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian +sc ; Mahj ; Mahajani +sc ; Maka ; Makasar +sc ; Mand ; Mandaic +sc ; Mani ; Manichaean +sc ; Marc ; Marchen +sc ; Medf ; Medefaidrin +sc ; Mend ; Mende_Kikakui +sc ; Merc ; Meroitic_Cursive +sc ; Mero ; Meroitic_Hieroglyphs +sc ; Mlym ; Malayalam +sc ; Modi ; Modi +sc ; Mong ; Mongolian +sc ; Mroo ; Mro +sc ; Mtei ; Meetei_Mayek +sc ; Mult ; Multani +sc ; Mymr ; Myanmar +sc ; Nand ; Nandinagari +sc ; Narb ; Old_North_Arabian +sc ; Nbat ; Nabataean +sc ; Newa ; Newa +sc ; Nkoo ; Nko +sc ; Nshu ; Nushu +sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki +sc ; Orkh ; Old_Turkic +sc ; Orya ; Oriya +sc ; Osge ; Osage +sc ; Osma ; Osmanya +sc ; Ougr ; Old_Uyghur +sc ; Palm ; Palmyrene +sc ; Pauc ; Pau_Cin_Hau +sc ; Perm ; Old_Permic +sc ; Phag ; Phags_Pa +sc ; Phli ; Inscriptional_Pahlavi +sc ; Phlp ; Psalter_Pahlavi +sc ; Phnx ; Phoenician +sc ; Plrd ; Miao +sc ; Prti ; Inscriptional_Parthian +sc ; Rjng ; Rejang +sc ; Rohg ; Hanifi_Rohingya +sc ; Runr ; Runic +sc ; Samr ; Samaritan +sc ; Sarb ; Old_South_Arabian +sc ; Saur ; Saurashtra +sc ; Sgnw ; SignWriting +sc ; Shaw ; Shavian +sc ; Shrd ; Sharada +sc ; Sidd ; Siddham +sc ; Sind ; Khudawadi +sc ; Sinh ; Sinhala +sc ; Sogd ; Sogdian +sc ; Sogo ; Old_Sogdian +sc ; Sora ; Sora_Sompeng +sc ; Soyo ; Soyombo +sc ; Sund ; Sundanese +sc ; Sylo ; Syloti_Nagri +sc ; Syrc ; Syriac +sc ; Tagb ; Tagbanwa +sc ; Takr ; Takri +sc ; Tale ; Tai_Le +sc ; Talu ; New_Tai_Lue +sc ; Taml ; Tamil +sc ; Tang ; Tangut +sc ; Tavt ; Tai_Viet +sc ; Telu ; Telugu +sc ; Tfng ; Tifinagh +sc ; Tglg ; Tagalog +sc ; Thaa ; Thaana +sc ; Thai ; Thai +sc ; Tibt ; Tibetan +sc ; Tirh ; Tirhuta +sc ; Tnsa ; Tangsa +sc ; Toto ; Toto +sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai +sc ; Vith ; Vithkuqi +sc ; Wara ; Warang_Citi +sc ; Wcho ; Wancho +sc ; Xpeo ; Old_Persian +sc ; Xsux ; Cuneiform +sc ; Yezi ; Yezidi +sc ; Yiii ; Yi +sc ; Zanb ; Zanabazar_Square +sc ; Zinh ; Inherited ; Qaai +sc ; Zyyy ; Common +sc ; Zzzz ; Unknown + +# Script_Extensions (scx) + +# @missing: 0000..10FFFF; Script_Extensions;