How should I wrap the text to get the same result in any engine,
provided that "the user" can type the actual Japanese character rather
than an escape sequence?
This isn't currently possible with a single command that works in every engine. However, if you don't need to support upLaTeX, the following example
prints E782B9 with pdfLaTeX, XeLaTeX, and LuaLaTeX:
\ExplSyntaxOn
% Convert the literal character from the "default" encoding to "utf8/hex"
% and store the outcome in the scratch string \l_tmpa_str.
\str_set_convert:Nnnn \l_tmpa_str { 点 } { default } { utf8/hex }
% Write the converted string to the terminal/log.
\iow_term:e { \str_use:N \l_tmpa_str }
% Call \@@end (name built with \use:c) to stop the run; there is no
% document body in this snippet.
\use:c { @@end }
Here's a sample file that tests the encoding functions on various
characters with various engines:
\ExplSyntaxOn
% Test harness for \str_set_convert:Nnnn.
%
% For every combination of test character, source ("From") encoding and
% target ("To") encoding, the conversion is run and one Markdown table row
% is written to the terminal:
%   | Engine | Char | From | To | Error | Result | Codepoints |
% "Codepoints" lists the character code of every character of the result
% in upper-case hexadecimal.

% Loop variables: current character, source encoding, target encoding.
\tl_new:N \l__example_char_tl
\tl_new:N \l__example_from_tl
\tl_new:N \l__example_to_tl
% Converted string and its space-separated hexadecimal character codes.
\str_new:N \l__example_result_str
\str_new:N \l__example_codes_str
% Text captured from the error hook below; empty when no error was recorded.
\str_new:N \g__example_error_str

% Redirect error reporting: store the first argument (used below as the
% "Error" column) instead of interrupting the run.  The function performs
% an assignment, so it is defined as protected.
% NOTE(review): \__msg_interrupt_wrap:nnn is internal to l3msg; this
% override can stop working if the kernel implementation changes.
\cs_set_protected:Nn \__msg_interrupt_wrap:nnn
  {
    \str_gset:Ne \g__example_error_str {#1}
  }

% The encodings are held in token list variables, hence the V-type variant.
\cs_generate_variant:Nn \str_set_convert:Nnnn { NVVV }

% Table header row.
\iow_term:e {
  | ~
  Engine \c_space_tl | ~
  Char \c_space_tl | ~
  From \c_space_tl | ~
  To \c_space_tl | ~
  Error \c_space_tl | ~
  Result \c_space_tl | ~
  Codepoints \c_space_tl | ~
}
% Markdown separator row (one "---" per column).
\iow_term:e {
  | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
  --- \c_space_tl | ~
}

% An empty list item ({}) selects the empty encoding name.
\clist_map_variable:nNn { x, á, 点 } \l__example_char_tl {
  \clist_map_variable:nNn { {}, utf8, default } \l__example_from_tl {
    \clist_map_variable:nNn { {}, utf8, utf8/hex } \l__example_to_tl {
      % Forget the error of the previous conversion.  The variable is
      % global, so it must be cleared globally as well.
      \str_gclear:N \g__example_error_str
      \str_set_convert:NVVV
        \l__example_result_str
        \l__example_char_tl
        \l__example_from_tl
        \l__example_to_tl
      % Collect the character code (#2) of each item of the result.
      \str_clear:N \l__example_codes_str
      \tl_analysis_map_inline:Nn \l__example_result_str {
        \str_put_right:Ne \l__example_codes_str {
          \int_to_Hex:n {#2} \c_space_tl
        }
      }
      % Emit the table row for this combination.
      \iow_term:e {
        | ~
        \c_sys_engine_str \c_space_tl | ~
        \l__example_char_tl \c_space_tl | ~
        \l__example_from_tl \c_space_tl | ~
        \l__example_to_tl \c_space_tl | ~
        \g__example_error_str \c_space_tl | ~
        \l__example_result_str \c_space_tl | ~
        \l__example_codes_str \c_space_tl | ~
      }
    }
  }
}
% Call \@@end (name built with \use:c) to stop the run; there is no
% document body in this file.
\use:c { @@end }
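And here are the results, combined from runs with pdfTeX, XeTeX, LuaTeX, and upTeX. An empty From or To cell means that the empty encoding name was passed, an empty Error cell means that no error was recorded, and Codepoints lists the character codes of the Result in hexadecimal: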
| Engine |
Char |
From |
To |
Error |
Result |
Codepoints |
| pdfTeX |
x |
|
|
|
x |
78 |
| XeTeX |
x |
|
|
|
x |
78 |
| LuaTeX |
x |
|
|
|
x |
78 |
| upTeX |
x |
|
|
|
x |
78 |
| pdfTeX |
x |
|
utf8 |
|
x |
78 |
| XeTeX |
x |
|
utf8 |
|
x |
78 |
| LuaTeX |
x |
|
utf8 |
|
x |
78 |
| upTeX |
x |
|
utf8 |
|
x |
78 |
| pdfTeX |
x |
|
utf8/hex |
|
78 |
37 38 |
| XeTeX |
x |
|
utf8/hex |
|
78 |
37 38 |
| LuaTeX |
x |
|
utf8/hex |
|
78 |
37 38 |
| upTeX |
x |
|
utf8/hex |
|
78 |
37 38 |
| pdfTeX |
x |
default |
|
|
x |
78 |
| XeTeX |
x |
default |
|
|
x |
78 |
| LuaTeX |
x |
default |
|
|
x |
78 |
| upTeX |
x |
default |
|
|
x |
78 |
| pdfTeX |
x |
default |
utf8 |
|
x |
78 |
| XeTeX |
x |
default |
utf8 |
|
x |
78 |
| LuaTeX |
x |
default |
utf8 |
|
x |
78 |
| upTeX |
x |
default |
utf8 |
|
x |
78 |
| pdfTeX |
x |
default |
utf8/hex |
|
78 |
37 38 |
| XeTeX |
x |
default |
utf8/hex |
|
78 |
37 38 |
| LuaTeX |
x |
default |
utf8/hex |
|
78 |
37 38 |
| upTeX |
x |
default |
utf8/hex |
|
78 |
37 38 |
| pdfTeX |
x |
utf8 |
|
|
x |
78 |
| XeTeX |
x |
utf8 |
|
|
x |
78 |
| LuaTeX |
x |
utf8 |
|
|
x |
78 |
| upTeX |
x |
utf8 |
|
|
x |
78 |
| pdfTeX |
x |
utf8 |
utf8 |
|
x |
78 |
| XeTeX |
x |
utf8 |
utf8 |
|
x |
78 |
| LuaTeX |
x |
utf8 |
utf8 |
|
x |
78 |
| upTeX |
x |
utf8 |
utf8 |
|
x |
78 |
| pdfTeX |
x |
utf8 |
utf8/hex |
|
78 |
37 38 |
| XeTeX |
x |
utf8 |
utf8/hex |
|
78 |
37 38 |
| LuaTeX |
x |
utf8 |
utf8/hex |
|
78 |
37 38 |
| upTeX |
x |
utf8 |
utf8/hex |
|
78 |
37 38 |
| pdfTeX |
á |
|
|
|
á |
C3 A1 |
| XeTeX |
á |
|
|
|
á |
E1 |
| LuaTeX |
á |
|
|
|
á |
E1 |
| upTeX |
á |
|
|
|
^^c3^^a1 |
C3 A1 |
| pdfTeX |
á |
|
utf8 |
|
á |
C3 83 C2 A1 |
| XeTeX |
á |
|
utf8 |
|
á |
C3 A1 |
| LuaTeX |
á |
|
utf8 |
|
á |
C3 A1 |
| upTeX |
á |
|
utf8 |
|
^^c3^^83^^c2^^a1 |
C3 83 C2 A1 |
| pdfTeX |
á |
|
utf8/hex |
|
C383C2A1 |
43 33 38 33 43 32 41 31 |
| XeTeX |
á |
|
utf8/hex |
|
C3A1 |
43 33 41 31 |
| LuaTeX |
á |
|
utf8/hex |
|
C3A1 |
43 33 41 31 |
| upTeX |
á |
|
utf8/hex |
|
C383C2A1 |
43 33 38 33 43 32 41 31 |
| pdfTeX |
á |
default |
|
|
� |
E1 |
| XeTeX |
á |
default |
|
|
á |
E1 |
| LuaTeX |
á |
default |
|
|
á |
E1 |
| upTeX |
á |
default |
|
|
^^e1 |
E1 |
| pdfTeX |
á |
default |
utf8 |
|
á |
C3 A1 |
| XeTeX |
á |
default |
utf8 |
|
á |
C3 A1 |
| LuaTeX |
á |
default |
utf8 |
|
á |
C3 A1 |
| upTeX |
á |
default |
utf8 |
|
^^c3^^a1 |
C3 A1 |
| pdfTeX |
á |
default |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| XeTeX |
á |
default |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| LuaTeX |
á |
default |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| upTeX |
á |
default |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| pdfTeX |
á |
utf8 |
|
|
� |
E1 |
| XeTeX |
á |
utf8 |
|
Invalid UTF-8 string: missing continuation byte (x1). |
� |
FFFD |
| LuaTeX |
á |
utf8 |
|
Invalid UTF-8 string: missing continuation byte (x1). |
� |
FFFD |
| upTeX |
á |
utf8 |
|
|
^^e1 |
E1 |
| pdfTeX |
á |
utf8 |
utf8 |
|
á |
C3 A1 |
| XeTeX |
á |
utf8 |
utf8 |
Invalid UTF-8 string: missing continuation byte (x1). |
� |
EF BF BD |
| LuaTeX |
á |
utf8 |
utf8 |
Invalid UTF-8 string: missing continuation byte (x1). |
� |
EF BF BD |
| upTeX |
á |
utf8 |
utf8 |
|
^^c3^^a1 |
C3 A1 |
| pdfTeX |
á |
utf8 |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| XeTeX |
á |
utf8 |
utf8/hex |
Invalid UTF-8 string: missing continuation byte (x1). |
EFBFBD |
45 46 42 46 42 44 |
| LuaTeX |
á |
utf8 |
utf8/hex |
Invalid UTF-8 string: missing continuation byte (x1). |
EFBFBD |
45 46 42 46 42 44 |
| upTeX |
á |
utf8 |
utf8/hex |
|
C3A1 |
43 33 41 31 |
| pdfTeX |
点 |
|
|
|
点 |
E7 82 B9 |
| XeTeX |
点 |
|
|
|
点 |
70B9 |
| LuaTeX |
点 |
|
|
|
点 |
70B9 |
| upTeX |
点 |
|
|
Character code too large for this engine. |
? |
3F |
| pdfTeX |
点 |
|
utf8 |
|
ç¹ |
C3 A7 C2 82 C2 B9 |
| XeTeX |
点 |
|
utf8 |
|
ç^^82¹ |
E7 82 B9 |
| LuaTeX |
点 |
|
utf8 |
|
ç¹ |
E7 82 B9 |
| upTeX |
点 |
|
utf8 |
|
^^e7^^82^^b9 |
E7 82 B9 |
| pdfTeX |
点 |
|
utf8/hex |
|
C3A7C282C2B9 |
43 33 41 37 43 32 38 32 43 32 42 39 |
| XeTeX |
点 |
|
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| LuaTeX |
点 |
|
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| upTeX |
点 |
|
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| pdfTeX |
点 |
default |
|
Character code too large for this engine. |
? |
3F |
| XeTeX |
点 |
default |
|
|
点 |
70B9 |
| LuaTeX |
点 |
default |
|
|
点 |
70B9 |
| upTeX |
点 |
default |
|
Character code too large for this engine. |
? |
3F |
| pdfTeX |
点 |
default |
utf8 |
|
点 |
E7 82 B9 |
| XeTeX |
点 |
default |
utf8 |
|
ç^^82¹ |
E7 82 B9 |
| LuaTeX |
点 |
default |
utf8 |
|
ç¹ |
E7 82 B9 |
| upTeX |
点 |
default |
utf8 |
Invalid UTF-8 string: missing continuation byte (x1). |
^^ef^^bf^^bd |
EF BF BD |
| pdfTeX |
点 |
default |
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| XeTeX |
点 |
default |
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| LuaTeX |
点 |
default |
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| upTeX |
点 |
default |
utf8/hex |
Invalid UTF-8 string: missing continuation byte (x1). |
EFBFBD |
45 46 42 46 42 44 |
| pdfTeX |
点 |
utf8 |
|
Character code too large for this engine. |
? |
3F |
| XeTeX |
点 |
utf8 |
|
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| LuaTeX |
点 |
utf8 |
|
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| upTeX |
点 |
utf8 |
|
Character code too large for this engine. |
? |
3F |
| pdfTeX |
点 |
utf8 |
utf8 |
|
点 |
E7 82 B9 |
| XeTeX |
点 |
utf8 |
utf8 |
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| LuaTeX |
点 |
utf8 |
utf8 |
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| upTeX |
点 |
utf8 |
utf8 |
Invalid UTF-8 string: missing continuation byte (x1). |
^^ef^^bf^^bd |
EF BF BD |
| pdfTeX |
点 |
utf8 |
utf8/hex |
|
E782B9 |
45 37 38 32 42 39 |
| XeTeX |
点 |
utf8 |
utf8/hex |
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| LuaTeX |
点 |
utf8 |
utf8/hex |
String invalid in escaping 'bytes': it may only contain bytes. |
|
|
| upTeX |
点 |
utf8 |
utf8/hex |
Invalid UTF-8 string: missing continuation byte (x1). |
EFBFBD |
45 46 42 46 42 44 |
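As you can see, there is no single combination of From and To encodings
that returns E782B9 with all engines: converting from default to utf8/hex
works with pdfTeX, XeTeX, and LuaTeX, but fails with upTeX.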