convert unicode text to code point
convert unicode text to code point
- Subject: convert unicode text to code point
- From: bill <email@hidden>
- Date: Mon, 16 Jan 2006 06:55:54 +0800
Hello,
Ooops, nul character is no good, send again :)
Last week I re-read an old thread "Re: Producing Unicode-only
characters" and found that Nigel suggested exploiting the
single-record-list-to-string coercion bug:
<http://lists.apple.com/archives/applescript-users/2005/Oct/
msg00840.html>
which is quite a different way of converting Unicode text to code
points than the temporary-file approach.
After some experimentation, I found some interesting things:
-- script
set uText to ("z" as Unicode text)
set gText to {{a:uText}} as string
-- visually, the result is "dle2TEXT0usrflistTEXTAutxtz"
However, if we try:
-- script
set uText to ("z" as Unicode text)
set gText to {{a:uText}} as string
set gList to {}
repeat with i in (characters of gText)
set end of gList to {(contents of i), ASCII number (contents of i)}
end repeat
gList -- {{"d", 100}, {"l", 108}, {"e", 101}, {"2", 50}, {"", 0},
{"", 0}, {"", 0}, {"", 0}, {"T", 84}, {"E", 69}, {"X", 88}, {"T",
84}, {"", 0}, {"", 0}, {"", 0}, {"0", 48}, {"", 0}, {"", 0}, {"", 0},
{"", 1}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"u", 117}, {"s", 115},
{"r", 114}, {"f", 102}, {"l", 108}, {"i", 105}, {"s", 115}, {"t",
116}, {"", 0}, {"", 0}, {"", 0}, {"", 28}, {"", 0}, {"", 0}, {"",
0}, {"", 2}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"T", 84}, {"E",
69}, {"X", 88}, {"T", 84}, {"", 0}, {"", 0}, {"", 0}, {"", 1}, {"a",
97}, {"", 0}, {"u", 117}, {"t", 116}, {"x", 120}, {"t", 116}, {"",
0}, {"", 0}, {"", 0}, {"", 2}, {"", 0}, {"z", 122}}
Then, if we change the record label to "abc" & uText to "xyz",
-- script
set uText to ("xyz" as Unicode text)
set gText to {{abc:uText}} as string
set gList to {}
repeat with i in (characters of gText)
set end of gList to {(contents of i), ASCII number (contents of i)}
end repeat
gList -- {{"d", 100}, {"l", 108}, {"e", 101}, {"2", 50}, {"", 0},
{"", 0}, {"", 0}, {"", 0}, {"T", 84}, {"E", 69}, {"X", 88}, {"T",
84}, {"", 0}, {"", 0}, {"", 0}, {"6", 54}, {"", 0}, {"", 0}, {"", 0},
{"", 1}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"u", 117}, {"s", 115},
{"r", 114}, {"f", 102}, {"l", 108}, {"i", 105}, {"s", 115}, {"t",
116}, {"", 0}, {"", 0}, {"", 0}, {"\"", 34}, {"", 0}, {"", 0}, {"",
0}, {"", 2}, {"", 0}, {"", 0}, {"", 0}, {"", 0}, {"T", 84}, {"E",
69}, {"X", 88}, {"T", 84}, {"", 0}, {"", 0}, {"", 0}, {"", 3}, {"a",
97}, {"b", 98}, {"c", 99}, {"", 0}, {"u", 117}, {"t", 116}, {"x",
120}, {"t", 116}, {"", 0}, {"", 0}, {"", 0}, {"", 6}, {"", 0}, {"x",
120}, {"", 0}, {"y", 121}, {"", 0}, {"z", 122}}
Note that the number before "abc" changed from 1 to 3, so I suspect
it represents the length of the record label. Similarly, the fourth
number after "utxt" changed from 2 to 6, so I suspect it represents
the length of the record value in bytes (two bytes per character here).
So, if we use a non-ASCII Unicode character:
-- script
set uText to («data utxt60B2» as Unicode text)
set gText to {{abc:uText}} as string
set gList to text 65 thru -1 of gText
-- "`≤" -- appear to be garbled
set uList to {}
repeat with i in gList
set end of uList to ASCII number (contents of i)
end repeat
uList -- {96, 178}
Voilà, decimal 96 is hexadecimal 60; and decimal 178 is hexadecimal
B2 :)
More unicode characters:
-- script
-- U+60B2, U+0000 & U+2F9F4 (U+D87E, U+DDF4)
set uText to («data utxt60B20000D87EDDF4» as Unicode text)
set gText to {{a:uText}} as string
set gList to text 63 thru -1 of gText
set uList to {}
repeat with i in gList
set end of uList to ASCII number (contents of i)
end repeat
uList -- {96, 178, 0, 0, 216, 126, 221, 244}
So, based on Nigel Garvey's exploit and Mark J. Reed's explanation
of code points beyond U+FFFF, I would suggest this handler for
converting Unicode text to code points.
-- handler starts
-- Lookup table used by the codePoint handler.
-- It holds the 256 two-digit uppercase hex strings "00" through "FF" in order.
-- AppleScript lists are 1-based, so the hex string for byte value n is
-- item (n + 1) of this list.
property hexcode : {"00", "01", "02", "03", "04", "05", "06", "07",
"08", "09", "0A", "0B", "0C", "0D", "0E", "0F", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D",
"1E", "1F", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "2A", "2B", "2C", "2D", "2E", "2F", "30", "31", "32", "33",
"34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E",
"3F", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49",
"4A", "4B", "4C", "4D", "4E", "4F", "50", "51", "52", "53", "54",
"55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
"60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A",
"6B", "6C", "6D", "6E", "6F", "70", "71", "72", "73", "74", "75",
"76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F", "80",
"81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B",
"8C", "8D", "8E", "8F", "90", "91", "92", "93", "94", "95", "96",
"97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F", "A0", "A1",
"A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC",
"AD", "AE", "AF", "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7",
"B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF", "C0", "C1", "C2",
"C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD",
"CE", "CF", "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8",
"D9", "DA", "DB", "DC", "DD", "DE", "DF", "E0", "E1", "E2", "E3",
"E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE",
"EF", "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9",
"FA", "FB", "FC", "FD", "FE", "FF"}
on codePoint from unicodeText
-- purpose: find the code point(s) of unicode text, as a hex string
-- input: unicodeText: unicode text
-- output: unicode text holding two uppercase hex digits per byte of the
--         text's UTF-16 data, e.g. "60B2" for U+60B2; a character beyond
--         U+FFFF appears as its surrogate pair
set uCodePoint to "" as Unicode text
-- Empty input carries no data bytes, so the slice below would have
-- nothing to select and would raise an error; return the empty result.
if (count of unicodeText) is 0 then return uCodePoint
-- Coercing a single-record list to text exposes the raw descriptor bytes.
-- With the one-character label "a", the text's own bytes start at
-- position 63; everything before that is header.
set garbledList to items 63 thru -1 of ({{a:unicodeText}} as text)
repeat with i from 1 to (count of garbledList)
-- hexcode is 1-based, so byte value n is looked up as item (n + 1).
-- Kept on one line: a statement broken across lines without a
-- continuation character does not compile.
set uCodePoint to uCodePoint & my hexcode's item ((ASCII number (item i of garbledList)) + 1)
end repeat
-- Return explicitly instead of relying on the result of the last statement.
return uCodePoint
end codePoint
-- handler ends
codePoint of me from «data utxt60B20000D87EDDF4» as Unicode text
-- "60B20000D87EDDF4"
Well, I tried to use a repeat loop to simulate the function of ASCII
number; however, it was very slow. I hope someone on the list can
improve on this one and make it a vanilla method :)
bill
Attachment:
smime.p7s
Description: S/MIME cryptographic signature
_______________________________________________
Do not post admin requests to the list. They will be ignored.
Applescript-users mailing list (email@hidden)
Help/Unsubscribe/Update your Subscription:
This email sent to email@hidden