• Open Menu Close Menu
  • Apple
  • Shopping Bag
  • Apple
  • Mac
  • iPad
  • iPhone
  • Watch
  • TV
  • Music
  • Support
  • Search apple.com
  • Shopping Bag

Lists

Open Menu Close Menu
  • Terms and Conditions
  • Lists hosted on this site
  • Email the Postmaster
  • Tips for posting to public mailing lists
Re: Producing Unicode-only characters
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Producing Unicode-only characters


  • Subject: Re: Producing Unicode-only characters
  • From: "Mark J. Reed" <email@hidden>
  • Date: Wed, 26 Oct 2005 11:42:43 -0400

Here's a slightly more complete and legible version of my earlier solution.  One question: how can I do the reverse of unicodeCharacter (which I would probably call unicodeNumber, by analogy with the ASCII functions)?  I can write and then read back a temporary file to convert a given piece of Unicode text into the numeric data that makes it up - is there any way to do it in memory?

-- Lookup data shared by the hex() and dec() handlers below.
-- HEX_DIGIT_LIST maps a digit value (0..15) to its uppercase hex character
-- at list position (value + 1).
set HEX_DIGIT_LIST to characters of "0123456789ABCDEF"
-- Character codes that anchor the three digit ranges dec() recognises:
-- "0".."9", "A".."F" and "a".."f".
set {ASCII_ZERO, ASCII_UPPER_A, ASCII_LOWER_A} to {ASCII number of "0", ASCII number of "A", ASCII number of "a"}

-- Convert a non-negative integer to an uppercase hexadecimal string.
--   aNumber: the value to convert; must be >= 0.
--   Returns: the hex digits of aNumber, left-padded with "0" to at least
--            four characters (values above FFFF yield more than four).
--   Errors:  raises an error for negative input, which the digit loop
--            below cannot represent (it would otherwise return "0000").
on hex(aNumber)
    global HEX_DIGIT_LIST
    if aNumber < 0 then
        error "hex: cannot convert negative number " & (aNumber as string)
    end if
    set hexString to ""
    -- Peel off the least significant hex digit each pass and prepend it.
    repeat while aNumber > 0
        set hexString to (item (aNumber mod 16 + 1) of HEX_DIGIT_LIST) & hexString
        set aNumber to aNumber div 16
    end repeat
    -- Pad to a minimum width of four digits (this also turns 0 into "0000").
    repeat while length of hexString < 4
        set hexString to "0" & hexString
    end repeat
    return hexString
end hex

-- Parse a string of hexadecimal digits into a number.
--   hexString: text made up of the characters 0-9, A-F or a-f.
--   Returns:   the numeric value; an empty string yields 0.
--   Errors:    raises "Illegal hex digit '<c>'" on any other character.
on dec(hexString)
    global ASCII_ZERO, ASCII_UPPER_A, ASCII_LOWER_A
    set total to 0
    repeat with idx from 1 to (count of characters of hexString)
        set ch to character idx of hexString
        set code to ASCII number of ch
        -- Classify the character into one of the three digit ranges.
        if code ≥ ASCII_ZERO and code ≤ ASCII_ZERO + 9 then
            set nibble to code - ASCII_ZERO
        else if code ≥ ASCII_UPPER_A and code ≤ ASCII_UPPER_A + 5 then
            set nibble to 10 + (code - ASCII_UPPER_A)
        else if code ≥ ASCII_LOWER_A and code ≤ ASCII_LOWER_A + 5 then
            set nibble to 10 + (code - ASCII_LOWER_A)
        else
            error "Illegal hex digit '" & ch & "'"
        end if
        -- Shift the digits accumulated so far one hex place to the left.
        set total to nibble + 16 * total
    end repeat
    return total
end dec

-- Unicode constants
-- UTF-16 encodes code points at or above U+10000 as a surrogate pair:
-- a high (leading) surrogate starting at D800 followed by a low (trailing)
-- surrogate starting at DC00, each carrying 10 bits of payload (radix 0400).
set UNICODE_END_BMP to dec("10000")
set UNICODE_MAX_SCALAR to dec("10FFFF")
set UNICODE_HIGH_SURROGATE_START to dec("D800")
set UNICODE_LOW_SURROGATE_START to dec("DC00")
set UNICODE_SURROGATE_RADIX to dec("0400")

-- Convert a Unicode scalar value to Unicode text containing that character.
--   scalarValue: code point number in the range 0..10FFFF (hex).
--   Returns:     the result of evaluating a «data utxt…» literal built from
--                the UTF-16 code unit(s) for scalarValue, as Unicode text.
--   Errors:      raises an error if scalarValue is negative or above 10FFFF,
--                since neither can be expressed as UTF-16 code units.
on unicodeCharacter(scalarValue)
    global UNICODE_END_BMP, UNICODE_MAX_SCALAR, UNICODE_SURROGATE_RADIX, UNICODE_LOW_SURROGATE_START, UNICODE_HIGH_SURROGATE_START

    if scalarValue < 0 or scalarValue > UNICODE_MAX_SCALAR then
        error "Unicode scalar value out of range (0..10FFFF): " & (scalarValue as string)
    end if

    if (scalarValue < UNICODE_END_BMP) then
        -- Below U+10000 the value is a single 16-bit code unit.
        set scriptString to hex(scalarValue)
    else
        -- Split the 20-bit excess over U+10000 into two 10-bit halves.
        set excess to scalarValue - UNICODE_END_BMP
        set highSurrogate to excess div UNICODE_SURROGATE_RADIX + UNICODE_HIGH_SURROGATE_START
        set lowSurrogate to excess mod UNICODE_SURROGATE_RADIX + UNICODE_LOW_SURROGATE_START
        set scriptString to hex(highSurrogate) & hex(lowSurrogate)
    end if
    -- The data literal is assembled as source text and evaluated at run time;
    -- its payload comes only from hex(), so it contains only hex digits.
    set scriptString to "«data utxt" & scriptString & "» as unicode text"
    return (run script scriptString)
end unicodeCharacter

--
Mark J. Reed <email@hidden>
 _______________________________________________
Do not post admin requests to the list. They will be ignored.
Applescript-users mailing list      (email@hidden)
Help/Unsubscribe/Update your Subscription:

This email sent to email@hidden

  • Follow-Ups:
    • Re: Producing Unicode-only characters
      • From: Emmanuel <email@hidden>
References: 
 >Re: Producing Unicode-only characters (From: "Nigel Garvey" <email@hidden>)
 >Re: Producing Unicode-only characters (From: "Mark J. Reed" <email@hidden>)
 >Re: Producing Unicode-only characters (From: "Mark J. Reed" <email@hidden>)

  • Prev by Date: Re: Simple Backup Script
  • Next by Date: running apps "in the background"
  • Previous by thread: Re: Producing Unicode-only characters
  • Next by thread: Re: Producing Unicode-only characters
  • Index(es):
    • Date
    • Thread