• Open Menu Close Menu
  • Apple
  • Shopping Bag
  • Apple
  • Mac
  • iPad
  • iPhone
  • Watch
  • TV
  • Music
  • Support
  • Search apple.com
  • Shopping Bag

Lists

Open Menu Close Menu
  • Terms and Conditions
  • Lists hosted on this site
  • Email the Postmaster
  • Tips for posting to public mailing lists
Re: Translating accented characters to HTML entity equivalents
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Translating accented characters to HTML entity equivalents


  • Subject: Re: Translating accented characters to HTML entity equivalents
  • From: Aki Inoue <email@hidden>
  • Date: Thu, 13 Mar 2003 15:38:27 -0800

Phil,

As others have already mentioned, the @"" literal syntax only supports 7-bit ASCII.

The sample attached is not terribly efficient but should give you the basic idea.

Aki


/*
 * Named HTML character entities for the printable upper half of ISO 8859-1
 * (U+00A0 through U+00FF), in code point order.
 *
 * Index the table with (codeUnit - 0xA0); it holds exactly 96 entries, so the
 * caller must ensure 0xA0 <= codeUnit <= 0xFF before indexing. The trailing
 * comment on each row is the equivalent decimal numeric character reference.
 *
 * The element pointers are const so that the table cannot be reassigned by
 * accident at run time.
 */
static NSString * const ISO8859_1_Table[] = {
    @"&nbsp;", // "&#160;"
    @"&iexcl;", // "&#161;"
    @"&cent;", // "&#162;"
    @"&pound;", // "&#163;"
    @"&curren;", // "&#164;"
    @"&yen;", // "&#165;"
    @"&brvbar;", // "&#166;"
    @"&sect;", // "&#167;"
    @"&uml;", // "&#168;"
    @"&copy;", // "&#169;"
    @"&ordf;", // "&#170;"
    @"&laquo;", // "&#171;"
    @"&not;", // "&#172;"
    @"&shy;", // "&#173;"
    @"&reg;", // "&#174;"
    @"&macr;", // "&#175;"
    @"&deg;", // "&#176;"
    @"&plusmn;", // "&#177;"
    @"&sup2;", // "&#178;"
    @"&sup3;", // "&#179;"
    @"&acute;", // "&#180;"
    @"&micro;", // "&#181;"
    @"&para;", // "&#182;"
    @"&middot;", // "&#183;"
    @"&cedil;", // "&#184;"
    @"&sup1;", // "&#185;"
    @"&ordm;", // "&#186;"
    @"&raquo;", // "&#187;"
    @"&frac14;", // "&#188;"
    @"&frac12;", // "&#189;"
    @"&frac34;", // "&#190;"
    @"&iquest;", // "&#191;"
    @"&Agrave;", // "&#192;"
    @"&Aacute;", // "&#193;"
    @"&Acirc;", // "&#194;"
    @"&Atilde;", // "&#195;"
    @"&Auml;", // "&#196;"
    @"&Aring;", // "&#197;"
    @"&AElig;", // "&#198;"
    @"&Ccedil;", // "&#199;"
    @"&Egrave;", // "&#200;"
    @"&Eacute;", // "&#201;"
    @"&Ecirc;", // "&#202;"
    @"&Euml;", // "&#203;"
    @"&Igrave;", // "&#204;"
    @"&Iacute;", // "&#205;"
    @"&Icirc;", // "&#206;"
    @"&Iuml;", // "&#207;"
    @"&ETH;", // "&#208;"
    @"&Ntilde;", // "&#209;"
    @"&Ograve;", // "&#210;"
    @"&Oacute;", // "&#211;"
    @"&Ocirc;", // "&#212;"
    @"&Otilde;", // "&#213;"
    @"&Ouml;", // "&#214;"
    @"&times;", // "&#215;"
    @"&Oslash;", // "&#216;"
    @"&Ugrave;", // "&#217;"
    @"&Uacute;", // "&#218;"
    @"&Ucirc;", // "&#219;"
    @"&Uuml;", // "&#220;"
    @"&Yacute;", // "&#221;"
    @"&THORN;", // "&#222;"
    @"&szlig;", // "&#223;"
    @"&agrave;", // "&#224;"
    @"&aacute;", // "&#225;"
    @"&acirc;", // "&#226;"
    @"&atilde;", // "&#227;"
    @"&auml;", // "&#228;"
    @"&aring;", // "&#229;"
    @"&aelig;", // "&#230;"
    @"&ccedil;", // "&#231;"
    @"&egrave;", // "&#232;"
    @"&eacute;", // "&#233;"
    @"&ecirc;", // "&#234;"
    @"&euml;", // "&#235;"
    @"&igrave;", // "&#236;"
    @"&iacute;", // "&#237;"
    @"&icirc;", // "&#238;"
    @"&iuml;", // "&#239;"
    @"&eth;", // "&#240;"
    @"&ntilde;", // "&#241;"
    @"&ograve;", // "&#242;"
    @"&oacute;", // "&#243;"
    @"&ocirc;", // "&#244;"
    @"&otilde;", // "&#245;"
    @"&ouml;", // "&#246;"
    @"&divide;", // "&#247;"
    @"&oslash;", // "&#248;"
    @"&ugrave;", // "&#249;"
    @"&uacute;", // "&#250;"
    @"&ucirc;", // "&#251;"
    @"&uuml;", // "&#252;"
    @"&yacute;", // "&#253;"
    @"&thorn;", // "&#254;"
    @"&yuml;", // "&#255;"
};

/*
 * Returns `string` with every character that needs escaping in 7-bit ASCII
 * HTML replaced by a character reference:
 *
 *   - '"', '&', '<' and '>' become &quot; &amp; &lt; and &gt;
 *   - U+00A0..U+00FF become the named entity from ISO8859_1_Table
 *   - any other BMP character above U+00FF becomes a hexadecimal numeric
 *     reference such as &#x20AC;
 *   - a valid UTF-16 surrogate pair becomes ONE numeric reference for the
 *     combined code point (e.g. &#x1F600;); an unpaired surrogate becomes
 *     &#xFFFD; (REPLACEMENT CHARACTER), since a surrogate on its own is not
 *     a legal character reference
 *   - U+FFFE and U+FFFF are dropped
 *
 * All remaining characters (ASCII and C1 controls other than the four above)
 * are copied through unchanged.
 *
 * If nothing needs replacing, the original `string` object itself is
 * returned (nil in, nil out). Otherwise the result is a string obtained from
 * +[NSMutableString string]. Despite the "create" prefix, the caller does not
 * receive an owning reference in either case.
 */
static NSString *createStringWithCharacterReference(NSString *string) {
    NSMutableString *mString = nil;            // allocated lazily on the first replacement
    NSString *result = string;
    NSString *entityString;
    NSRange currentRange = NSMakeRange(0, 0);  // pending run of characters copied verbatim
    NSUInteger stringLength = [string length];
    NSUInteger currentIndex = 0;
    NSUInteger consumed;                       // UTF-16 code units used by this character
    unichar character;

    while (currentIndex < stringLength) {
        character = [string characterAtIndex:currentIndex];
        consumed = 1;

        if (character < 0xA0) { // ASCII & C1
            switch (character) {
                case '"': entityString = @"&quot;"; break;
                case '&': entityString = @"&amp;"; break;
                case '<': entityString = @"&lt;"; break;
                case '>': entityString = @"&gt;"; break;
                default: entityString = nil; break;
            }
        } else if (character < 0x100) { // ISO 8859-1
            entityString = ISO8859_1_Table[character - 0xA0];
        } else if (character >= 0xD800 && character <= 0xDBFF) { // High surrogate
            unichar low = 0;

            if (currentIndex + 1 < stringLength) {
                low = [string characterAtIndex:currentIndex + 1];
            }

            if (low >= 0xDC00 && low <= 0xDFFF) {
                // Combine the pair into a single supplementary-plane code point.
                unsigned long codePoint = 0x10000UL
                    + (((unsigned long)(character - 0xD800)) << 10)
                    + (unsigned long)(low - 0xDC00);
                entityString = [NSString stringWithFormat:@"&#x%lX;", codePoint];
                consumed = 2;
            } else {
                entityString = @"&#xFFFD;"; // Unpaired high surrogate
            }
        } else if (character >= 0xDC00 && character <= 0xDFFF) {
            entityString = @"&#xFFFD;"; // Low surrogate with no preceding high surrogate
        } else if (character < 0xFFFE) { // Rest of the BMP
            // A numeric character reference needs both the leading "&" and the
            // terminating ";" to be recognised by an HTML parser.
            entityString = [NSString stringWithFormat:@"&#x%X;", (unsigned int)character];
        } else {
            entityString = @""; // UFFFE & UFFFF shouldn't be in HTML
        }

        if (entityString) {
            if (!mString) {
                mString = [NSMutableString string];
                result = mString;
            }

            // Flush the untouched characters that precede this replacement.
            if (currentRange.location < currentIndex) {
                currentRange.length = currentIndex - currentRange.location;
                [mString appendString:[string substringWithRange:currentRange]];
            }

            [mString appendString:entityString];
            currentRange.location = currentIndex + consumed;
        }
        currentIndex += consumed;
    }

    // The remaining run
    if (mString && (currentRange.location < currentIndex)) {
        currentRange.length = currentIndex - currentRange.location;
        [mString appendString:[string substringWithRange:currentRange]];
    }

    return result;
}
_______________________________________________
cocoa-dev mailing list | email@hidden
Help/Unsubscribe/Archives: http://www.lists.apple.com/mailman/listinfo/cocoa-dev
Do not post admin requests to the list. They will be ignored.

References: 
 >Translating accented characters to HTML entity equivalents (From: Phillip Ulrich <email@hidden>)

  • Prev by Date: is there a way to extract text from pdfs?
  • Next by Date: UI responsiveness
  • Previous by thread: Re: Translating accented characters to HTML entity equivalents
  • Next by thread: WebServicesCore Proxy example
  • Index(es):
    • Date
    • Thread