FYI: NSCharacterSet bug
FYI: NSCharacterSet bug
- Subject: FYI: NSCharacterSet bug
- From: Dirk Theisen <email@hidden>
- Date: Sun, 30 Sep 2001 02:11:35 +0200 (Europe/Berlin)
Hi!
I just wanted to let you know that something is broken in
NSCharacterSet as of 10.1:
2001-09-30 01:43:44.691 NSCharsetTester[14424] The union of {[0-127]}
with {[128-65534]} is {[0-127]}
...where the second argument is an inversion of the first. Doing the
inversion manually works, though.
This has worked in 10.0.4. I reported this bug to Apple.
As a workaround, I wrote the following (REALLY dumb!) replacement (but
it works for me!).
However, I cannot get it to pose correctly for NSCFCharacterSet to
eliminate the bug.
Anyone taking the challenge?
// Brute-force replacement for -invertedSet.
// This could be done much faster, but I leave this to the Apple engineers.
//
// Returns an immutable, autoreleased character set containing every 16-bit
// code unit (0..UINT16_MAX inclusive) that is NOT a member of the receiver.
// Only values representable as a unichar are examined, because membership is
// probed one value at a time with -characterIsMember:.
- (NSCharacterSet*) invertedSet {
    // Start with the complete 16-bit range. The length must be UINT16_MAX + 1,
    // otherwise the last code unit (0xFFFF) is missing from the result.
    NSMutableCharacterSet* result = [NSMutableCharacterSet
        characterSetWithRange: NSMakeRange(0, (unsigned int)UINT16_MAX + 1)];
    unsigned int i;        // wider than unichar so the loop can run past 0xFFFF and stop
    int rangeStart = -1;   // first member of the current run, or -1 when outside a run

    // Inclusive upper bound: 0xFFFF itself has to be tested as well.
    for (i = 0; i <= UINT16_MAX; i++) {
        BOOL found = [self characterIsMember: (unichar)i];
        if (found && rangeStart < 0) rangeStart = (int)i;
        if (!found && rangeStart >= 0) {
            // A run of members just ended at i-1: strip it from the result.
            [result removeCharactersInRange: NSMakeRange(rangeStart, i - rangeStart)];
            rangeStart = -1;
        }
    }
    // Here i == UINT16_MAX + 1; flush a run that extends to the end of the range.
    if (rangeStart >= 0) {
        [result removeCharactersInRange: NSMakeRange(rangeStart, i - rangeStart)];
    }
    return [[result copy] autorelease]; // just to be compatible
}
Regards,
Dirk
Here is the code to reproduce the bug:
// Reproduction driver: builds the ASCII set [0-127], derives two "non-ASCII"
// sets, forms the union of the ASCII set with each of them, and logs the
// operands and results via -characterRangesString (NSCharacterSet (Display)
// category).
int main (int argc, const char *argv[])
{
NSAutoreleasePool* pool = [[NSAutoreleasePool alloc] init];
// Characters 0..127.
NSCharacterSet* ascii = [NSCharacterSet
characterSetWithRange: NSMakeRange(0, 128)];
// Inversion obtained through -invertedSet.
NSCharacterSet* nonAscii = [ascii invertedSet];
// Explicitly constructed counterpart: location 128, length UINT16_MAX-128,
// i.e. characters 128..65534.
NSCharacterSet* nonAscii2 = [NSCharacterSet
characterSetWithRange: NSMakeRange(128, UINT16_MAX-128)];
NSMutableCharacterSet* result;
NSMutableCharacterSet* result2;
// Replaces the value assigned to nonAscii above with a mutable copy of ascii
// that is then inverted in place.
// NOTE(review): this overwrites the -invertedSet result before it is used;
// confirm which of the two inversions the reproduction is meant to exercise.
// NOTE(review): nonAscii is declared NSCharacterSet* but receives -invert here;
// presumably this draws a compiler warning -- confirm.
[(nonAscii = [ascii mutableCopy]) invert];
NSLog(@"ascii is an %@", [ascii class]);
// Union #1: ascii with nonAscii.
[(result = [ascii mutableCopy]) formUnionWithCharacterSet:nonAscii];
NSLog(@"The union of %@ with %@ is %@", [ascii characterRangesString],
[nonAscii characterRangesString],
[result characterRangesString]);
// Union #2: ascii with the explicitly constructed range set.
[(result2 = [ascii mutableCopy]) formUnionWithCharacterSet:nonAscii2];
NSLog(@"The union of %@ with %@ is %@", [ascii characterRangesString],
[nonAscii2 characterRangesString],
[result2 characterRangesString]);
[pool release];
exit(0); // ensure the process exit status is 0
return 0; // ...and make main fit the ANSI spec.
}
// Debugging aid: renders a character set as a list of inclusive ranges.
@implementation NSCharacterSet (Display)

// Returns a string of the form "{[start-end][start-end]...}" listing, in
// ascending order, every maximal run of 16-bit code units (0..UINT16_MAX
// inclusive) that are members of the receiver. Both bounds are inclusive and
// printed in decimal. An empty set yields "{}".
- (NSString*) characterRangesString {
    NSMutableString* result = [NSMutableString stringWithString: @"{"];
    unsigned int i;        // wider than unichar so the loop can run past 0xFFFF and stop
    int rangeStart = -1;   // first member of the current run, or -1 when outside a run

    // Inclusive upper bound: 0xFFFF itself has to be tested, otherwise a run
    // reaching the end of the range is reported one character short.
    for (i = 0; i <= UINT16_MAX; i++) {
        BOOL found = [self characterIsMember: (unichar)i];
        if (found && rangeStart < 0) rangeStart = (int)i;
        if (!found && rangeStart >= 0) {
            // i >= 1 here, because a run must have started at an earlier index.
            [result appendFormat: @"[%d-%u]", rangeStart, i - 1];
            rangeStart = -1;
        }
    }
    // Here i == UINT16_MAX + 1; close a run that extends to the end of the range.
    if (rangeStart >= 0) {
        [result appendFormat: @"[%d-%u]", rangeStart, i - 1];
    }
    [result appendString: @"}"];
    return result;
}
@end
--
Dirk Theisen <email@hidden>,
Diplominformatiker
University of Bonn, Institute of Computer Science III
Römerstr. 164, D-53117 Bonn (Germany)
Tel. +4922873-4504 (Fax: -4382)