Okay, I took the advice given about the test I ran, modified it and ran it again tonight.
Here is a summary of the test; the files I used are included at the bottom in case you want to validate my findings.
I created two direct actions: one that uses a boolean array map and one that uses a regular expression to strip invalid ISO 8859-1 characters. This time I used a much larger string for the test. I also compiled the pattern in the constructor of the regex-style object so that pattern compilation only happens once per test. Each test ran the string through the parser 10,000 times. I rebuilt the application prior to every test that was run.
Here are the results:
Boolean Array Map (in milliseconds) 9589 9517 9809 9570 9674 9558 9460 9704 9629 10051
Regex ( in milliseconds) 28915 28281 28218 29444 37207 29006 29019 28908 28813 28907
It looks like the array map was about three times faster than the regex (roughly 9.6 seconds versus 29 seconds per 10,000 passes).
Here are the files I created for the test.
DirectAction.java ----------------------- // // DirectAction.java // Project Norway // // Created by ericstewart on 2/15/06 //
import com.webobjects.foundation.*; import com.webobjects.appserver.*; import com.webobjects.appserver.xml.*; import com.webobjects.eocontrol.*; import java.util.*;
public class DirectAction extends WODirectAction {
public DirectAction(WORequest aRequest) { super(aRequest); }
public WOActionResults defaultAction() { return pageWithName("Main"); }
public WOActionResults charSpeedArrayMapAction() { // build test string StringBuffer testString = new StringBuffer("kfdlas;n 0wqm dsagjnoisa fd;af[aghjr3q-tifnewna fafjpewiq nor0dafnlw;l jfh0w flw;f saofh8"); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( 
H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ "); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw 
m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn 
vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ "); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j 
nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ ");
// Strip illegal characters. NSTimestamp time1 = new NSTimestamp(); ISOLatin1CharacterUtility charUtility = new ISOLatin1CharacterUtility(); String resultString = ""; for(int i = 0; i < 10000; i++) { resultString = charUtility.stripInvalidCharsFromString(testString.toString()); } NSTimestamp time2 = new NSTimestamp(); GregorianCalendar startCal = new GregorianCalendar(); GregorianCalendar endCal = new GregorianCalendar(); long diffMillis = 0; startCal.setTime(time1); endCal.setTime(time2); diffMillis = endCal.getTimeInMillis() - startCal.getTimeInMillis(); NSLog.debug.appendln("Array map time to parse string: "+diffMillis);
Main page = (Main)pageWithName("Main");
return page; }
public WOActionResults charSpeedRegexAction() { // build test string StringBuffer testString = new StringBuffer("kfdlas;n 0wqm dsagjnoisa fd;af[aghjr3q-tifnewna fafjpewiq nor0dafnlw;l jfh0w flw;f saofh8"); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( 
H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ "); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw 
m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn 
vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ "); testString.append((char)1000); testString.append("fd0 f023 fkdls anflrwjap fsa[w fjnw f[2- dawjv094 tn3oh9k04r3 309r3hg854mvrm3w0v5nw[0 v9"); testString.append("qmgn vjdsop 00 89w nv3ni0vr nmv p3orm vnrv rm v fw mdndw sjuio490n v uckm4uv4n fj iivkmcj"); testString.append("o489jnrbnv8m 5tjvb6fci9 uv77vj vu v7v 678i9ls fdgo09 i9 r98 jk f78 fm,f juy fiker fdmf"); testString.append((char)10000); testString.append("irmvn 984mn juf78 km 4 d0v76 7 m j37 67k 6mbvjk8cv56 6yjn r vcjv u7849md cx;df]c0-8 end"); testString.append("4mvj930 fn89 2 no98304 nr0mj v8v87395 09vm vwlr e ;vd s,mnrv K VUYRMNVDHJ SUISVI DVO$MMV"); testString.append("i4m *$N lfju67 K$(N kjgurn jkd7 KMN* JND^&V kf9]6l4m,d id 8f4 j md k3idd8j4m cems duij4m"); testString.append((char)100000); testString.append("imn4nf8 IUj4nvjud8mner iec 883mnd J893M K VEniw8923m mdwjw8m vmskl w o290894 vw m s s94"); testString.append("fwjo wo fro3neqwvr03 f94fdwc J VW)RJ)VJ EQW( VNDSHVV@HPNVDSOPV)J*(J V)W)RHjiwo vhjdwj vlj"); testString.append("fwonnf 02f0 nesa0fe23q0cwajnv fj f jwjv jdwoav0ds a9=dsa j dsaj0 vdwavs00nv r3 j3jvr0j vj"); testString.append((char)1000000); testString.append("jfdlas 0i0 0)) JN(HD J CHDH kdsa 30 nrvmnds 90 ) JNC(h0wejvj-r3j dns huvj0rje 0-fj v3jnv"); testString.append("0230m w0ondnw n003n n vd93n vds nah 39nsnv l;vMVOP MNVNVNV NO NDN LVN H JKJOPW_ ()_U*( 8"); testString.append("jfu39 in jvsa 803i njfwe u03 hu9n iv3ni-ih8v jvi99 0h rh nine-JMIW)IJ ERNM VR ()V) VW V("); testString.append((char)10000000); testString.append("8jkn4n u7854iujK&&MR jfb7bf6j JEF7VR374KFB MNHJUEF KLI88 AMC898 WJ0 E MNMNP, W0E89898Zj"); testString.append("oo9ij v 930 N(DNJ80 390n vu989*^%R@#$() vmknv98u&*^&& vnsjkv sg o78h 9nN H(RHnir nov ew8"); testString.append("mmn vrn9 8 NN Jn 9uh70 e4hnbno v nu9vb7ev uh9n4njNJU( H&*( V)H* CNIn uve3u 089dn 9dmn3 90"); testString.append((char)10000); testString.append("0-,rfvk9c 8v rjnn Su h9r43h90 r30i j 
nmvdsnj cu h90eh83ij0 vniinodh9 8g7r37 h vdj huvds g"); testString.append("0f3 9in vn H&A nifmnk no JvA H*(j8 h3j jieh89 rvh8 -34 j[v nd diuvzh 89vreh vj9v0j89w v"); testString.append("9034 jN( AF#NU(nOP JWhj0je v 9j 0 [J) j 0verj gj089mefi iv9ni ji jv3j 0vj0j bj 80 bj8 vj"); testString.append((char)100000); testString.append("030kmc 8j7896 nNYG&*&*GH nvr3invfrh9vr34j8hn H* H*( h89 43jhj8 sh737u095 vnhv uherj9wj9"); testString.append("903nmdvnhu98wehjr3ndfh8 we9n ufe78 hv54u9 wm njn a 8h h8fj9 vh89h 89 H* )J*vj0 3j09 j mew"); testString.append("09 vmhij jve3i vn wv h vsh v kln;ms p[]hij 8y 9ty7h u5 jnvkln ;v Shj8 u8-u59 0gj vu8 jkrj"); testString.append((char)1000000); testString.append("NUIO*(n v3r i00)(**(j 3 u()&T^ewju jkbdj v89h j rmnJI HJ Ijiv rj948jv bih vjvrjvvjvrjhve"); testString.append("9498 vnm v93j89r439j0fj nvjj3rjjvr3j90 vj9 j5mpbejsvh8bw4kop5jinovjv3j 9v0j93vj 09v35j 0v"); testString.append("m,fi0nr n jvjn vj8j0-9 j35 i5jijv er hjvsj v j99 3-g5 verj ivj9 v rj9 g4ov zswjvr3i jfKJ ");
// Strip illegal characters. NSTimestamp time1 = new NSTimestamp(); ISOLatin1CharacterUtilityRegex charUtility = new ISOLatin1CharacterUtilityRegex(); String resultString = ""; for(int i = 0; i < 10000; i++) { resultString = charUtility.stripInvalidCharsFromString(testString.toString()); } NSTimestamp time2 = new NSTimestamp(); GregorianCalendar startCal = new GregorianCalendar(); GregorianCalendar endCal = new GregorianCalendar(); long diffMillis = 0; startCal.setTime(time1); endCal.setTime(time2); diffMillis = endCal.getTimeInMillis() - startCal.getTimeInMillis(); NSLog.debug.appendln("Regex time to parse string: "+diffMillis);
Main page = (Main)pageWithName("Main");
return page; }
}
ArrayMap Solution ------------------------- // // ISOLatin1CharacterUtilityArrayMap.java // Norway // // Created by Eric Stewart on 2/19/06. // Copyright 2006 __MyCompanyName__. All rights reserved. //
/**
 * Filters strings down to an accepted subset of ISO-8859-1 using a
 * 256-entry boolean lookup table.
 *
 * Accepted character codes are 9, 10, 13, 32 through 126, and 160 through
 * 255. Every other code, including everything at or above 256, is rejected.
 */
public class ISOLatin1CharacterUtility {

    /** Lookup table indexed by character code (0-255); true marks an accepted code. */
    private final boolean[] charMap = new boolean[256];

    /** Populates the lookup table with the accepted character codes. */
    public ISOLatin1CharacterUtility() {
        // The only accepted codes below 32.
        charMap[9] = true;
        charMap[10] = true;
        charMap[13] = true;

        markRange(32, 126);
        markRange(160, 255);
    }

    /** Marks every code from first to last (both inclusive) as accepted. */
    private void markRange(int first, int last) {
        for (int code = first; code <= last; code++) {
            charMap[code] = true;
        }
    }

    /** Table lookup shared by the public methods; codes outside the table are rejected. */
    private boolean accepts(char c) {
        return c < 256 && charMap[c];
    }

    /*
     * Determines if a char is a valid ISO-8859-1 character.
     */
    public boolean isCharValid(char value) {
        return accepts(value);
    }

    /*
     * Returns a string clean of all invalid ISO-8859-1 characters.
     * Accepted characters are kept in their original order.
     */
    public String stripInvalidCharsFromString(String value) {
        StringBuffer kept = new StringBuffer();
        int length = value.length();
        for (int pos = 0; pos < length; pos++) {
            char current = value.charAt(pos);
            if (accepts(current)) {
                kept.append(current);
            }
        }
        return kept.toString();
    }
}
Regex Solution --------------------- // // ISOLatin1CharCleaner.java // Norway // // Created by Eric Stewart on 2/15/06. // Copyright 2006 __MyCompanyName__. All rights reserved. //
import java.util.regex.*;
/**
 * Filters strings down to an accepted subset of ISO-8859-1 using a
 * regular expression that is compiled once per instance.
 */
public class ISOLatin1CharacterUtilityRegex {

    /** Matches one or more consecutive characters outside the accepted set. */
    private final Pattern invalidRun;

    /** Compiles the pattern in the constructor so later calls reuse it. */
    public ISOLatin1CharacterUtilityRegex() {
        // Accepted: tab, LF, CR, 0x20-0x7E and 0xA0-0xFF; the class is negated.
        invalidRun = Pattern.compile("[^\\x09\\x0A\\x0D\\x20-\\x7E\\xA0-\\xFF]+");
    }

    /*
     * Returns a string clean of all invalid ISO-8859-1 characters.
     * Each run of rejected characters is replaced with the empty string.
     */
    public String stripInvalidCharsFromString(String value) {
        return invalidRun.matcher(value).replaceAll("");
    }
}