Re: Sorting characters of the text - script doesn't work as expected
Re: Sorting characters of the text - script doesn't work as expected
- Subject: Re: Sorting characters of the text - script doesn't work as expected
- From: "Nigel Garvey" <email@hidden>
- Date: Sun, 28 May 2017 19:37:26 +0100
In my message of Sun, 28 May 2017 15:35:49 +0100, I wrote:
>I've had a go at writing my own version of the script
Yvan's pointed out to me off list that there were a couple of problems
with the script I posted earlier. It turns out that one of the variables
I used is a reserved word in Script Editor (but not in Script Debugger),
the Lithuanian characters got mangled in the post, and I missed that the
rates for English texts are higher than those for the other languages.
I've also come to the conclusion that Ilja's random-pick system for
identifying Russian texts isn't entirely satisfactory. If the script
happens to pick more numbers than words, the text isn't identified as
being Russian. In the hope that only Russian documents will contain
Russian characters, I've changed the system to be the same as for
Lithuanian texts: if the text contains one or more Russian characters,
it's Russian.
use AppleScript version "2.4" -- Mac OS 10.10 (Yosemite) or later
use framework "Foundation"
use scripting additions
-- Entry point: all the work is done inside the main() handler.
main()
on main() -- All the action in an ordinary handler to keep the variables local and non-persistent.
	-- Counts the words in every readable text item of the "For ASC forums" folder inside the
	-- Downloads folder, totals them per language (Russian, Lithuanian, English) and returns a
	-- list of three notification messages, in the order {English, Lithuanian, Russian}.
	-- A message is "" when no words were counted for that language (see getNotificationMessage).
	set folderPath to (path to downloads folder as text) & "For ASC forums:"
	tell application "Finder" to set theFiles to items of folder folderPath as alias list -- theFiles is a list of alias(es) to every item in the folder (expected to be txt files).
	set |?| to current application -- Shorthand for the ASObjC calls below.
	-- A basic word-finding regex: finds either a run of word characters (but allowing single instances of "." or "'" between word characters or "," between digits) or one of a small collection of currency or copyright symbols. Adjust as/if necessary.
	set wordsNSRegex to |?|'s class "NSRegularExpression"'s regularExpressionWithPattern:("(?:(?:\\w|(?<=\\w)[.'](?=\\w)|(?<=\\d),(?=\\d))++)|[£$€¢©®™]") options:(|?|'s NSRegularExpressionUseUnicodeWordBoundaries) |error|:(missing value)
	set RussianCharacterRegex to |?|'s class "NSString"'s stringWithString:("[:script=cyrillic:]") -- Regex to find any Cyrillic-script character, taken here as the marker for a Russian text.
	set LithuanianCharacterRegex to |?|'s class "NSString"'s stringWithString:("(?i)[ąčęšėįųūž]") -- Regex to find any Lithuanian character.
	-- Initialise variables for the word counts.
	set EnWordsCount to 0
	set LtWordsCount to 0
	set RuWordsCount to 0
	-- Go through the files in turn.
	repeat with thisFile in theFiles
		-- Read the text directly from the file, letting the system guess the text encoding.
		set fileURL to (|?|'s class "NSURL"'s fileURLWithPath:(POSIX path of thisFile))
		set MyDoc to (|?|'s class "NSString"'s stringWithContentsOfURL:(fileURL) usedEncoding:(missing value) |error|:(missing value))
		-- The read returns missing value when the item can't be loaded as text (e.g. it's a subfolder or its encoding can't be determined).
		-- Skip such items rather than letting the script error on the first message sent to missing value.
		if (MyDoc is not missing value) then
			set docRange to {0, MyDoc's |length|()}
			-- Match and count the words (as recognised by my regex) in the document text.
			set wordMatches to (wordsNSRegex's matchesInString:(MyDoc) options:(0) range:(docRange))
			set WordsCount to (wordMatches's |count|())
			-- Update the appropriate word count according to whether the text contains any Russian characters, any Lithuanian characters, or none of these.
			if ((MyDoc's rangeOfString:(RussianCharacterRegex) options:(|?|'s NSRegularExpressionSearch) range:(docRange))'s |length| > 0) then
				set RuWordsCount to RuWordsCount + WordsCount
			else if ((MyDoc's rangeOfString:(LithuanianCharacterRegex) options:(|?|'s NSRegularExpressionSearch) range:(docRange))'s |length| > 0) then -- Since the Lithuanian and English alphabets both stem from the Latin alphabet, we only need to check whether the text contains Lithuanian letters.
				set LtWordsCount to LtWordsCount + WordsCount
			else
				set EnWordsCount to EnWordsCount + WordsCount
			end if
		end if
	end repeat
	-- The word counts are totals across all documents in the same language (that is, separate documents in one language are priced as if they were a single document). Build one message per language from those totals.
	set NotificationMessageEn to getNotificationMessage("English", EnWordsCount)
	set NotificationMessageLt to getNotificationMessage("Lithuanian", LtWordsCount)
	set NotificationMessageRu to getNotificationMessage("Russian", RuWordsCount)
	{NotificationMessageEn, NotificationMessageLt, NotificationMessageRu}
end main
on getNotificationMessage(theLanguage, wordCount)
	-- Builds the summary text for one language.
	--   theLanguage: the language name shown in the message; "English" selects the higher pair of rates.
	--   wordCount: the total number of words counted for that language.
	-- Returns "" when wordCount is not greater than 0, otherwise four linefeed-terminated lines
	-- (language, word count, page count, price) followed by one extra linefeed.
	
	-- A page is 230 words. The quotient is coerced to an integer (AppleScript rounds in this coercion rather than truncating).
	set pageCount to (wordCount / 230) as integer
	
	-- Each language has a rate per page for jobs of up to 20 pages and a cheaper one for longer jobs.
	if (theLanguage is "English") then
		set {shortJobRate, longJobRate} to {4, 3}
	else
		set {shortJobRate, longJobRate} to {3, 2}
	end if
	if (pageCount > 20) then
		set pageRate to longJobRate
	else
		set pageRate to shortJobRate
	end if
	set docPrice to pageCount * pageRate
	
	-- Nothing to report for a language with no words.
	if (wordCount ≤ 0) then return ""
	
	set summaryText to "Language: " & theLanguage & linefeed
	set summaryText to summaryText & "Words count: " & wordCount & linefeed
	set summaryText to summaryText & "Pages count: " & pageCount & linefeed
	set summaryText to summaryText & "Price (Eu): " & docPrice & linefeed & linefeed
	return summaryText
end getNotificationMessage
NG
_______________________________________________
Do not post admin requests to the list. They will be ignored.
AppleScript-Users mailing list (email@hidden)
Help/Unsubscribe/Update your Subscription:
Archives: http://lists.apple.com/archives/applescript-users
This email sent to email@hidden