After a long break I sat down to revise my original idea and came up with completely different solution which proved to be a lot more workable than the old one. The idea is not to count characters - it takes very long on my machine with her mechanical drive - but to check if a random word of a certain language is on every page (standard page for that matter: read my comments in the script to understand better): if positive then we know what the language that language is. The languages now are not Greek but Lithuanian, Russian and, of course, English. Its purpose is still to calculate the price based on the language to translate from. It works as intended except one strange thing: for some completely unfathomable reason it returns unexpected result though in other two identical cases it behaves correctly. I can't grasp what's going on. Though I can copy paste my script here it'll require to have these text (.txt) files to test and have them I found no alternatives but to share a downloadable Google Drive link since the total size was to big to attach in this mail message. Once again, I out-commented certain lines of my script for better understanding.
set theFile to {"(*aliases of the files go there - a list of four of them, whenever you decide to put them on your system*)"} --IMPORTANT!
property MyLanguage : ""
set RussianCharacterSet to {"а", "б", "в", "г", "д", "е", "ё", "ж", "з", "и", "й", "к", "л", "м", "н", "о", "п", "р", "с", "т", "у", "ф", "х", "ц", "ч", "ш", "ъ", "щ", "ь", "ы", "э", "ю", "я"} --Russian ABC letters
set LithuanianCharacters to {"ą", "č", "ę", "š", "ė", "į", "ų", "ū", "ž"} --Lithuanian glyphs
tell application "Finder"
repeat with DocRef in theFile
open (contents of DocRef as alias) --opens all of the aforementioned files
end repeat
end tell
tell application "TextEdit"
activate
set WordsCount to 0
set PriceCount to 0
delay 1
set DocInfo to {} --a list of records each regarding the main properties of a document: its name and its language for price evaluation. Read more below.
repeat with i from 1 to (count documents) --the outer loop directs the actions below to every opened document. The loop is not for counting words but is merely for identifying a language.
set MyDoc to text of document i
set WordsCount to (count words of MyDoc)
set PageCount to (WordsCount / 230) as integer --on how did 230 emerged read the comments below.
set PageNumber to 0
set TextToProcess to MyDoc --Portion of the text to process once a standard page is created.
set PaginatedText to {} --the whole document is now a list containing sublists that are standard pages.
repeat PageCount times --the 1st inner loop makes the text divided into pages of 230 words per page. The figure is for translation price evaluation based on amount of standard pages of printed text each consisting of 230 words. The actions of this loop must repeat as many times as needed to match the number of standard pages in each document.
set OnePage to {} --a standard page consisting of 230 words.
repeat with j from 1 to 230 --the 2nd inner loop collects 230 words making for a single standard page.
set end of OnePage to word j of TextToProcess
end repeat
set end of PaginatedText to items of OnePage --the whole document is now a list containing sublists that are standard pages.
set PageNumber to PageNumber + 1
set TextToProcess to text (word (230 * PageNumber + 1)) thru (word WordsCount) of MyDoc --once a standard page is created the loop moves onto the rest of the original document text starting from the 1st word following the created standard page (i.e., the 1st word of the rest of the text)
if (count words of TextToProcess) < 230 then exit repeat --if the number of standard pages is not integer the remainder (which adds nothing in this case) is ignored and the 1st inner loop finishes the job.
end repeat
PaginatedText
set ConfirmationCollector to {} --collects boolean values of the actions below.
set IsRussian to true --since one of 3 tested languages is Cyrillian ABC with other two being Latin ABC we first need to resolve regarding the Russian language.
repeat with PageRef in PaginatedText
set theText to some item of contents of PageRef --in order to define a Russian word we pick a random word from each of standard pages. If any Russian word is on every standard page or in the majority of them then the document is a Russian text.
if some character of theText is in RussianCharacterSet then --based on the assumption that if a word contains letters (number of which in any given word is not of primary importance) from Russian ABC then it's a Russian word.
set IsRussian to (true as text)
else
set IsRussian to (false as text)
end if
set end of ConfirmationCollector to IsRussian
end repeat
"log ConfirmationCollector:"
log ConfirmationCollector
set CountTrue to 0
set CountFalse to 0
repeat with k from 1 to count ConfirmationCollector --this loop counts "true" (the word is a Russian word) and "false" (the word is not a Russian word)
if contents of item k of ConfirmationCollector is "true" then set CountTrue to CountTrue + 1
if contents of item k of ConfirmationCollector is "false" then set CountFalse to CountFalse + 1
end repeat
if CountTrue > CountFalse then --comparing "true" and "false" gives the ultimate resolution on whether the text is Russian (Cyrillian)
set MyLanguage to "Russian."
set RusWordsCount to WordsCount
else if some item of LithuanianCharacters is in MyDoc then --since the Lithuanian and the English ABCs both stems from the Latin ABC we need only to check whether the text contains Lithuanian letters.
set MyLanguage to "Lithuanian."
set LtWordsCount to WordsCount
else
set MyLanguage to "English."
set EnWordsCount to WordsCount
end if
set WinID to (id of window i) as text
set DocName to name of document i
set SingleRecord to {TheWindowID:WinID, TheName:DocName, DocLanguage:MyLanguage} --for the following actions we're making records linking the doc's name to the language it's written in. Useful when verifying several documents written in different languages (of the 3).
set end of DocInfo to SingleRecord --a list of records each regarding the main properties of a document: its name and its language for price evaluation.
end repeat
DocInfo
set EnglishRecords to {} --properties of an English-written doc; a list of records each storing the aforementioned properties.
set RussianRecords to {} --properties of an Russian-written doc; a list of records each storing the aforementioned properties.
set LithuanianRecords to {} ----properties of an Lithuanian-written doc; a list of records each storing the aforementioned properties.
repeat with SingleRecordRef in DocInfo --sorting the list of records to make sublists each containing records regarding the documents written in the same language.
if DocLanguage of contents of SingleRecordRef = "English." then set end of EnglishRecords to contents of SingleRecordRef
if DocLanguage of contents of SingleRecordRef = "Russian." then set end of RussianRecords to contents of SingleRecordRef
if DocLanguage of contents of SingleRecordRef = "Lithuanian." then set end of LithuanianRecords to contents of SingleRecordRef
end repeat
#Having made a single language lists consisting of records storing the name and the language properties we're now calculating the price with regard to words across all documents written in the same language (that is, interpreting separate docs as a single if these doc are in the same language)
set EnWordsCount to 0
set EnPageCount to 0
set EnDocPrice to 0
repeat with EnglishDoc in EnglishRecords --success
set theText to text of document named (TheName of contents of EnglishDoc)
set EnWordsCount to EnWordsCount + (count words of theText)
set EnPageCount to (EnWordsCount / 230) as integer
end repeat
if EnPageCount ≤ 20 then
set EnDocPrice to EnPageCount * 4
else if EnPageCount > 20 then
set EnDocPrice to EnPageCount * 3
end if
set LtWordsCount to 0
set LtPageCount to 0
set LtDocPrice to 0
repeat with LithuanianDoc in LithuanianRecords --success
set theText to text of document named (TheName of contents of LithuanianDoc)
set LtWordsCount to LtWordsCount + (count words of theText)
set LtPageCount to (LtWordsCount / 230) as integer
end repeat
if LtPageCount ≤ 20 then
set LtDocPrice to LtPageCount * 3
else if LtPageCount > 20 then
set LtDocPrice to LtPageCount * 2
end if
set RuWordsCount to 0
set RuPageCount to 0
set RusDocPrice to 0
repeat with RussianDoc in RussianRecords --failure. It correctly calculates the numbers of words and standard pages but for some mysterious reason returns 0 as the price as if ignoring formulas I direct it at in the "if" block!
set theText to text of document named (TheName of contents of RussianDoc)
set RuWordsCount to RuWordsCount + (count words of theText) --success
set RuPageCount to (RuWordsCount / 230) as integer --success
end repeat
if RuPageCount ≤ 20 then
set RusDocPrice to RuPageCount * 3 --failure. Returns an initial value
else if PageCount > 20 then
set RusDocPrice to RuPageCount * 2 --failure. Returns an initial value
end if
"log RusDocPrice:"
log RusDocPrice
if EnglishRecords is not {} then set NotificationMessageEn to "Language: English" & linefeed & ("Words count: " & EnWordsCount as text) & linefeed & "Pages count: " & (EnPageCount as text) & linefeed & "Price (Eu): " & (EnDocPrice as text) & linefeed & linefeed
if LithuanianRecords is not {} then set NotificationMessageLt to "Language: Lithuanian" & linefeed & ("Words count: " & LtWordsCount as text) & linefeed & "Pages count: " & (LtPageCount as text) & linefeed & "Price (Eu): " & (LtDocPrice as text) & linefeed & linefeed
if RussianRecords is not {} then set NotificationMessageRu to "Language: Russian" & linefeed & ("Words count: " & RuWordsCount as text) & linefeed & "Pages count: " & (RuPageCount as text) & linefeed & "Price (Eu): " & (RusDocPrice as text)
end tell
{NotificationMessageEn, NotificationMessageLt, NotificationMessageRu}