------------------------------------------------------------------------------
# Auth: Christopher Stone
# dCre: 2017/04/20 18:00 CDT
# dMod: 2017/04/24 21:16 CDT
# Appl: Safari
# Task: Parse Wall Street Journal Page for JSON elements.
# Libs: None
# Osax: None
# Tags: @Applescript, @Script, @ASObjC, @Safari, @JavaScript, @JSON, @Parse, @WSJ
# Vers: 1.0
------------------------------------------------------------------------------
use AppleScript version "2.4"
use framework "Foundation"
use scripting additions
------------------------------------------------------------------------------
-- Script-level shorthand for Foundation names, so code can write e.g.
-- "NSString's ..." without the "current application's" prefix.
property NSJSONSerialization : a reference to current application's NSJSONSerialization
-- The literal 4 is the numeric value of Foundation's NSUTF8StringEncoding constant.
property NSUTF8StringEncoding : a reference to 4
property NSString : a reference to current application's NSString
------------------------------------------------------------------------------
-- Main script: pull the first <script type="application/ld+json"> element out of
-- the front Safari document, parse it as JSON, and return a comma-separated
-- reference string of the form:
--   author, "headline", publisher, date published, accessed <date>, article URL

-- The JavaScript yields an empty string (instead of raising a TypeError on a
-- null match array) when the page contains no JSON-LD script element.
set jsCMD to "
var docSrc = document.body.parentNode.outerHTML;
var regEx = /<script type=\"application\\/ld\\+json[^•]+?<\\/script>/ig;
var reMatchArray = docSrc.match(regEx);
reMatchArray ? reMatchArray[0] : '';
"
set jsonStr to doJavaScriptInSafari(jsCMD)
if jsonStr is missing value or jsonStr is "" then
	error "No application/ld+json script element was found in the front Safari document!"
end if

-- Strip the surrounding script tags so only the JSON payload remains.
set jsonStr to its cngStr:"<script type=\"application/ld\\+json\">|</script>" intoString:"" inString:jsonStr

-- Parse once, through the handler that reports JSON errors.
set nsDict to its convertJSONToDictionary:jsonStr

set theAuthor to ((nsDict's objectForKey:"author")'s objectForKey:"name") as text

-- Trim leading/trailing whitespace from the headline and wrap it in quotes.
set theHeadline to (nsDict's objectForKey:"headline") as text
set theHeadline to its cngStr:"\\A\\s+|\\s+\\Z" intoString:"" inString:theHeadline
set theHeadline to "\"" & theHeadline & "\""

set thePublisher to ((nsDict's valueForKeyPath:"publisher")'s objectForKey:"name") as text

-- Replace the "T" separator with a space before handing the timestamp to the date detector.
set datePublished to (nsDict's objectForKey:"datePublished") as text
set datePublished to its cngStr:"T" intoString:" " inString:datePublished
set datePublished to its getMyDateStringFrom:datePublished

-- "yyyy" is the calendar year; "Y" is the week-based year and is wrong around New Year.
set dateAccessed to "accessed " & (its formatDate:(current date) usingFormat:"MMMM dd, yyyy")

set articleURL to ((nsDict's valueForKeyPath:"mainEntityOfPage")'s objectForKey:"@id") as text

-- Join the fields with ", " and restore the previous delimiters afterwards.
set savedDelimiters to AppleScript's text item delimiters
set AppleScript's text item delimiters to ", "
set theReference to {theAuthor, theHeadline, thePublisher, datePublished, dateAccessed, articleURL} as text
set AppleScript's text item delimiters to savedDelimiters
return theReference
------------------------------------------------------------------------------
--» HANDLERS
------------------------------------------------------------------------------
-- Regex find-and-replace over a whole string.
--   findString    : regular-expression pattern (NSRegularExpressionSearch syntax)
--   replaceString : replacement text
--   dataString    : text to search
-- Returns the resulting AppleScript text.
on cngStr:findString intoString:replaceString inString:dataString
	set anNSString to current application's NSString's stringWithString:dataString
	-- The range must be measured in NSString (UTF-16) units. AppleScript's
	-- "length of" counts characters, which is shorter for emoji and other
	-- non-BMP text and would leave the end of the string unsearched.
	set fullRange to {0, anNSString's |length|()}
	set resultString to anNSString's ¬
		stringByReplacingOccurrencesOfString:findString withString:replaceString ¬
			options:(current application's NSRegularExpressionSearch) range:fullRange
	return resultString as text
end cngStr:intoString:inString:
------------------------------------------------------------------------------
-- Parse JSON text into the Foundation object NSJSONSerialization produces for it.
--   jsonString : the JSON source text
-- Returns the parsed object; raises error number -10000 carrying the
-- parser's localized description when parsing fails.
on convertJSONToDictionary:jsonString
	set jsonData to (NSString's stringWithString:jsonString)'s dataUsingEncoding:(current application's NSUTF8StringEncoding)
	set {parsedObject, parseError} to current application's NSJSONSerialization's JSONObjectWithData:jsonData options:0 |error|:(reference)
	if parsedObject is missing value then
		error (parseError's localizedDescription() as text) number -10000
	end if
	return parsedObject
end convertJSONToDictionary:
------------------------------------------------------------------------------
-- Run JavaScript source in Safari's front document.
--   jsCMD : JavaScript source text
-- Returns whatever Safari's "do JavaScript" command returns (the handler's
-- result is the result of that last executed statement).
-- On failure, re-raises with a prefixed message and the original error number.
on doJavaScriptInSafari(jsCMD)
	try
		tell application "Safari" to do JavaScript jsCMD in front document
	on error e number errNum
		error "Error in handler doJavaScriptInSafari() of library NLb!" & return & return & e number errNum
	end try
end doJavaScriptInSafari
------------------------------------------------------------------------------
-- Render a date as text using an NSDateFormatter pattern.
--   theDate      : an AppleScript date (converted via makeNSDateFrom:) or a value
--                  that can be passed straight to NSDateFormatter's stringFromDate:
--   formatString : NSDateFormatter date-format pattern
-- Returns the formatted date as AppleScript text. The formatter is pinned to
-- the "en_US_POSIX" locale.
on formatDate:theDate usingFormat:formatString
	set dateToFormat to theDate
	if class of dateToFormat is date then
		set dateToFormat to my makeNSDateFrom:dateToFormat
	end if
	set posixLocale to current application's NSLocale's localeWithLocaleIdentifier:"en_US_POSIX"
	set dateFormatter to current application's NSDateFormatter's new()
	dateFormatter's setLocale:posixLocale
	dateFormatter's setDateFormat:formatString
	return (dateFormatter's stringFromDate:dateToFormat) as text
end formatDate:usingFormat:
------------------------------------------------------------------------------
-- Find every date that NSDataDetector recognises in a piece of text.
--   aString : text to scan
-- Returns an AppleScript list holding the detected dates, in match order
-- (an empty list when nothing is detected).
on getDatesIn:aString
	set sourceString to current application's NSString's stringWithString:aString
	set fullRange to {0, sourceString's |length|()}
	set {dateDetector, detectorError} to current application's NSDataDetector's dataDetectorWithTypes:(current application's NSTextCheckingTypeDate) |error|:(reference)
	set dateMatches to dateDetector's matchesInString:sourceString options:0 range:fullRange
	set foundDates to current application's NSMutableArray's array()
	set matchCount to dateMatches's |count|()
	-- NSArray indexes are zero-based, so walk 0 .. matchCount - 1.
	repeat with matchIndex from 0 to (matchCount - 1)
		(foundDates's addObject:((dateMatches's objectAtIndex:matchIndex)'s |date|()))
	end repeat
	return (foundDates as list)
end getDatesIn:
------------------------------------------------------------------------------
-- Turn text containing exactly one recognisable date into "yyyy-MM-dd" form.
--   dateStr : text expected to contain a single date
-- Returns the formatted date text.
-- Raises an error when the text contains no date, or more than one.
on getMyDateStringFrom:dateStr
	set dateList to my getDatesIn:dateStr
	set dateCount to length of dateList
	if dateCount = 0 then error "No dates were returned from the given string!"
	if dateCount > 1 then error "Too many dates were found in the given string!"
	-- "yyyy" is the calendar year; "Y" is the week-based year, which reports
	-- the neighbouring year for dates in the weeks around New Year.
	return my formatDate:(item 1 of dateList) usingFormat:"yyyy-MM-dd"
end getMyDateStringFrom:
------------------------------------------------------------------------------
-- Build a date through NSCalendar from an AppleScript date.
--   theASDate : an AppleScript date
-- Returns the result of the current calendar's
-- dateWithEra:year:month:day:hour:minute:second:nanosecond: call.
on makeNSDateFrom:theASDate
	set yearNumber to year of theASDate
	set monthNumber to (month of theASDate) as integer
	set dayNumber to day of theASDate
	set secondsIntoDay to time of theASDate
	-- Negative years are passed as era 0 with a positive year number; all others as era 1.
	set eraNumber to 1
	if yearNumber < 0 then
		set eraNumber to 0
		set yearNumber to -yearNumber
	end if
	set userCalendar to current application's NSCalendar's currentCalendar()
	-- The whole time of day is passed as a seconds count on top of 0 hours, 0 minutes.
	return userCalendar's dateWithEra:eraNumber |year|:yearNumber |month|:monthNumber ¬
		|day|:dayNumber hour:0 minute:0 |second|:secondsIntoDay nanosecond:0
end makeNSDateFrom:
------------------------------------------------------------------------------