• Open Menu Close Menu
  • Apple
  • Shopping Bag
  • Apple
  • Mac
  • iPad
  • iPhone
  • Watch
  • TV
  • Music
  • Support
  • Search apple.com
  • Shopping Bag

Lists

Open Menu Close Menu
  • Terms and Conditions
  • Lists hosted on this site
  • Email the Postmaster
  • Tips for posting to public mailing lists
Re: Matching Postal Addresses
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Matching Postal Addresses


  • Subject: Re: Matching Postal Addresses
  • From: Michael Rothwell <email@hidden>
  • Date: Wed, 12 Nov 2003 15:13:15 -0500

Here's a snippet of Python code. It scrapes the usps.com website and returns XML. I wish Apple would embrace Python as a first-class OSX development language -- with Aqua widget binding, etc.


# imported libraries
###################################################################
import httplib, sys, string, urllib
import exceptions
import string, os, sys

from mimetools import choose_boundary
from MimeWriter import MimeWriter
import base64
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO

def extractData(text):
    """Return the stripped text found just after the first ">" in *text*.

    The input is split on ">" and the second piece (the text between the
    first and second ">") is kept; that piece is then cut at its first "<"
    and surrounding whitespace is removed.  For ``"<td> 1 MAIN ST </td>"``
    this yields ``"1 MAIN ST"``.

    If *text* contains no ">" or is not a string, *text* is returned
    unchanged (best-effort behaviour, never raises for those cases).
    """
    try:
        # Piece between the first and second ">"; IndexError when no ">".
        after_tag = text.split(">")[1]
        return after_tag.split("<")[0].strip()
    except (AttributeError, IndexError, TypeError):
        return text

def CSZ(text):
    """Convert a "CITY ST&nbsp;ZIP" string into XML-like element lines.

    *text* is expected to hold the city and state, then the literal
    ``"&nbsp;"``, then the ZIP code.  The result is four newline-terminated
    elements: ``<csz>`` (the whole line with ``"&nbsp;"`` turned into a space
    and ``" -"`` collapsed to ``"-"``), ``<city>``, ``<state>`` and ``<zip>``.

    The state is taken as the last three characters of the city/state part
    (stripped), the city as everything before them.  Spaces inside the ZIP
    are removed.

    If *text* has no ``"&nbsp;"`` separator or is not a string, *text* is
    returned unchanged.
    """
    try:
        combined = text.strip().replace(" -", "-").replace("&nbsp;", " ")
        pieces = text.split("&nbsp;")
        city_state = pieces[0].strip()
        # Strip the city too: with more than one space before the state the
        # slice would otherwise keep trailing whitespace.
        city_name = city_state[:-3].strip()
        state_code = city_state[-3:].strip()
        # IndexError here means there was no "&nbsp;" separator.
        zip_code = pieces[1].strip().replace(" ", "")
    except (AttributeError, IndexError, TypeError):
        return text
    return (
        "<csz>" + combined + "</csz>\n"
        + "<city>" + city_name + "</city>\n"
        + "<state>" + state_code + "</state>\n"
        + "<zip>" + zip_code + "</zip>\n"
    )

def pyGetAddressFromUSPS(address1, address2, city, state, addrline="0"):
    """Look up an address on www.usps.com and return the result as XML text.

    Sends a GET request to ``/zip4/zip4_response.jsp`` with the address
    fields URL-encoded, then scans the response line by line for the marker
    strings ``<Firm Line/>``, ``<Address Line/>`` and ``<City-State-ZIP/>``.
    Matching lines are converted with ``extractData`` (and ``CSZ`` for the
    city/state/ZIP line) into ``<firm>``, ``<address>`` and
    ``<csz>/<city>/<state>/<zip>`` elements.

    Parameters:
        address1, address2: street address lines (strings).
        city, state: city and state (strings).
        addrline: caller-supplied label, stripped and echoed back inside
            ``<line>...</line>``.

    Returns:
        ``"<usps><line>ADDRLINE</line>...</usps>"`` when at least one element
        was found; ``""`` when nothing was found, the response was empty, or
        the request/parsing failed.

    NOTE(review): the original indentation was lost; the wrap in ``<usps>``
    and the final return are assumed to sit after the loop -- confirm.
    NOTE(review): the marker strings are kept exactly as posted; they may
    have been altered by the mail archive -- confirm against the real page.
    """
    server = "www.usps.com"
    params = urllib.urlencode({
        "zipcode": "",
        "Selection": 1,
        "address": address1 + " " + address2,
        "address1": address1,
        "address2": address2,
        "city": city,
        "state": state,
        "urbanization": "",
        "firm": "",
    })
    url = "http://" + server + "/zip4/zip4_response.jsp?" + params
    headers = {"Host": server, "Accept": "text/html"}

    # Best-effort fetch: any failure yields "" rather than an exception.
    try:
        conn = httplib.HTTPConnection(server + ":80")
        try:
            conn.connect()
            # NOTE(review): params are sent both in the URL and as the
            # request body, as in the original -- confirm the body is wanted.
            conn.request("GET", url, params, headers)
            page = conn.getresponse().read()
        finally:
            # Close the connection even when the request fails.
            conn.close()
    except Exception:
        return ""

    if not page:
        return ""

    try:
        elements = []
        for line in page.split("\n"):
            if "<Firm Line/>" in line:
                firm = extractData(line)
                if firm:
                    elements.append("<firm>" + firm + "</firm>\n")
            if "<Address Line/>" in line:
                street = extractData(line)
                # Values starting with "(" are discarded.
                if street and street[:1] != "(":
                    elements.append("<address>" + street + "</address>\n")
            if "<City-State-ZIP/>" in line:
                csz = CSZ(extractData(line))
                if csz:
                    elements.append(csz)
        body = "".join(elements)
        if not body:
            return ""
        return "<usps><line>" + addrline.strip() + "</line>" + body + "</usps>"
    except (AttributeError, TypeError):
        # e.g. a non-string addrline; keep the original "" on failure.
        return ""


Jason McInnes wrote:

Thanks, everyone, for your help,

Given my timeframe, (a couple of weeks) and the nature
of my application (batch conversion routine), I'm
partial to the screen scraping suggestion by Michael.
I've never done it before. Any suggestions on where I
could quickly learn how to do it?

I searched the archive, but didn't come up with
anything concrete...

Thanks!

Jason
_______________________________________________
cocoa-dev mailing list | email@hidden
Help/Unsubscribe/Archives: http://www.lists.apple.com/mailman/listinfo/cocoa-dev
Do not post admin requests to the list. They will be ignored.

  • Follow-Ups:
    • Re: Matching Postal Addresses
      • From: Ronald Oussoren <email@hidden>
    • Re: Matching Postal Addresses
      • From: Michael Rothwell <email@hidden>
    • Re: Matching Postal Addresses
      • From: Nicholas Riley <email@hidden>
References: 
 >Re: Matching Postal Addresses (From: Jason McInnes <email@hidden>)

  • Prev by Date: Re: PopUp button + Controller Layer
  • Next by Date: Extending NSURLProtocol
  • Previous by thread: Re: Matching Postal Addresses
  • Next by thread: Re: Matching Postal Addresses
  • Index(es):
    • Date
    • Thread