Source code for hmda_tools.data.geo

import os, re, string, tempfile, zipfile, csv

import sqlsoup

from .. import download_file
from ..unicode_csv import UnicodeReader

def csv_row_to_dict(headers, row):
    """Pair each header with the whitespace-stripped cell at the same position.

    Args:
        headers: Sequence of column names.
        row: Sequence of string cells from one delimited row.

    Returns:
        A dict mapping each header to its stripped cell. Pairing stops at the
        shorter of the two sequences, so surplus headers or cells are dropped.
    """
    # Call ``.strip()`` as a method: the ``string.strip`` module function is
    # deprecated in Python 2 and does not exist in Python 3.
    return dict(zip(headers, [cell.strip() for cell in row]))
def load_all(db_uri):
    """Run the gazetteer load, then the crosswalk load, against ``db_uri``."""
    # Order matters: the gazetteer load runs first, the crosswalk load second.
    for loader in (load_gazetteer, load_crosswalk):
        loader(db_uri)
def load_gazetteer(db_uri):
    """Download, unzip and insert the gazetteer data into ``db_uri``.

    The extracted text file is removed afterwards, even when the insert
    step raises.

    Args:
        db_uri: Database URI handed on to ``insert_gaz_data``.
    """
    print("Downloading gazetteer...")
    gaz_zip = download_gazetteer()
    # NOTE(review): the downloaded archive itself is not removed here --
    # confirm whether ``download_file`` manages its own cleanup.

    print("Unzipping gazetteer...")
    gaz_file = unzip_gazetteer(gaz_zip)

    try:
        print("Inserting gazetteer data...")
        insert_gaz_data(db_uri, gaz_file)
    finally:
        # Always drop the extracted file so a failed insert does not leave
        # it behind in the working directory.
        os.remove(gaz_file)
def load_crosswalk(db_uri):
    """Download the crosswalk file and insert its data into ``db_uri``.

    Args:
        db_uri: Database URI handed on to ``insert_crosswalk_data``.
    """
    # Parenthesised single-argument ``print`` emits the same text on
    # Python 2 and is valid syntax on Python 3.
    print("Downloading crosswalk data...")
    filename = download_crosswalk()

    print("Inserting crosswalk data...")
    insert_crosswalk_data(db_uri, filename)
def download_gazetteer():
    """Fetch the national counties gazetteer archive.

    Returns:
        Whatever ``download_file`` returns for the gazetteer zip URL.
    """
    return download_file(
        'http://www.census.gov/geo/www/gazetteer/files/Gaz_counties_national.zip'
    )
def unzip_gazetteer(gaz_zip_file):
    """Extract ``Gaz_counties_national.txt`` into the current directory.

    Args:
        gaz_zip_file: The gazetteer archive, passed straight to
            ``zipfile.ZipFile``.

    Returns:
        The member name ``'Gaz_counties_national.txt'``, which is also the
        extracted file's path relative to the current working directory.

    Raises:
        KeyError: If the archive has no member with that name.
    """
    gaz_txt_file = 'Gaz_counties_national.txt'
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(gaz_zip_file) as gaz_zip:
        gaz_zip.extract(gaz_txt_file)
    return gaz_txt_file
def insert_gaz_data(db_uri, gaz_file):
    """Replace the ``state`` and ``county`` rows with data from a gazetteer file.

    Existing rows in both tables are deleted first. The file is read as
    tab-delimited text decoded as ISO-8859-2; its first row supplies the
    column names. One ``state`` row is inserted the first time each state
    FIPS code is seen, and one ``county`` row is inserted per data row.

    Args:
        db_uri: Database URI passed to ``sqlsoup.SQLSoup``.
        gaz_file: Path of the extracted gazetteer text file.
    """
    db = sqlsoup.SQLSoup(db_uri)
    db.state.delete()
    db.county.delete()

    with open(gaz_file, 'rb') as csvfile:
        reader = UnicodeReader(csvfile, dialect='excel-tab',
                               encoding='iso-8859-2')
        # Built-in ``next()`` and the ``.strip()`` method replace the
        # Python-2-only ``reader.next()`` / ``string.strip`` spellings.
        headers = [header.strip() for header in next(reader)]
        states_seen = set()

        for row in reader:
            row = csv_row_to_dict(headers, row)

            # GEOID is sliced as two state characters followed by three
            # county characters.
            geoid = row['GEOID']
            state_fips = int(geoid[0:2])
            county_fips = int(geoid[2:5])

            if state_fips not in states_seen:
                db.state.insert(fips_code=state_fips, abbr=row['USPS'])
                states_seen.add(state_fips)

            db.county.insert(
                state_fips_code=state_fips,
                county_fips_code=county_fips,
                name=row['NAME'],
                ansi_code=row['ANSICODE'],
                population=int(row['POP10']),
                housing_units=int(row['HU10']),
                land_area=int(row['ALAND']),
                water_area=int(row['AWATER']),
                latitude=row['INTPTLAT'],
                longitude=row['INTPTLONG'])

    db.commit()
def download_crosswalk():
    """Fetch the county-to-CBSA crosswalk text file.

    Returns:
        Whatever ``download_file`` returns for the crosswalk URL.
    """
    return download_file(
        "http://www.nber.org/cbsa-msa-fips-ssa-county-crosswalk/2011/FY%2011%20NPRM%20County%20to%20CBSA%20Xwalk.txt"
    )
def insert_crosswalk_data(db_uri, filename):
    """Set ``cbsa_code`` on county rows from a tab-delimited crosswalk file.

    The first row of the file supplies the column names. For every data row
    with a non-empty ``CBSA`` value, the county rows matching the state and
    county codes taken from ``fipscd`` are updated with that CBSA code.

    Args:
        db_uri: Database URI passed to ``sqlsoup.SQLSoup``.
        filename: Path of the downloaded crosswalk file.
    """
    db = sqlsoup.SQLSoup(db_uri)

    # Binary mode is what the Python 2 ``csv`` module expects.
    with open(filename, 'rb') as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        # Built-in ``next()`` and the ``.strip()`` method replace the
        # Python-2-only ``reader.next()`` / ``string.strip`` spellings.
        headers = [header.strip() for header in next(reader)]

        for row in reader:
            row = csv_row_to_dict(headers, row)

            # Assumes ``fipscd`` is a zero-padded five-character code
            # (two state + three county) -- TODO confirm against the file.
            geoid = row['fipscd']
            state_fips = int(geoid[0:2])
            county_fips = int(geoid[2:5])

            # Rows without a CBSA value leave the county untouched.
            if row['CBSA']:
                db.county.filter_by(
                    state_fips_code=state_fips,
                    county_fips_code=county_fips,
                ).update({'cbsa_code': int(row['CBSA'])})

    db.commit()