Source code for hmda_tools.data.geo

import os, re, string, tempfile, zipfile, csv

import sqlsoup

from .. import download_file
from ..unicode_csv import UnicodeReader

[docs]def csv_row_to_dict(headers, row):
    return dict(zip(headers, map(string.strip, row)))

[docs]def load_all(db_uri):
    load_gazetteer(db_uri)
    load_crosswalk(db_uri)

[docs]def load_gazetteer(db_uri):
    print "Downloading gazetteer..."
    gaz_zip = download_gazetteer()
    print "Unzipping gazetteer..."
    gaz_file = unzip_gazetteer(gaz_zip)
    print "Inserting gazetteer data..."
    insert_gaz_data(db_uri, gaz_file)
    os.remove(gaz_file)

[docs]def load_crosswalk(db_uri):
    print "Downloading crosswalk data..."
    filename = download_crosswalk()
    print "Inserting crosswalk data..."
    insert_crosswalk_data(db_uri, filename)

[docs]def download_gazetteer():
    gaz_url = 'http://www.census.gov/geo/www/gazetteer/files/Gaz_counties_national.zip'
    return download_file(gaz_url)

[docs]def unzip_gazetteer(gaz_zip_file):
    gaz_txt_file = 'Gaz_counties_national.txt'
    gaz_zip = zipfile.ZipFile(gaz_zip_file)
    gaz_zip.extract(gaz_txt_file)
    return gaz_txt_file

[docs]def insert_gaz_data(db_uri, gaz_file):
    db = sqlsoup.SQLSoup(db_uri)
    db.state.delete()
    db.county.delete()
    
    with open(gaz_file, 'rb') as csvfile:
        reader = UnicodeReader(csvfile, dialect='excel-tab', encoding='iso-8859-2')
        headers = map(string.strip, reader.next())

        states_seen = set()
        
        for row in reader:
            row = csv_row_to_dict(headers, row)
            geoid = row['GEOID']
            state_fips = int(geoid[0:2])
            county_fips = int(geoid[2:5])

            if state_fips not in states_seen:
                db.state.insert(fips_code=state_fips, abbr=row['USPS'])
                states_seen.add(state_fips)

            db.county.insert(
                state_fips_code=state_fips,
                county_fips_code=county_fips,
                name=row['NAME'],
                ansi_code=row['ANSICODE'],
                population=int(row['POP10']),
                housing_units=int(row['HU10']),
                land_area=int(row['ALAND']),
                water_area=int(row['AWATER']),
                latitude=row['INTPTLAT'],
                longitude=row['INTPTLONG'])
            
    db.commit()

[docs]def download_crosswalk():
    crosswalk_url = "http://www.nber.org/cbsa-msa-fips-ssa-county-crosswalk/2011/FY%2011%20NPRM%20County%20to%20CBSA%20Xwalk.txt"
    return download_file(crosswalk_url)

[docs]def insert_crosswalk_data(db_uri, filename):
    db = sqlsoup.SQLSoup(db_uri)
    
    with open(filename, 'rb') as csvfile:
        reader = csv.reader(csvfile, dialect='excel-tab')
        headers = map(string.strip, reader.next())

        for row in reader:
            row = csv_row_to_dict(headers, row)
            geoid = row['fipscd']
            state_fips = int(geoid[0:2])
            county_fips = int(geoid[2:5])

            if row['CBSA']:                
                db.county.filter_by(state_fips_code=state_fips,
                                 county_fips_code=county_fips).update({
                                     'cbsa_code': int(row['CBSA'])})
    db.commit()