Source code for get_counties_uris

import requests
import pandas as pd
import os

def get_counties_in_batches(username: str) -> list:
    """Return all US counties from the geonames platform, including a unique id.

    The search endpoint is paged: every request asks for up to ``max_rows``
    records starting at ``start_row``, and the loop keeps going until the
    ``totalResultsCount`` reported by the service has been covered.

    Args:
        username (str): valid username for the geonames api

    Returns:
        list: list of counties in US with the associated metadata (the items
            found under the ``geonames`` key of each response).

    Raises:
        requests.HTTPError: if the service answers with an HTTP error status.
        RuntimeError: if the JSON body carries a ``status`` error object
            instead of results.
    """
    url = 'http://api.geonames.org/searchJSON'
    max_rows = 1000  # maximum
    request_timeout = 30  # seconds; without it a stalled connection blocks forever
    start_row = 0
    # Number of counties in USA. Only used to enter the loop; it is replaced
    # by the count the service reports after the first response.
    total_results = 3144
    all_counties = []

    while start_row < total_results:
        # Define the parameters for this batch
        params = {
            'formatted': 'true',
            'country': 'US',
            'featureCode': 'ADM2',
            'maxRows': max_rows,
            'startRow': start_row,
            'username': username,
        }

        # Make the API request and fail loudly on transport-level errors
        response = requests.get(url, params=params, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        # API-level failures (e.g. unknown username, exhausted quota) arrive
        # as a ``status`` object in the body. Surface them instead of
        # returning a silently truncated or empty list.
        status = data.get('status')
        if status:
            raise RuntimeError(
                f"GeoNames API error {status.get('value')}: {status.get('message')}"
            )

        # Append counties from this batch to the list
        all_counties.extend(data.get('geonames', []))

        # Update the total number of results
        total_results = data.get('totalResultsCount', 0)

        # Move to the next batch
        start_row += max_rows

    return all_counties
def main():
    """Write ``counties_uris.csv`` with the uri of each county in USA.

    It assumes that you've a valid username to use the geonames api and that
    the value is stored as an environment variable as GEONAMES_USERNAME.

    The csv has one row per (adminName1, toponymName) pair and a ``uri``
    column. If two records share the same pair, the later one wins.

    Raises:
        RuntimeError: if GEONAMES_USERNAME is not set, or if no counties
            were returned.
    """
    username = os.getenv('GEONAMES_USERNAME')
    if not username:
        raise RuntimeError(
            "GEONAMES_USERNAME environment variable is not set; "
            "a valid geonames username is required"
        )

    counties = get_counties_in_batches(username)
    if not counties:
        # An empty dict would build a single-level index, and assigning two
        # index names below would then fail with an unrelated pandas error.
        raise RuntimeError("no counties were returned by the geonames api")

    # reorganise the data to create a csv with the uris of each pair
    # state - county
    county_dict = {
        (county['adminName1'], county['toponymName']):
            f"https://www.geonames.org/{county['geonameId']}"
        for county in counties
    }

    county_s = pd.Series(county_dict)
    county_s.index.names = ['adminName1', 'toponymName']
    county_s.rename('uri').to_csv('counties_uris.csv')
# Run the export only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()