7. Using GeoPandas

Setting up the conda env:

conda create -n geo python=3.8
conda activate geo
conda install mamba -c conda-forge
mamba install geemap geopandas descartes rtree=0.9.3 -c conda-forge
mamba install ipython-sql sqlalchemy psycopg2 -c conda-forge

Sample dataset:

References:

7.1. Connecting to the database

import os
from sqlalchemy import create_engine
host = "localhost"
database = "nyc"
user = os.getenv('SQL_USER')
password = os.getenv('SQL_PASSWORD')
connection_string = f"postgresql://{user}:{password}@{host}/{database}"
engine = create_engine(connection_string)
from sqlalchemy import inspect
insp = inspect(engine)
insp.get_table_names()
['spatial_ref_sys',
 'nyc_neighborhoods',
 'nyc_streets',
 'nyc_homicides',
 'geometries',
 'nyc_census_blocks',
 'nyc_subway_stations']

7.2. Reading data from PostGIS

import geopandas as gpd
sql = 'SELECT * FROM nyc_neighborhoods'
gdf = gpd.read_postgis(sql, con=engine)
gdf
id geom boroname name
0 1 MULTIPOLYGON (((582771.426 4495167.427, 584651... Brooklyn Bensonhurst
1 2 MULTIPOLYGON (((585508.753 4509691.267, 586826... Manhattan East Village
2 3 MULTIPOLYGON (((583263.278 4509242.626, 583276... Manhattan West Village
3 4 MULTIPOLYGON (((597640.009 4520272.720, 597647... The Bronx Throggs Neck
4 5 MULTIPOLYGON (((595285.205 4525938.798, 595348... The Bronx Wakefield-Williamsbridge
... ... ... ... ...
124 125 MULTIPOLYGON (((584212.898 4502321.474, 584306... Brooklyn Red Hook
125 126 MULTIPOLYGON (((605082.288 4513540.148, 605091... Queens Douglastown-Little Neck
126 127 MULTIPOLYGON (((600138.493 4516909.499, 600138... Queens Whitestone
127 128 MULTIPOLYGON (((593231.553 4515088.539, 593306... Queens Steinway
128 129 MULTIPOLYGON (((579051.030 4495284.647, 579062... Staten Island Rosebank

129 rows × 4 columns

gdf.crs
<Projected CRS: EPSG:26918>
Name: NAD83 / UTM zone 18N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: North America - between 78°W and 72°W - onshore and offshore. Canada - Nunavut; Ontario; Quebec. United States (USA) - Connecticut; Delaware; Maryland; Massachusetts; New Hampshire; New Jersey; New York; North Carolina; Pennsylvania; Virginia; Vermont.
- bounds: (-78.0, 28.28, -72.0, 84.0)
Coordinate Operation:
- name: UTM zone 18N
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

7.3. Writing files

out_dir = os.path.expanduser('~/Downloads')
if not os.path.exists(out_dir):
    os.makedirs(out_dir)
out_json = os.path.join(out_dir, 'nyc_neighborhoods.geojson')
gdf.to_file(out_json, driver="GeoJSON")
out_shp = os.path.join(out_dir, 'nyc_neighborhoods.shp')
gdf.to_file(out_shp)
gdf.crs
<Projected CRS: EPSG:26918>
Name: NAD83 / UTM zone 18N
Axis Info [cartesian]:
- E[east]: Easting (metre)
- N[north]: Northing (metre)
Area of Use:
- name: North America - between 78°W and 72°W - onshore and offshore. Canada - Nunavut; Ontario; Quebec. United States (USA) - Connecticut; Delaware; Maryland; Massachusetts; New Hampshire; New Jersey; New York; North Carolina; Pennsylvania; Virginia; Vermont.
- bounds: (-78.0, 28.28, -72.0, 84.0)
Coordinate Operation:
- name: UTM zone 18N
- method: Transverse Mercator
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich

7.4. Measuring area

gdf = gdf.set_index("name")
gdf["area"] = gdf.area
gdf["area"]
name
Bensonhurst                 5.828014e+06
East Village                1.632117e+06
West Village                1.044615e+06
Throggs Neck                8.274104e+06
Wakefield-Williamsbridge    6.925334e+06
                                ...     
Red Hook                    1.146229e+05
Douglastown-Little Neck     1.753248e+07
Whitestone                  2.804182e+04
Steinway                    1.232809e+06
Rosebank                    4.855008e+04
Name: area, Length: 129, dtype: float64

7.5. Getting polygon bounary

gdf['boundary'] = gdf.boundary
gdf['boundary']
name
Bensonhurst                 MULTILINESTRING ((582771.426 4495167.427, 5846...
East Village                MULTILINESTRING ((585508.753 4509691.267, 5868...
West Village                MULTILINESTRING ((583263.278 4509242.626, 5832...
Throggs Neck                MULTILINESTRING ((597640.009 4520272.720, 5976...
Wakefield-Williamsbridge    MULTILINESTRING ((595285.205 4525938.798, 5953...
                                                  ...                        
Red Hook                    MULTILINESTRING ((584212.898 4502321.474, 5843...
Douglastown-Little Neck     MULTILINESTRING ((605082.288 4513540.148, 6050...
Whitestone                  MULTILINESTRING ((600138.493 4516909.499, 6001...
Steinway                    MULTILINESTRING ((593231.553 4515088.539, 5933...
Rosebank                    MULTILINESTRING ((579051.030 4495284.647, 5790...
Name: boundary, Length: 129, dtype: geometry

7.6. Getting polygon centroid

gdf['centroid'] = gdf.centroid
gdf['centroid']
name
Bensonhurst                 POINT (584653.928 4495863.485)
East Village                POINT (585979.191 4508859.571)
West Village                POINT (583832.311 4509898.957)
Throggs Neck                POINT (599226.101 4519629.728)
Wakefield-Williamsbridge    POINT (596720.653 4527170.627)
                                         ...              
Red Hook                    POINT (583382.229 4501563.758)
Douglastown-Little Neck     POINT (607360.858 4511731.933)
Whitestone                  POINT (599563.892 4517128.417)
Steinway                    POINT (592429.851 4515507.528)
Rosebank                    POINT (579260.556 4495409.906)
Name: centroid, Length: 129, dtype: geometry

7.7. Making maps

gdf.plot()
<AxesSubplot:>
../_images/geopandas_27_1.png
gdf.plot("area", legend=True, figsize=(10, 8))
<AxesSubplot:>
../_images/geopandas_28_1.png
gdf = gdf.set_geometry("centroid")
gdf.plot("area", legend=True,figsize=(10, 8))
<AxesSubplot:>
../_images/geopandas_29_1.png
ax = gdf["geom"].plot(figsize=(10, 8))
gdf["centroid"].plot(ax=ax, color="black")
<AxesSubplot:>
../_images/geopandas_30_1.png
gdf = gdf.set_geometry("geom")

7.8. Reprojecting data

sql = 'SELECT * FROM nyc_neighborhoods'
gdf = gpd.read_postgis(sql, con=engine)
gdf_crs = gdf.to_crs(epsg="4326")
gdf_crs
id geom boroname name
0 1 MULTIPOLYGON (((-74.02167 40.60318, -73.99913 ... Brooklyn Bensonhurst
1 2 MULTIPOLYGON (((-73.98734 40.73372, -73.97184 ... Manhattan East Village
2 3 MULTIPOLYGON (((-74.01399 40.72991, -74.01381 ... Manhattan West Village
3 4 MULTIPOLYGON (((-73.84204 40.82767, -73.84190 ... The Bronx Throggs Neck
4 5 MULTIPOLYGON (((-73.86910 40.87898, -73.86831 ... The Bronx Wakefield-Williamsbridge
... ... ... ... ...
124 125 MULTIPOLYGON (((-74.00367 40.66747, -74.00256 ... Brooklyn Red Hook
125 126 MULTIPOLYGON (((-73.75494 40.76612, -73.75483 ... Queens Douglastown-Little Neck
126 127 MULTIPOLYGON (((-73.81296 40.79708, -73.81296 ... Queens Whitestone
127 128 MULTIPOLYGON (((-73.89509 40.78149, -73.89419 ... Queens Steinway
128 129 MULTIPOLYGON (((-74.06562 40.60460, -74.06548 ... Staten Island Rosebank

129 rows × 4 columns

geojson = gdf_crs.__geo_interface__

7.9. Displaying data on an interative map

import geemap
m = geemap.Map(center=[40.7341, -73.9113], zoom=10, ee_initialize=False)
m
style = {
    "stroke": True,
    "color": "#000000",
    "weight": 2,
    "opacity": 1,
    "fill": True,
    "fillColor": "#0000ff",
    "fillOpacity": 0.4,
}
m.add_geojson(geojson, style=style, layer_name="nyc neighborhoods")
sql2 = 'SELECT * FROM nyc_subway_stations'
gdf_subway = gpd.read_postgis(sql2, con=engine)
gdf_subway_crs = gdf_subway.to_crs(epsg="4326")
subway_geojson = gdf_subway_crs.__geo_interface__
m.add_geojson(subway_geojson, layer_name="nyc subway stations")