4. Working with Geometries

Setting up the conda env:

conda create -n sql python
conda activate sql
conda install ipython-sql sqlalchemy psycopg2 notebook pandas -c conda-forge

Sample dataset:

References:

4.1. Connecting to the database

%load_ext sql
import os
host = "localhost"
database = "nyc"
user = os.getenv('SQL_USER')
password = os.getenv('SQL_PASSWORD')
connection_string = f"postgresql://{user}:{password}@{host}/{database}"
%sql $connection_string
%%sql 

SELECT * FROM nyc_neighborhoods WHERE FALSE
 * postgresql://postgres:***@localhost/nyc
0 rows affected.
id geom boroname name

4.2. Creating geometries

%%sql

CREATE TABLE geometries (name varchar, geom geometry);

INSERT INTO geometries VALUES
  ('Point', 'POINT(0 0)'),
  ('Linestring', 'LINESTRING(0 0, 1 1, 2 1, 2 2)'),
  ('Polygon', 'POLYGON((0 0, 1 0, 1 1, 0 1, 0 0))'),
  ('PolygonWithHole', 'POLYGON((0 0, 10 0, 10 10, 0 10, 0 0),(1 1, 1 2, 2 2, 2 1, 1 1))'),
  ('Collection', 'GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0, 1 0, 1 1, 0 1, 0 0)))');

SELECT name, ST_AsText(geom) FROM geometries;
 * postgresql://postgres:***@localhost/nyc
(psycopg2.errors.DuplicateTable) relation "geometries" already exists

[SQL: CREATE TABLE geometries (name varchar, geom geometry);]
(Background on this error at: http://sqlalche.me/e/13/f405)

4.3. Metadata tables

%%sql 

SELECT * FROM spatial_ref_sys LIMIT 10
 * postgresql://postgres:***@localhost/nyc
10 rows affected.
srid auth_name auth_srid srtext proj4text
3819 EPSG 3819 GEOGCS["HD1909",DATUM["Hungarian_Datum_1909",SPHEROID["Bessel 1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]],TOWGS84[595.48,121.69,515.35,4.115,-2.9383,0.853,-3.408],AUTHORITY["EPSG","1024"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","3819"]] +proj=longlat +ellps=bessel +towgs84=595.48,121.69,515.35,4.115,-2.9383,0.853,-3.408 +no_defs
3821 EPSG 3821 GEOGCS["TWD67",DATUM["Taiwan_Datum_1967",SPHEROID["GRS 1967 Modified",6378160,298.25,AUTHORITY["EPSG","7050"]],AUTHORITY["EPSG","1025"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","3821"]] +proj=longlat +ellps=aust_SA +no_defs
3824 EPSG 3824 GEOGCS["TWD97",DATUM["Taiwan_Datum_1997",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","1026"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","3824"]] +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
3889 EPSG 3889 GEOGCS["IGRS",DATUM["Iraqi_Geospatial_Reference_System",SPHEROID["GRS 1980",6378137,298.257222101,AUTHORITY["EPSG","7019"]],TOWGS84[0,0,0,0,0,0,0],AUTHORITY["EPSG","1029"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","3889"]] +proj=longlat +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +no_defs
3906 EPSG 3906 GEOGCS["MGI 1901",DATUM["MGI_1901",SPHEROID["Bessel 1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]],TOWGS84[682,-203,480,0,0,0,0],AUTHORITY["EPSG","1031"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","3906"]] +proj=longlat +ellps=bessel +towgs84=682,-203,480,0,0,0,0 +no_defs
4001 EPSG 4001 GEOGCS["Unknown datum based upon the Airy 1830 ellipsoid",DATUM["Not_specified_based_on_Airy_1830_ellipsoid",SPHEROID["Airy 1830",6377563.396,299.3249646,AUTHORITY["EPSG","7001"]],AUTHORITY["EPSG","6001"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4001"]] +proj=longlat +ellps=airy +no_defs
4002 EPSG 4002 GEOGCS["Unknown datum based upon the Airy Modified 1849 ellipsoid",DATUM["Not_specified_based_on_Airy_Modified_1849_ellipsoid",SPHEROID["Airy Modified 1849",6377340.189,299.3249646,AUTHORITY["EPSG","7002"]],AUTHORITY["EPSG","6002"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4002"]] +proj=longlat +ellps=mod_airy +no_defs
4003 EPSG 4003 GEOGCS["Unknown datum based upon the Australian National Spheroid",DATUM["Not_specified_based_on_Australian_National_Spheroid",SPHEROID["Australian National Spheroid",6378160,298.25,AUTHORITY["EPSG","7003"]],AUTHORITY["EPSG","6003"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4003"]] +proj=longlat +ellps=aust_SA +no_defs
4004 EPSG 4004 GEOGCS["Unknown datum based upon the Bessel 1841 ellipsoid",DATUM["Not_specified_based_on_Bessel_1841_ellipsoid",SPHEROID["Bessel 1841",6377397.155,299.1528128,AUTHORITY["EPSG","7004"]],AUTHORITY["EPSG","6004"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4004"]] +proj=longlat +ellps=bessel +no_defs
4005 EPSG 4005 GEOGCS["Unknown datum based upon the Bessel Modified ellipsoid",DATUM["Not_specified_based_on_Bessel_Modified_ellipsoid",SPHEROID["Bessel Modified",6377492.018,299.1528128,AUTHORITY["EPSG","7005"]],AUTHORITY["EPSG","6005"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4005"]] +proj=longlat +a=6377492.018 +b=6356173.508712696 +no_defs
%%sql

SELECT * FROM geometry_columns
 * postgresql://postgres:***@localhost/nyc
6 rows affected.
f_table_catalog f_table_schema f_table_name f_geometry_column coord_dimension srid type
nyc public nyc_homicides geom 2 26918 POINT
nyc public nyc_census_blocks geom 2 26918 MULTIPOLYGON
nyc public nyc_neighborhoods geom 2 26918 MULTIPOLYGON
nyc public nyc_streets geom 2 26918 MULTILINESTRING
nyc public nyc_subway_stations geom 2 26918 POINT
nyc public geometries geom 2 0 GEOMETRY
%%sql 

SELECT name, ST_GeometryType(geom), ST_NDims(geom), ST_SRID(geom)
  FROM geometries;
 * postgresql://postgres:***@localhost/nyc
5 rows affected.
name st_geometrytype st_ndims st_srid
Point ST_Point 2 0
Linestring ST_LineString 2 0
Polygon ST_Polygon 2 0
PolygonWithHole ST_Polygon 2 0
Collection ST_GeometryCollection 2 0

4.4. Points

A spatial point represents a single location on the Earth. This point is represented by a single coordinate (including either 2-, 3- or 4-dimensions). Points are used to represent objects when the exact details, such as shape and size, are not important at the target scale. For example, cities on a map of the world can be described as points, while a map of a single state might represent cities as polygons.

%%sql

SELECT ST_AsText(geom)
  FROM geometries
  WHERE name = 'Point';
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
st_astext
POINT(0 0)

Some of the specific spatial functions for working with points are:

  • ST_X(geometry) returns the X ordinate

  • ST_Y(geometry) returns the Y ordinate

So, we can read the ordinates from a point like this:

%%sql

SELECT ST_X(geom), ST_Y(geom)
  FROM geometries
  WHERE name = 'Point';
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
st_x st_y
0.0 0.0
%%sql

SELECT name, ST_AsText(geom)
  FROM nyc_subway_stations
  LIMIT 10;
 * postgresql://postgres:***@localhost/nyc
10 rows affected.
name st_astext
Cortlandt St POINT(583521.854408956 4507077.86259909)
Rector St POINT(583324.48663246 4506805.37316021)
South Ferry POINT(583304.182399475 4506069.65404812)
138th St POINT(590250.10594797 4518558.01992433)
149th St POINT(590454.739989117 4519145.71961785)
149th St POINT(590465.893419111 4519168.6974832)
161st St POINT(590573.169495527 4520214.76617728)
167th St POINT(591252.83141041 4520950.35335555)
167th St POINT(590946.3972263 4521077.31897688)
170th St POINT(591583.611145281 4521434.84662681)

4.5. Linestrings

A linestring is a path between locations. It takes the form of an ordered series of two or more points. Roads and rivers are typically represented as linestrings. A linestring is said to be closed if it starts and ends on the same point. It is said to be simple if it does not cross or touch itself (except at its endpoints if it is closed). A linestring can be both closed and simple.

The street network for New York (nyc_streets) was loaded earlier in the workshop. This dataset contains details such as name, and type. A single real world street may consist of many linestrings, each representing a segment of road with different attributes.

The following SQL query will return the geometry associated with one linestring (in the ST_AsText column).

%%sql

SELECT ST_AsText(geom)
  FROM geometries
  WHERE name = 'Linestring';
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
st_astext
LINESTRING(0 0,1 1,2 1,2 2)

Some of the specific spatial functions for working with linestrings are:

  • ST_Length(geometry) returns the length of the linestring

  • ST_StartPoint(geometry) returns the first coordinate as a point

  • ST_EndPoint(geometry) returns the last coordinate as a point

  • ST_NPoints(geometry) returns the number of coordinates in the linestring

So, the length of our linestring is:

%%sql 

SELECT ST_Length(geom)
  FROM geometries
  WHERE name = 'Linestring';
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
st_length
3.414213562373095

4.6. Polygons

A polygon is a representation of an area. The outer boundary of the polygon is represented by a ring. This ring is a linestring that is both closed and simple as defined above. Holes within the polygon are also represented by rings.

Polygons are used to represent objects whose size and shape are important. City limits, parks, building footprints or bodies of water are all commonly represented as polygons when the scale is sufficiently high to see their area. Roads and rivers can sometimes be represented as polygons.

The following SQL query will return the geometry associated with one polygon (in the ST_AsText column).

%%sql

SELECT ST_AsText(geom)
  FROM geometries
  WHERE name LIKE 'Polygon%';
 * postgresql://postgres:***@localhost/nyc
2 rows affected.
st_astext
POLYGON((0 0,1 0,1 1,0 1,0 0))
POLYGON((0 0,10 0,10 10,0 10,0 0),(1 1,1 2,2 2,2 1,1 1))

Some of the specific spatial functions for working with polygons are:

  • ST_Area(geometry) returns the area of the polygons

  • ST_NRings(geometry) returns the number of rings (usually 1, more of there are holes)

  • ST_ExteriorRing(geometry) returns the outer ring as a linestring

  • ST_InteriorRingN(geometry,n) returns a specified interior ring as a linestring

  • ST_Perimeter(geometry) returns the length of all the rings

We can calculate the area of our polygons using the area function:

%%sql

SELECT name, ST_Area(geom)
  FROM geometries
  WHERE name LIKE 'Polygon%';
 * postgresql://postgres:***@localhost/nyc
2 rows affected.
name st_area
Polygon 1.0
PolygonWithHole 99.0

4.7. Collections

There are four collection types, which group multiple simple geometries into sets.

  • MultiPoint, a collection of points

  • MultiLineString, a collection of linestrings

  • MultiPolygon, a collection of polygons

  • GeometryCollection, a heterogeneous collection of any geometry (including other collections)

Collections are another concept that shows up in GIS software more than in generic graphics software. They are useful for directly modeling real world objects as spatial objects. For example, how to model a lot that is split by a right-of-way? As a MultiPolygon, with a part on either side of the right-of-way.

Our example collection contains a polygon and a point:

%%sql

SELECT name, ST_AsText(geom)
  FROM geometries
  WHERE name = 'Collection';
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
name st_astext
Collection GEOMETRYCOLLECTION(POINT(2 0),POLYGON((0 0,1 0,1 1,0 1,0 0)))

Some of the specific spatial functions for working with collections are:

  • ST_NumGeometries(geometry) returns the number of parts in the collection

  • ST_GeometryN(geometry,n) returns the specified part

  • ST_Area(geometry) returns the total area of all polygonal parts

  • ST_Length(geometry) returns the total length of all linear parts

4.8. Geometry Input and Output

Within the database, geometries are stored on disk in a format only used by the PostGIS program. In order for external programs to insert and retrieve useful geometries, they need to be converted into a format that other applications can understand. Fortunately, PostGIS supports emitting and consuming geometries in a large number of formats:

  • Well-known text (WKT)

    • ST_GeomFromText(text, srid) returns geometry

    • ST_AsText(geometry) returns text

    • ST_AsEWKT(geometry) returns text

  • Well-known binary (WKB)

    • ST_GeomFromWKB(bytea) returns geometry

    • ST_AsBinary(geometry) returns bytea

    • ST_AsEWKB(geometry) returns bytea

  • Geographic Mark-up Language (GML)

    • ST_GeomFromGML(text) returns geometry

    • ST_AsGML(geometry) returns text

  • Keyhole Mark-up Language (KML)

    • ST_GeomFromKML(text) returns geometry

    • ST_AsKML(geometry) returns text

  • GeoJSON

    • ST_AsGeoJSON(geometry) returns text

  • Scalable Vector Graphics (SVG)

    • ST_AsSVG(geometry) returns text

In addition to the ST_GeometryFromText function, there are many other ways to create geometries from well-known text or similar formatted inputs:

%%sql

-- Using ST_GeomFromText with the SRID parameter
SELECT ST_GeomFromText('POINT(2 2)',4326);

-- Using ST_GeomFromText without the SRID parameter
SELECT ST_SetSRID(ST_GeomFromText('POINT(2 2)'),4326);

-- Using a ST_Make* function
SELECT ST_SetSRID(ST_MakePoint(2, 2), 4326);

-- Using PostgreSQL casting syntax and ISO WKT
SELECT ST_SetSRID('POINT(2 2)'::geometry, 4326);

-- Using PostgreSQL casting syntax and extended WKT
SELECT 'SRID=4326;POINT(2 2)'::geometry;
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
1 rows affected.
geometry
0101000020E610000000000000000000400000000000000040

4.9. Casting from Text

The WKT strings we’ve see so far have been of type ‘text’ and we have been converting them to type ‘geometry’ using PostGIS functions like ST_GeomFromText().

PostgreSQL includes a short form syntax that allows data to be converted from one type to another, the casting syntax, oldata::newtype. So for example, this SQL converts a double into a text string.

%%sql

SELECT 0.9::text;
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
text
0.9

Less trivially, this SQL converts a WKT string into a geometry:

%%sql

SELECT 'POINT(0 0)'::geometry;
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
geometry
010100000000000000000000000000000000000000

One thing to note about using casting to create geometries: unless you specify the SRID, you will get a geometry with an unknown SRID. You can specify the SRID using the “extended” well-known text form, which includes an SRID block at the front:

%%sql

SELECT 'SRID=4326;POINT(0 0)'::geometry;
 * postgresql://postgres:***@localhost/nyc
1 rows affected.
geometry
0101000020E610000000000000000000000000000000000000

4.10. Function List

ST_Area: Returns the area of the surface if it is a polygon or multi-polygon. For “geometry” type area is in SRID units. For “geography” area is in square meters.

ST_AsText: Returns the Well-Known Text (WKT) representation of the geometry/geography without SRID metadata.

ST_AsBinary: Returns the Well-Known Binary (WKB) representation of the geometry/geography without SRID meta data.

ST_EndPoint: Returns the last point of a LINESTRING geometry as a POINT.

ST_AsEWKB: Returns the Well-Known Binary (WKB) representation of the geometry with SRID meta data.

ST_AsEWKT: Returns the Well-Known Text (WKT) representation of the geometry with SRID meta data.

ST_AsGeoJSON: Returns the geometry as a GeoJSON element.

ST_AsGML: Returns the geometry as a GML version 2 or 3 element.

ST_AsKML: Returns the geometry as a KML element. Several variants. Default version=2, default precision=15.

ST_AsSVG: Returns a Geometry in SVG path data given a geometry or geography object.

ST_ExteriorRing: Returns a line string representing the exterior ring of the POLYGON geometry. Return NULL if the geometry is not a polygon. Will not work with MULTIPOLYGON

ST_GeometryN: Returns the 1-based Nth geometry if the geometry is a GEOMETRYCOLLECTION, MULTIPOINT, MULTILINESTRING, MULTICURVE or MULTIPOLYGON. Otherwise, return NULL.

ST_GeomFromGML: Takes as input GML representation of geometry and outputs a PostGIS geometry object.

ST_GeomFromKML: Takes as input KML representation of geometry and outputs a PostGIS geometry object

ST_GeomFromText: Returns a specified ST_Geometry value from Well-Known Text representation (WKT).

ST_GeomFromWKB: Creates a geometry instance from a Well-Known Binary geometry representation (WKB) and optional SRID.

ST_GeometryType: Returns the geometry type of the ST_Geometry value.

ST_InteriorRingN: Returns the Nth interior linestring ring of the polygon geometry. Return NULL if the geometry is not a polygon or the given N is out of range.

ST_Length: Returns the 2d length of the geometry if it is a linestring or multilinestring. geometry are in units of spatial reference and geogra