Geographic Information Systems Asked on November 1, 2021
I was wondering if it’s possible to use the R package sf
for extracting the names of the fields in a file without reading it. For example, I would like to replicate the following behaviour:
system(paste0("ogrinfo -so ", system.file("shape/nc.shp", package = "sf"), " nc"))
#> INFO: Open of `/home/andrea/R/x86_64-pc-linux-gnu-library/3.6/sf/shape/nc.shp'
#> using driver `ESRI Shapefile' successful.
#> Layer name: nc
#> Metadata:
#> DBF_DATE_LAST_UPDATE=2016-10-26
#> Geometry: Polygon
#> Feature Count: 100
#> Extent: (-84.323853, 33.881992) - (-75.456978, 36.589649)
#> Layer SRS WKT:
#> GEOGCRS["NAD27",
#> DATUM["North American Datum 1927",
#> ELLIPSOID["Clarke 1866",6378206.4,294.978698213898,
#> LENGTHUNIT["metre",1]]],
#> PRIMEM["Greenwich",0,
#> ANGLEUNIT["degree",0.0174532925199433]],
#> CS[ellipsoidal,2],
#> AXIS["latitude",north,
#> ORDER[1],
#> ANGLEUNIT["degree",0.0174532925199433]],
#> AXIS["longitude",east,
#> ORDER[2],
#> ANGLEUNIT["degree",0.0174532925199433]],
#> ID["EPSG",4267]]
#> Data axis to CRS axis mapping: 2,1
#> AREA: Real (24.15)
#> PERIMETER: Real (24.15)
#> CNTY_: Real (24.15)
#> CNTY_ID: Real (24.15)
#> NAME: String (80.0)
#> FIPS: String (80.0)
#> FIPSNO: Real (24.15)
#> CRESS_ID: Integer (9.0)
#> BIR74: Real (24.15)
#> SID74: Real (24.15)
#> NWBIR74: Real (24.15)
#> BIR79: Real (24.15)
#> SID79: Real (24.15)
#> NWBIR79: Real (24.15)
without directly calling system or ogrinfo
.
If you use st_read
with a query that selects zero rows you get back an empty dataframe with all the columns intact:
> nc_sql = st_read(system.file("shape/nc.shp", package="sf"), query="select * from "nc" limit 0")
> dim(nc_sql)
[1] 0 15
> names(nc_sql)
[1] "AREA" "PERIMETER" "CNTY_" "CNTY_ID"
[5] "NAME" "FIPS" "FIPSNO" "CRESS_ID"
[9] "BIR74" "SID74" "NWBIR74" "BIR79"
[13] "SID79" "NWBIR79" "_ogr_geometry_"
If you are also interested in the column types you can get those even though the data frame has no rows:
> class(nc_sql$FIPS)
[1] "factor"
> class(nc_sql$FIPSNO)
[1] "numeric"
and the projection:
> st_crs(nc_sql)
Coordinate Reference System:
User input: 4267
wkt:
GEOGCS["GCS_North_American_1927",
DATUM["North_American_Datum_1927",
SPHEROID["Clarke_1866",6378206.4,294.9786982]],
PRIMEM["Greenwich",0],
UNIT["Degree",0.017453292519943295],
AUTHORITY["EPSG","4267"]]
I'm not sure if this counts as "without reading it", since at some level the code has to read the file. I don't think this reads the whole object and then drops all the rows, I think it is probably smart enough to stop after zero rows have been read. Would need to be tested to see how long it takes to read zero rows from a huge shapefile.
Another way which is basically wrapped around calling ogrinfo
is to use the gdalUtils
package:
> gdalUtils::ogrinfo(system.file("shape/nc.shp", package="sf"),al=TRUE,so=TRUE)
[1] "INFO: Open of `/home/rowlings/R/x86_64-pc-linux-gnu-library/3.6/sf/shape/nc.shp'"
[2] " using driver `ESRI Shapefile' successful."
[3] ""
[4] "Layer name: nc"
[5] "Metadata:"
[6] " DBF_DATE_LAST_UPDATE=2016-10-26"
[7] "Geometry: Polygon"
[8] "Feature Count: 100"
[9] "Extent: (-84.323853, 33.881992) - (-75.456978, 36.589649)"
[10] "Layer SRS WKT:"
[11] "GEOGCS["GCS_North_American_1927","
[12] " DATUM["North_American_Datum_1927","
[13] " SPHEROID["Clarke_1866",6378206.4,294.9786982]],"
[14] " PRIMEM["Greenwich",0],"
[15] " UNIT["Degree",0.017453292519943295],"
[16] " AUTHORITY["EPSG","4267"]]"
[17] "AREA: Real (24.15)"
[18] "PERIMETER: Real (24.15)"
[19] "CNTY_: Real (24.15)"
[20] "CNTY_ID: Real (24.15)"
[21] "NAME: String (80.0)"
[22] "FIPS: String (80.0)"
[23] "FIPSNO: Real (24.15)"
[24] "CRESS_ID: Integer (9.0)"
[25] "BIR74: Real (24.15)"
[26] "SID74: Real (24.15)"
[27] "NWBIR74: Real (24.15)"
[28] "BIR79: Real (24.15)"
[29] "SID79: Real (24.15)"
[30] "NWBIR79: Real (24.15)"
but you'd then have to analyse the output carefully to find the lines of the field names.
Answered by Spacedman on November 1, 2021
Get help from others!
Recent Answers
Recent Questions
© 2024 TransWikia.com. All rights reserved. Sites we Love: PCI Database, UKBizDB, Menu Kuliner, Sharing RPP