Data
Caches results to defaul cache + downloads a large zip
shpBuildings <- cache_csv_sf_wrapper('eval_fonciere_mtl',
fun = get_zipped_remote_shapefile,
url = 'https://data.montreal.ca/dataset/4ad6baea-4d2c-460f-a8bf-5d000db498f7/resource/43c2cccf-a439-429b-a3c8-5d4ebce53e1b/download/uniteevaluationfonciere.zip',
id_name='ID_UEV')
#> Loading required package: glue
#> Loading required package: readr
#> Loading required package: sf
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
#> Loading required package: purrr
#>
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#>
#> set_names
#> [1] "Using default cache: /home/runner/.cache ..."
#> /home/runner/.cache/eval_fonciere_mtl.zip does not exist yet - running get_zipped_remote_shapefile ...
#> [1] "Downloading to tmp directory"
#> Warning in fun(...): Warning! fuckup with tmp files: forcewriting to data dir
#> Reading layer `uniteevaluationfonciere' from data source `/tmp/RtmpLQ7v32' using driver `ESRI Shapefile'
#> Simple feature collection with 501591 features and 18 fields
#> Geometry type: MULTIPOLYGON
#> Dimension: XY
#> Bounding box: xmin: 267585.6 ymin: 5029232 xmax: 306671.1 ymax: 5062643
#> Projected CRS: NAD83 / MTM zone 8
#> Writing layer `eval_fonciere_mtl' to data source
#> `/home/runner/.cache/eval_fonciere_mtl.shp' using driver `ESRI Shapefile'
#> Writing 501591 features with 1 fields and geometry type Multi Polygon.
#> Saving results:
#> /home/runner/.cache/eval_fonciere_mtl.csv
#> /home/runner/.cache/eval_fonciere_mtl.shp...
#Clean up
shpBuildings %<>% mutate( ANNEE_CONS = plyr::mapvalues(ANNEE_CONS, from=9999, to=NA) )
#Sample
shpBuildings %<>% sample_frac(size=0.001)
Spatial k mean to thin out large number of points
Year built
#No need to get the centroids, spatialKMeans does it automatically
shpBuildingsAgg <- spatialKMeans (shpBuildings,
numCentroids = as.integer(nrow(shpBuildings)/10),
numClosestPoints = 10,
var="ANNEE_CONS",
aggFct=function(x) {median(x,na.rm = T)} #remove NAs since we introduced some in the chunk above
)
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
#> `.name_repair` is omitted as of tibble 2.0.0.
#> ℹ Using compatibility `.name_repair`.
#> ℹ The deprecated feature was likely used in the SfSpHelpers package.
#> Please report the issue to the authors.
#Check same crs
assertthat::are_equal(st_crs(shpBuildingsAgg), st_crs(shpBuildings))
#> [1] TRUE
#Number of centroids
assertthat::are_equal(nrow(shpBuildingsAgg) , 10**3)
#> [1] FALSE
#Plot to see the difference
shpBuildingsAgg$id <- 'aggregated'
shpBuildings$id <- 'raw'
shpBoth <- rbind(shpBuildingsAgg,
shpBuildings %>% dplyr::select( all_of(colnames(shpBuildingsAgg))))
ggplot() +
geom_sf(data=shpBoth, aes(col=ANNEE_CONS) ) +
facet_wrap(~id, ncol=1) +
ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
scale_color_viridis_c()

Area grouped by type
#Keep the most prevalent groups and make sure to have at least 10 observations so that propToKeep = 0.1 does not fail after
top_cat <- shpBuildings %>% st_drop_geometry() %>% count(LIBELLE_UT) %>% arrange(desc(n)) %>% top_n(n=3) %>% filter(n > 10)
#> Selecting by n
shpBuildingsAgg_by_build_type <- map_dfr(
top_cat$LIBELLE_UT,
function(x) {
spatialKMeans (shp = shpBuildings %>% filter(LIBELLE_UT == x ) ,
propToKeep = 0.1,
numClosestPoints = 10,
var="SUPERFICIE") %>%
mutate(LIBELLE_UT = x)
}
)
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
ggplot() +
geom_sf(data=shpBuildingsAgg_by_build_type, aes(col=SUPERFICIE) ) +
facet_wrap(~LIBELLE_UT, ncol=1) +
ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
scale_color_viridis_c()
