suppressWarnings(library(SfSpHelpers))
suppressWarnings(library(ggplot2))
suppressWarnings(library(dplyr))
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
suppressWarnings(library(magrittr))

Data

Caches results to defaul cache + downloads a large zip


  shpBuildings <- cache_csv_sf_wrapper('eval_fonciere_mtl', 
                                       fun = get_zipped_remote_shapefile,
                                       url = 'https://data.montreal.ca/dataset/4ad6baea-4d2c-460f-a8bf-5d000db498f7/resource/43c2cccf-a439-429b-a3c8-5d4ebce53e1b/download/uniteevaluationfonciere.zip',
                                       id_name='ID_UEV')
#> Loading required package: glue
#> Loading required package: readr
#> Loading required package: sf
#> Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 8.2.1; sf_use_s2() is TRUE
#> Loading required package: purrr
#> 
#> Attaching package: 'purrr'
#> The following object is masked from 'package:magrittr':
#> 
#>     set_names
#> [1] "Using default cache: /home/runner/.cache ..."
#> /home/runner/.cache/eval_fonciere_mtl.zip does not exist yet - running get_zipped_remote_shapefile ...
#> [1] "Downloading to tmp directory"
#> Warning in fun(...): Warning! fuckup with tmp files: forcewriting to data dir
#> Reading layer `uniteevaluationfonciere' from data source `/tmp/RtmpLQ7v32' using driver `ESRI Shapefile'
#> Simple feature collection with 501591 features and 18 fields
#> Geometry type: MULTIPOLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 267585.6 ymin: 5029232 xmax: 306671.1 ymax: 5062643
#> Projected CRS: NAD83 / MTM zone 8
#> Writing layer `eval_fonciere_mtl' to data source 
#>   `/home/runner/.cache/eval_fonciere_mtl.shp' using driver `ESRI Shapefile'
#> Writing 501591 features with 1 fields and geometry type Multi Polygon.
#> Saving results: 
#> /home/runner/.cache/eval_fonciere_mtl.csv
#> /home/runner/.cache/eval_fonciere_mtl.shp...

#Clean up 
shpBuildings %<>% mutate( ANNEE_CONS = plyr::mapvalues(ANNEE_CONS, from=9999, to=NA) )

#Sample
shpBuildings %<>% sample_frac(size=0.001) 

Spatial k mean to thin out large number of points

Year built


  #No need to get the centroids, spatialKMeans does it automatically
  shpBuildingsAgg <- spatialKMeans (shpBuildings,
                            numCentroids = as.integer(nrow(shpBuildings)/10),
                            numClosestPoints = 10,
                            var="ANNEE_CONS",
                            aggFct=function(x) {median(x,na.rm = T)} #remove NAs since we introduced some in the chunk above
  )
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
#> `.name_repair` is omitted as of tibble 2.0.0.
#>  Using compatibility `.name_repair`.
#>  The deprecated feature was likely used in the SfSpHelpers package.
#>   Please report the issue to the authors.

  #Check same crs
  assertthat::are_equal(st_crs(shpBuildingsAgg), st_crs(shpBuildings))
#> [1] TRUE

  #Number of centroids
  assertthat::are_equal(nrow(shpBuildingsAgg) , 10**3)
#> [1] FALSE

  #Plot to see the difference
  shpBuildingsAgg$id <- 'aggregated'
  shpBuildings$id <- 'raw'
 

  shpBoth <- rbind(shpBuildingsAgg,
                   shpBuildings %>% dplyr::select( all_of(colnames(shpBuildingsAgg))))

 

  ggplot() +
    geom_sf(data=shpBoth, aes(col=ANNEE_CONS) ) +
    facet_wrap(~id, ncol=1) + 
    ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
    scale_color_viridis_c()

Area grouped by type


#Keep the most prevalent groups and make sure to have at least 10 observations so that propToKeep = 0.1 does not fail after
top_cat <- shpBuildings %>% st_drop_geometry() %>% count(LIBELLE_UT) %>% arrange(desc(n)) %>% top_n(n=3) %>% filter(n > 10)
#> Selecting by n

shpBuildingsAgg_by_build_type <- map_dfr(
  top_cat$LIBELLE_UT,
  function(x) {
    spatialKMeans (shp = shpBuildings %>%  filter(LIBELLE_UT == x ) ,
                 propToKeep = 0.1,
                 numClosestPoints = 10,
                 var="SUPERFICIE") %>% 
    mutate(LIBELLE_UT = x)
  }
)
#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x

#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x

#> Warning in st_centroid.sf(shp): st_centroid assumes attributes are constant over
#> geometries of x
 
ggplot() +
    geom_sf(data=shpBuildingsAgg_by_build_type, aes(col=SUPERFICIE) ) +
    facet_wrap(~LIBELLE_UT, ncol=1) +
    ggplot2::theme_minimal(base_family="Roboto Condensed", base_size=11.5) +
    scale_color_viridis_c()