In [2]:
Copied!
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold 
from sklearn.model_selection import LeaveOneGroupOut 
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from spatialkfold.blocks import spatial_blocks 
from spatialkfold.datasets import load_ames
from spatialkfold.clusters import spatial_kfold_clusters 
from spatialkfold.plotting import spatial_kfold_plot
from spatialkfold.stats import spatial_kfold_stats
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold 
from sklearn.model_selection import LeaveOneGroupOut 
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from spatialkfold.blocks import spatial_blocks 
from spatialkfold.datasets import load_ames
from spatialkfold.clusters import spatial_kfold_clusters 
from spatialkfold.plotting import spatial_kfold_plot
from spatialkfold.stats import spatial_kfold_stats
I. Spatial Resampling¶
In [3]:
Copied!
# Load the Ames sample dataset provided by spatialkfold.datasets
ames = load_ames()
# Load the Ames sample dataset provided by spatialkfold.datasets
ames = load_ames()
In [4]:
Copied!
ames.crs
ames.crs
Out[4]:
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
In [5]:
Copied!
ames
ames
Out[5]:
| MS_SubClass | MS_Zoning | Lot_Frontage | Lot_Area | Street | Alley | Lot_Shape | Land_Contour | Utilities | Lot_Config | ... | Pool_QC | Fence | Misc_Feature | Misc_Val | Mo_Sold | Year_Sold | Sale_Type | Sale_Condition | Sale_Price | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 141.0 | 31770 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | None | 0 | 5 | 2010 | WD | Normal | 215000 | POINT (-93.61975 42.05403) | 
| 1 | One_Story_1946_and_Newer_All_Styles | Residential_High_Density | 80.0 | 11622 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 6 | 2010 | WD | Normal | 105000 | POINT (-93.61976 42.05301) | 
| 2 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 81.0 | 14267 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | Gar2 | 12500 | 6 | 2010 | WD | Normal | 172000 | POINT (-93.61939 42.05266) | 
| 3 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 93.0 | 11160 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | None | 0 | 4 | 2010 | WD | Normal | 244000 | POINT (-93.61732 42.05125) | 
| 4 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 13830 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 3 | 2010 | WD | Normal | 189900 | POINT (-93.63893 42.06090) | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 2925 | Split_or_Multilevel | Residential_Low_Density | 37.0 | 7937 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | CulDSac | ... | No_Pool | Good_Privacy | None | 0 | 3 | 2006 | WD | Normal | 142500 | POINT (-93.60478 41.98896) | 
| 2926 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 0.0 | 8885 | Pave | No_Alley_Access | Slightly_Irregular | Low | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 6 | 2006 | WD | Normal | 131000 | POINT (-93.60268 41.98831) | 
| 2927 | Split_Foyer | Residential_Low_Density | 62.0 | 10441 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | Shed | 700 | 7 | 2006 | WD | Normal | 132000 | POINT (-93.60685 41.98651) | 
| 2928 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 77.0 | 10010 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | No_Fence | None | 0 | 4 | 2006 | WD | Normal | 170000 | POINT (-93.60019 41.99092) | 
| 2929 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 9627 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | No_Fence | None | 0 | 11 | 2006 | WD | Normal | 188000 | POINT (-93.60000 41.98927) | 
2930 rows × 73 columns
In [6]:
Copied!
# Reproject to the appropriate coordinate reference system 
# Reproject to the appropriate coordinate reference system 
In [7]:
Copied!
# Reproject to the UTM zone estimated from the data so that distances are in metres.
# to_crs() returns a new GeoDataFrame (it is not in-place by default), so `ames` is
# left untouched without needing an explicit copy first.
ames_prj = ames.to_crs(ames.estimate_utm_crs())
# Reproject to the UTM zone estimated from the data so that distances are in metres.
# to_crs() returns a new GeoDataFrame (it is not in-place by default), so `ames` is
# left untouched without needing an explicit copy first.
ames_prj = ames.to_crs(ames.estimate_utm_crs())
In [8]:
Copied!
ames_prj.crs
ames_prj.crs
Out[8]:
<Derived Projected CRS: EPSG:32615> Name: WGS 84 / UTM zone 15N Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 96°W and 90°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Ontario. Ecuador -Galapagos. Guatemala. Mexico. United States (USA). - bounds: (-96.0, 0.0, -90.0, 84.0) Coordinate Operation: - name: UTM zone 15N - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
In [9]:
Copied!
# Add an id column for each data point 
# Add an id column for each data point 
In [10]:
Copied!
# Sequential integer id (0 .. n-1) per row; passed later as the `name` argument of spatial_kfold_clusters
ames_prj['id'] = range(len(ames_prj))
# Sequential integer id (0 .. n-1) per row; passed later as the `name` argument of spatial_kfold_clusters
ames_prj['id'] = range(len(ames_prj))
1. Spatial cluster resampling¶
Two clustering algorithms are supported:
- KMeans (By default)
- BisectingKMeans
1.1 Using KMeans¶
In [11]:
Copied!
# Partition the projected points into 10 spatial folds using KMeans clustering.
# The returned frame carries an extra integer `folds` column (see the output below).
# `random_state` is fixed so the fold assignment is reproducible.
# NOTE(review): `n_init="auto"` is presumably forwarded to scikit-learn's KMeans -- confirm in spatialkfold docs.
ames_clusters = spatial_kfold_clusters(
    gdf=ames_prj, 
    name='id', 
    nfolds= 10, 
    algorithm='kmeans', 
    n_init="auto", 
    random_state =569) 
# Partition the projected points into 10 spatial folds using KMeans clustering.
# The returned frame carries an extra integer `folds` column (see the output below).
# `random_state` is fixed so the fold assignment is reproducible.
# NOTE(review): `n_init="auto"` is presumably forwarded to scikit-learn's KMeans -- confirm in spatialkfold docs.
ames_clusters = spatial_kfold_clusters(
    gdf=ames_prj, 
    name='id', 
    nfolds= 10, 
    algorithm='kmeans', 
    n_init="auto", 
    random_state =569) 
In [12]:
Copied!
ames_clusters
ames_clusters
Out[12]:
| MS_SubClass | MS_Zoning | Lot_Frontage | Lot_Area | Street | Alley | Lot_Shape | Land_Contour | Utilities | Lot_Config | ... | Misc_Feature | Misc_Val | Mo_Sold | Year_Sold | Sale_Type | Sale_Condition | Sale_Price | geometry | id | folds | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 141.0 | 31770 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | None | 0 | 5 | 2010 | WD | Normal | 215000 | POINT (448716.789 4655961.485) | 0 | 9 | 
| 1 | One_Story_1946_and_Newer_All_Styles | Residential_High_Density | 80.0 | 11622 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 6 | 2010 | WD | Normal | 105000 | POINT (448715.802 4655848.124) | 1 | 9 | 
| 2 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 81.0 | 14267 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | Gar2 | 12500 | 6 | 2010 | WD | Normal | 172000 | POINT (448746.026 4655808.487) | 2 | 9 | 
| 3 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 93.0 | 11160 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Corner | ... | None | 0 | 4 | 2010 | WD | Normal | 244000 | POINT (448915.962 4655650.253) | 3 | 9 | 
| 4 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 13830 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Inside | ... | None | 0 | 3 | 2010 | WD | Normal | 189900 | POINT (447135.458 4656735.276) | 4 | 2 | 
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | 
| 2925 | Split_or_Multilevel | Residential_Low_Density | 37.0 | 7937 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | CulDSac | ... | None | 0 | 3 | 2006 | WD | Normal | 142500 | POINT (449905.137 4648727.785) | 2925 | 3 | 
| 2926 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 0.0 | 8885 | Pave | No_Alley_Access | Slightly_Irregular | Low | AllPub | Inside | ... | None | 0 | 6 | 2006 | WD | Normal | 131000 | POINT (450078.246 4648654.392) | 2926 | 3 | 
| 2927 | Split_Foyer | Residential_Low_Density | 62.0 | 10441 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | Shed | 700 | 7 | 2006 | WD | Normal | 132000 | POINT (449731.661 4648456.532) | 2927 | 3 | 
| 2928 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 77.0 | 10010 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 4 | 2006 | WD | Normal | 170000 | POINT (450286.530 4648942.397) | 2928 | 3 | 
| 2929 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 9627 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 11 | 2006 | WD | Normal | 188000 | POINT (450301.311 4648758.419) | 2929 | 3 | 
2930 rows × 75 columns
In [13]:
Copied!
# Get the 'tab20' colormap resampled to 10 entries (one colour per fold).
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9; plt.get_cmap accepts the same (name, lut) pair.
cols_tab = plt.get_cmap('tab20', 10)
# Generate a list of colors from the colormap (cols_tab.N is the resampled size, i.e. 10)
cols = [cols_tab(i) for i in range(cols_tab.N)]
# create a color ramp shared by every fold map below
color_ramp = ListedColormap(cols)
# Get the 'tab20' colormap resampled to 10 entries (one colour per fold).
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# matplotlib 3.7 and removed in 3.9; plt.get_cmap accepts the same (name, lut) pair.
cols_tab = plt.get_cmap('tab20', 10)
# Generate a list of colors from the colormap (cols_tab.N is the resampled size, i.e. 10)
cols = [cols_tab(i) for i in range(cols_tab.N)]
# create a color ramp shared by every fold map below
color_ramp = ListedColormap(cols)
In [14]:
Copied!
# Map of the KMeans folds: every point coloured by its fold id
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_clusters.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Clustered Folds\nUsing KMeans')
plt.show()
# Map of the KMeans folds: every point coloured by its fold id
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_clusters.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Clustered Folds\nUsing KMeans')
plt.show()
In [15]:
Copied!
# Check the number of training and test samples of the dependent variable in each fold
# Check the number of training and test samples of the dependent variable in each fold
In [16]:
Copied!
# Train/test sample counts per split for the KMeans folds
ames_clusters_stats = spatial_kfold_stats(
    X=ames_clusters,
    y=ames_clusters['Sale_Price'],
    groups=ames_clusters['folds'],
)
# Train/test sample counts per split for the KMeans folds
ames_clusters_stats = spatial_kfold_stats(
    X=ames_clusters,
    y=ames_clusters['Sale_Price'],
    groups=ames_clusters['folds'],
)
In [17]:
Copied!
ames_clusters_stats
ames_clusters_stats
Out[17]:
| split | train | test | |
|---|---|---|---|
| 0 | 1 | 2645 | 285 | 
| 1 | 2 | 2427 | 503 | 
| 2 | 3 | 2769 | 161 | 
| 3 | 4 | 2645 | 285 | 
| 4 | 5 | 2577 | 353 | 
| 5 | 6 | 2568 | 362 | 
| 6 | 7 | 2824 | 106 | 
| 7 | 8 | 2658 | 272 | 
| 8 | 9 | 2640 | 290 | 
| 9 | 10 | 2617 | 313 | 
1.2 Using BisectingKMeans¶
In [18]:
Copied!
# Same 10-fold spatial partition as above, but clustered with BisectingKMeans instead of KMeans.
# `random_state` is fixed so the fold assignment is reproducible.
ames_clusters_Biskmeans = spatial_kfold_clusters(
    gdf=ames_prj, 
    name='id', 
    nfolds=10, 
    algorithm='bisectingkmeans',
    random_state =569) 
# Same 10-fold spatial partition as above, but clustered with BisectingKMeans instead of KMeans.
# `random_state` is fixed so the fold assignment is reproducible.
ames_clusters_Biskmeans = spatial_kfold_clusters(
    gdf=ames_prj, 
    name='id', 
    nfolds=10, 
    algorithm='bisectingkmeans',
    random_state =569) 
In [19]:
Copied!
# Map of the BisectingKMeans folds: every point coloured by its fold id
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_clusters_Biskmeans.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Clustered Folds\nUsing BisectingKMeans')
plt.show()
# Map of the BisectingKMeans folds: every point coloured by its fold id
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_clusters_Biskmeans.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Clustered Folds\nUsing BisectingKMeans')
plt.show()
In [20]:
Copied!
# Train/test sample counts per split for the BisectingKMeans folds
ames_clusters_stats_ = spatial_kfold_stats(
    X=ames_clusters_Biskmeans,
    y=ames_clusters_Biskmeans['Sale_Price'],
    groups=ames_clusters_Biskmeans['folds'],
)
# Train/test sample counts per split for the BisectingKMeans folds
ames_clusters_stats_ = spatial_kfold_stats(
    X=ames_clusters_Biskmeans,
    y=ames_clusters_Biskmeans['Sale_Price'],
    groups=ames_clusters_Biskmeans['folds'],
)
In [21]:
Copied!
ames_clusters_stats_
ames_clusters_stats_
Out[21]:
| split | train | test | |
|---|---|---|---|
| 0 | 1 | 2561 | 369 | 
| 1 | 2 | 2678 | 252 | 
| 2 | 3 | 2633 | 297 | 
| 3 | 4 | 2572 | 358 | 
| 4 | 5 | 2457 | 473 | 
| 5 | 6 | 2605 | 325 | 
| 6 | 7 | 2759 | 171 | 
| 7 | 8 | 2579 | 351 | 
| 8 | 9 | 2769 | 161 | 
| 9 | 10 | 2757 | 173 | 
In [22]:
Copied!
# Create a grid of 1500 x 1500 rectangular blocks over the data, assigned to 10 folds at random.
# Width/height are in the units of ames_prj's CRS (metres, since it is a UTM projection).
# The trailing comments list the alternative value accepted by each option.
ames_rnd_blocks = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500, 
  method="random",     # "continuous"
  orientation="tb-lr", # "bt-rl"
  grid_type="rect",    # "hex" 
  nfolds=10,
  random_state=135
  )
# Create a grid of 1500 x 1500 rectangular blocks over the data, assigned to 10 folds at random.
# Width/height are in the units of ames_prj's CRS (metres, since it is a UTM projection).
# The trailing comments list the alternative value accepted by each option.
ames_rnd_blocks = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500, 
  method="random",     # "continuous"
  orientation="tb-lr", # "bt-rl"
  grid_type="rect",    # "hex" 
  nfolds=10,
  random_state=135
  )
In [23]:
Copied!
# Block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_rnd_blocks.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Random Blocks Folds')
# Block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_rnd_blocks.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Random Blocks Folds')
Out[23]:
Text(0.5, 1.0, 'Random Blocks Folds')
In [24]:
Copied!
# Resample the Ames points with the prepared blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_rnd_blk = gpd.overlay(
    ames_prj,
    ames_rnd_blocks,
    how='intersection',  # explicit form of geopandas' default
)
# Resample the Ames points with the prepared blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_rnd_blk = gpd.overlay(
    ames_prj,
    ames_rnd_blocks,
    how='intersection',  # explicit form of geopandas' default
)
In [25]:
Copied!
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_rnd_blocks.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_rnd_blk.plot(
    column='folds',
    cmap=color_ramp,
    markersize=3,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nRandom Blocks')
plt.show()
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_rnd_blocks.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_rnd_blk.plot(
    column='folds',
    cmap=color_ramp,
    markersize=3,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nRandom Blocks')
plt.show()
In [26]:
Copied!
# Side-by-side view: the block grid (left) and the points coloured by their fold (right)
fig, ax = plt.subplots(1,2 , figsize=(10, 6)) 
# plot 1: blocks filled by fold id, with the raw points drawn on top in red
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2: block outlines only, with the resampled points coloured by fold
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
# Draw the same points again with Axes.scatter: its return value is the mappable used by the colorbar below
im1 = ax[1].scatter(ames_res_rnd_blk.geometry.x , ames_res_rnd_blk.geometry.y, c=ames_res_rnd_blk['folds'],
                 cmap=color_ramp, s=5)
# Dedicated axes for the colorbar, anchored (in ax[1] axes coordinates) just right of plot 2
axins1 = inset_axes(
    ax[1],
    width="5%",  # width: 5% of parent_bbox width
    height="50%",  # height: 50%
    loc="lower left",
    bbox_to_anchor=(1.05, 0, 1, 2),
    bbox_transform=ax[1].transAxes,
    borderpad=0
)
# One colorbar tick per fold id (1-10)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
# Side-by-side view: the block grid (left) and the points coloured by their fold (right)
fig, ax = plt.subplots(1,2 , figsize=(10, 6)) 
# plot 1: blocks filled by fold id, with the raw points drawn on top in red
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2: block outlines only, with the resampled points coloured by fold
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
# Draw the same points again with Axes.scatter: its return value is the mappable used by the colorbar below
im1 = ax[1].scatter(ames_res_rnd_blk.geometry.x , ames_res_rnd_blk.geometry.y, c=ames_res_rnd_blk['folds'],
                 cmap=color_ramp, s=5)
# Dedicated axes for the colorbar, anchored (in ax[1] axes coordinates) just right of plot 2
axins1 = inset_axes(
    ax[1],
    width="5%",  # width: 5% of parent_bbox width
    height="50%",  # height: 50%
    loc="lower left",
    bbox_to_anchor=(1.05, 0, 1, 2),
    bbox_transform=ax[1].transAxes,
    borderpad=0
)
# One colorbar tick per fold id (1-10)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
In [27]:
Copied!
# Train/test sample counts per split for the random-block folds
ames_res_rnd_blk_stats = spatial_kfold_stats(
    X=ames_res_rnd_blk,
    y=ames_res_rnd_blk['Sale_Price'],
    groups=ames_res_rnd_blk['folds'],
)
# Train/test sample counts per split for the random-block folds
ames_res_rnd_blk_stats = spatial_kfold_stats(
    X=ames_res_rnd_blk,
    y=ames_res_rnd_blk['Sale_Price'],
    groups=ames_res_rnd_blk['folds'],
)
In [28]:
Copied!
ames_res_rnd_blk_stats
ames_res_rnd_blk_stats
Out[28]:
| split | train | test | |
|---|---|---|---|
| 0 | 1 | 2893 | 37 | 
| 1 | 2 | 2371 | 559 | 
| 2 | 3 | 2414 | 516 | 
| 3 | 4 | 2472 | 458 | 
| 4 | 5 | 2687 | 243 | 
| 5 | 6 | 2608 | 322 | 
| 6 | 7 | 2720 | 210 | 
| 7 | 8 | 2589 | 341 | 
| 8 | 9 | 2920 | 10 | 
| 9 | 10 | 2696 | 234 | 
- We could also use a hexagonal grid type instead
In [ ]:
Copied!
# Create a grid of hexagonal blocks (width 1500), assigned to 10 folds at random
ames_rnd_blocks_hex = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=0,            # If you use hex as grid_type, only the width is needed. If you provide height, it will be ignored 
  method="random",     # "continuous"
  orientation="tb-lr", # "bt-rl"
  grid_type="hex",   
  nfolds=10,
  random_state=135
  )
# Resample the points with the hexagonal blocks so that each point receives the fold of its block
ames_res_rnd_blk_hex = gpd.overlay(ames_prj, ames_rnd_blocks_hex)
# Create a grid of hexagonal blocks (width 1500), assigned to 10 folds at random
ames_rnd_blocks_hex = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=0,            # If you use hex as grid_type, only the width is needed. If you provide height, it will be ignored 
  method="random",     # "continuous"
  orientation="tb-lr", # "bt-rl"
  grid_type="hex",   
  nfolds=10,
  random_state=135
  )
# Resample the points with the hexagonal blocks so that each point receives the fold of its block
ames_res_rnd_blk_hex = gpd.overlay(ames_prj, ames_rnd_blocks_hex)
In [30]:
Copied!
# Side-by-side view for the hexagonal grid: the blocks (left) and the points coloured by their fold (right)
fig, ax = plt.subplots(1,2 , figsize=(10, 6)) 
# plot 1: hexagons filled by fold id, with the raw points drawn on top in red
ames_rnd_blocks_hex.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2: hexagon outlines only, with the resampled points coloured by fold
ames_rnd_blocks_hex.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk_hex.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
# Draw the same points again with Axes.scatter: its return value is the mappable used by the colorbar below
im1 = ax[1].scatter(ames_res_rnd_blk_hex.geometry.x , ames_res_rnd_blk_hex.geometry.y, c=ames_res_rnd_blk_hex['folds'],
                 cmap=color_ramp, s=5)
# Dedicated axes for the colorbar, anchored (in ax[1] axes coordinates) just right of plot 2
axins1 = inset_axes(
    ax[1],
    width="5%",  # width: 5% of parent_bbox width
    height="50%",  # height: 50%
    loc="lower left",
    bbox_to_anchor=(1.05, 0, 1, 2),
    bbox_transform=ax[1].transAxes,
    borderpad=0
)
# One colorbar tick per fold id (1-10)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
# Side-by-side view for the hexagonal grid: the blocks (left) and the points coloured by their fold (right)
fig, ax = plt.subplots(1,2 , figsize=(10, 6)) 
# plot 1: hexagons filled by fold id, with the raw points drawn on top in red
ames_rnd_blocks_hex.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2: hexagon outlines only, with the resampled points coloured by fold
ames_rnd_blocks_hex.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk_hex.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
# Draw the same points again with Axes.scatter: its return value is the mappable used by the colorbar below
im1 = ax[1].scatter(ames_res_rnd_blk_hex.geometry.x , ames_res_rnd_blk_hex.geometry.y, c=ames_res_rnd_blk_hex['folds'],
                 cmap=color_ramp, s=5)
# Dedicated axes for the colorbar, anchored (in ax[1] axes coordinates) just right of plot 2
axins1 = inset_axes(
    ax[1],
    width="5%",  # width: 5% of parent_bbox width
    height="50%",  # height: 50%
    loc="lower left",
    bbox_to_anchor=(1.05, 0, 1, 2),
    bbox_transform=ax[1].transAxes,
    borderpad=0
)
# One colorbar tick per fold id (1-10)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
2.2 Continuous spatial resampled blocks¶
Two options are available for orientation:
- 'tb-lr' : top-bottom, left-right
- 'bt-rl' : bottom-top, right-left
2.2.1. 'tb-lr' : top-bottom, left-right¶
In [31]:
Copied!
# Build 1500 x 1500 rectangular blocks whose 10 folds are assigned continuously,
# following the 'tb-lr' (top-bottom, left-right) orientation.
ames_cont_blocks = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500,            
  method="continuous",    
  orientation="tb-lr",
  grid_type="rect",   
  nfolds=10,
  random_state=135
  )
# Build 1500 x 1500 rectangular blocks whose 10 folds are assigned continuously,
# following the 'tb-lr' (top-bottom, left-right) orientation.
ames_cont_blocks = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500,            
  method="continuous",    
  orientation="tb-lr",
  grid_type="rect",   
  nfolds=10,
  random_state=135
  )
In [32]:
Copied!
# Continuous ('tb-lr') block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Continuous Blocks Folds\norientation:"tb-lr"')
# Continuous ('tb-lr') block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Continuous Blocks Folds\norientation:"tb-lr"')
Out[32]:
Text(0.5, 1.0, 'Continuous Blocks Folds\norientation:"tb-lr"')
In [33]:
Copied!
# Resample the Ames points with the continuous blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_cont_blk = gpd.overlay(
    ames_prj,
    ames_cont_blocks,
    how='intersection',  # explicit form of geopandas' default
)
# Resample the Ames points with the continuous blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_cont_blk = gpd.overlay(
    ames_prj,
    ames_cont_blocks,
    how='intersection',  # explicit form of geopandas' default
)
In [34]:
Copied!
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_cont_blk.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "tb-lr"')
plt.show()
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_cont_blk.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "tb-lr"')
plt.show()
In [35]:
Copied!
# Train/test sample counts per split for the continuous 'tb-lr' block folds
ames_res_cont_blk_stats = spatial_kfold_stats(
    X=ames_res_cont_blk,
    y=ames_res_cont_blk['Sale_Price'],
    groups=ames_res_cont_blk['folds'],
)
# Train/test sample counts per split for the continuous 'tb-lr' block folds
ames_res_cont_blk_stats = spatial_kfold_stats(
    X=ames_res_cont_blk,
    y=ames_res_cont_blk['Sale_Price'],
    groups=ames_res_cont_blk['folds'],
)
In [36]:
Copied!
ames_res_cont_blk_stats
ames_res_cont_blk_stats
Out[36]:
| split | train | test | |
|---|---|---|---|
| 0 | 1 | 2446 | 484 | 
| 1 | 2 | 2628 | 302 | 
| 2 | 3 | 2801 | 129 | 
| 3 | 4 | 2567 | 363 | 
| 4 | 5 | 2367 | 563 | 
| 5 | 6 | 2402 | 528 | 
| 6 | 7 | 2894 | 36 | 
| 7 | 8 | 2444 | 486 | 
| 8 | 9 | 2896 | 34 | 
| 9 | 10 | 2925 | 5 | 
2.2.2. 'bt-rl' : bottom-top, right-left¶
In [37]:
Copied!
# Same continuous 1500 x 1500 rectangular blocks, but with folds assigned in the
# reverse 'bt-rl' (bottom-top, right-left) orientation.
ames_cont_blocks_rev = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500,            
  method="continuous",    
  orientation='bt-rl',
  grid_type="rect",   
  nfolds=10,
  random_state=135
  )
# Same continuous 1500 x 1500 rectangular blocks, but with folds assigned in the
# reverse 'bt-rl' (bottom-top, right-left) orientation.
ames_cont_blocks_rev = spatial_blocks(
  gdf=ames_prj, 
  width=1500, 
  height=1500,            
  method="continuous",    
  orientation='bt-rl',
  grid_type="rect",   
  nfolds=10,
  random_state=135
  )
In [38]:
Copied!
# Continuous ('bt-rl') block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks_rev.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Continuous Blocks Folds\norientation:"bt-rl"')
# Continuous ('bt-rl') block grid filled by fold id, with the data points overlaid in red
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks_rev.plot(
    column='folds',
    cmap=color_ramp,
    lw=0.7,
    legend=True,
    ax=ax,
)
ames_prj.plot(color='r', markersize=1, ax=ax)
# Kept as the last expression, so the cell echoes the returned Text object
ax.set_title('Continuous Blocks Folds\norientation:"bt-rl"')
Out[38]:
Text(0.5, 1.0, 'Continuous Blocks Folds\norientation:"bt-rl"')
In [39]:
Copied!
# Resample the Ames points with the 'bt-rl' blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_cont_blk_rev = gpd.overlay(
    ames_prj,
    ames_cont_blocks_rev,
    how='intersection',  # explicit form of geopandas' default
)
# Resample the Ames points with the 'bt-rl' blocks: intersecting the two layers
# attaches the block attributes (including `folds`) to every point.
ames_res_cont_blk_rev = gpd.overlay(
    ames_prj,
    ames_cont_blocks_rev,
    how='intersection',  # explicit form of geopandas' default
)
In [40]:
Copied!
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks_rev.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_cont_blk_rev.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "bt-rl"')
plt.show()
# Block outlines in grey, with the resampled points coloured by the fold of their block
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 4))
ames_cont_blocks_rev.plot(
    facecolor="none",
    edgecolor='grey',
    lw=0.7,
    ax=ax,
)
ames_res_cont_blk_rev.plot(
    column='folds',
    cmap=color_ramp,
    markersize=2,
    legend=True,
    ax=ax,
)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "bt-rl"')
plt.show()
In [41]:
Copied!
# Train/test sample counts per split for the continuous 'bt-rl' block folds
ames_res_cont_blk_rev_stats = spatial_kfold_stats(
    X=ames_res_cont_blk_rev,
    y=ames_res_cont_blk_rev['Sale_Price'],
    groups=ames_res_cont_blk_rev['folds'],
)
# Train/test sample counts per split for the continuous 'bt-rl' block folds
ames_res_cont_blk_rev_stats = spatial_kfold_stats(
    X=ames_res_cont_blk_rev,
    y=ames_res_cont_blk_rev['Sale_Price'],
    groups=ames_res_cont_blk_rev['folds'],
)
In [42]:
Copied!
ames_res_cont_blk_rev_stats
ames_res_cont_blk_rev_stats
Out[42]:
| split | train | test | |
|---|---|---|---|
| 0 | 1 | 2892 | 38 | 
| 1 | 2 | 2443 | 487 | 
| 2 | 3 | 2776 | 154 | 
| 3 | 4 | 2350 | 580 | 
| 4 | 5 | 2306 | 624 | 
| 5 | 6 | 2758 | 172 | 
| 6 | 7 | 2841 | 89 | 
| 7 | 8 | 2772 | 158 | 
| 8 | 9 | 2783 | 147 | 
| 9 | 10 | 2449 | 481 | 
3. Plotting function¶
Plot the partitioning of the data at each fold
In [43]:
Copied!
# List the distinct fold ids present in the KMeans partition
np.unique(ames_clusters['folds'].values)
# List the distinct fold ids present in the KMeans partition
np.unique(ames_clusters['folds'].values)
Out[43]:
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=int32)
In [44]:
Copied!
# One figure per fold id (1-10, matching the ids listed above): spatial_kfold_plot
# draws the partition of the data for the fold given by `fold_num` onto `ax`.
for i in range(1,11):
    fig, ax = plt.subplots(1, 1 ,figsize=(9, 4))
    spatial_kfold_plot(X=ames_clusters, 
                       geometry=ames_clusters.geometry, 
                       groups=ames_clusters.folds, 
                       fold_num=i, cmap='viridis', ax=ax)
# One figure per fold id (1-10, matching the ids listed above): spatial_kfold_plot
# draws the partition of the data for the fold given by `fold_num` onto `ax`.
for i in range(1,11):
    fig, ax = plt.subplots(1, 1 ,figsize=(9, 4))
    spatial_kfold_plot(X=ames_clusters, 
                       geometry=ames_clusters.geometry, 
                       groups=ames_clusters.folds, 
                       fold_num=i, cmap='viridis', ax=ax)
II. Comparison: Random vs. Spatial Cross-Validation¶
In [45]:
Copied!
# Select the three predictors and the target from the projected data
x = ames_prj[['Year_Built', 'Bldg_Type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
# Fit an encoder that maps each 'Bldg_Type' category to an integer code.
# NOTE(review): scikit-learn documents LabelEncoder for target labels (OrdinalEncoder / OneHotEncoder are the
# feature-side tools), and integer codes impose an arbitrary order on a nominal variable in a linear model.
le = preprocessing.LabelEncoder()
le.fit(x["Bldg_Type"])
# Not the last expression of the cell, so this list is evaluated but not displayed
list(le.classes_)
# Last expression: the cell output is the integer code of every row
le.transform(ames_prj["Bldg_Type"])
# Select the three predictors and the target from the projected data
x = ames_prj[['Year_Built', 'Bldg_Type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
# Fit an encoder that maps each 'Bldg_Type' category to an integer code.
# NOTE(review): scikit-learn documents LabelEncoder for target labels (OrdinalEncoder / OneHotEncoder are the
# feature-side tools), and integer codes impose an arbitrary order on a nominal variable in a linear model.
le = preprocessing.LabelEncoder()
le.fit(x["Bldg_Type"])
# Not the last expression of the cell, so this list is evaluated but not displayed
list(le.classes_)
# Last expression: the cell output is the integer code of every row
le.transform(ames_prj["Bldg_Type"])
Out[45]:
array([1, 1, 1, ..., 1, 1, 1])
In [46]:
Copied!
# Create a copy of the predictors and add the 'Bldg_Type' column encoded as integer codes in a new 'type' column
x_copy = x.copy()
x_copy['type'] = le.transform(x_copy["Bldg_Type"])
# Create a copy of the predictors and add the 'Bldg_Type' column encoded as integer codes in a new 'type' column
x_copy = x.copy()
x_copy['type'] = le.transform(x_copy["Bldg_Type"])
In [47]:
Copied!
# Get the independent variables (the encoded 'type' replaces the raw 'Bldg_Type') and the dependent variable.
# `y` stays a one-column DataFrame; it is flattened with .values.ravel() where it is passed to cross_validate.
X = x_copy[['Year_Built', 'type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
# Get the independent variables (the encoded 'type' replaces the raw 'Bldg_Type') and the dependent variable.
# `y` stays a one-column DataFrame; it is flattened with .values.ravel() where it is passed to cross_validate.
X = x_copy[['Year_Built', 'type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
In [48]:
Copied!
X
X
Out[48]:
| Year_Built | type | Gr_Liv_Area | |
|---|---|---|---|
| 0 | 1960 | 1 | 1656 | 
| 1 | 1961 | 1 | 896 | 
| 2 | 1958 | 1 | 1329 | 
| 3 | 1968 | 1 | 2110 | 
| 4 | 1997 | 1 | 1629 | 
| ... | ... | ... | ... | 
| 2925 | 1984 | 1 | 1003 | 
| 2926 | 1983 | 1 | 902 | 
| 2927 | 1992 | 1 | 970 | 
| 2928 | 1974 | 1 | 1389 | 
| 2929 | 1993 | 1 | 2000 | 
2930 rows × 3 columns
In [49]:
Copied!
y
y
Out[49]:
| Sale_Price | |
|---|---|
| 0 | 215000 | 
| 1 | 105000 | 
| 2 | 172000 | 
| 3 | 244000 | 
| 4 | 189900 | 
| ... | ... | 
| 2925 | 142500 | 
| 2926 | 131000 | 
| 2927 | 132000 | 
| 2928 | 170000 | 
| 2929 | 188000 | 
2930 rows × 1 columns
1. Random CV¶
In [50]:
Copied!
# Initiate a linear regressor 
reg = LinearRegression()
# Initiate a random CV: 10 shuffled folds drawn without regard to sample location (seeded for reproducibility)
kf = KFold(n_splits = 10, shuffle=True, random_state=123)
# evaluate the model and collect the results
# (the error scorers are negated so that higher is always better, hence the "neg_" prefix;
#  error_score='raise' makes any failing fit raise instead of being recorded as a score)
random_n_scores = cross_validate(reg, X, y.values.ravel(), 
                                scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'], 
                                cv=kf, n_jobs=-1, error_score='raise')
# Initiate a linear regressor 
reg = LinearRegression()
# Initiate a random CV: 10 shuffled folds drawn without regard to sample location (seeded for reproducibility)
kf = KFold(n_splits = 10, shuffle=True, random_state=123)
# evaluate the model and collect the results
# (the error scorers are negated so that higher is always better, hence the "neg_" prefix;
#  error_score='raise' makes any failing fit raise instead of being recorded as a score)
random_n_scores = cross_validate(reg, X, y.values.ravel(), 
                                scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'], 
                                cv=kf, n_jobs=-1, error_score='raise')
In [51]:
Copied!
random_n_scores
random_n_scores
Out[51]:
{'fit_time': array([0.0150876 , 0.01461196, 0.07606864, 0.01363134, 0.01567721,
        0.0122335 , 0.01297998, 0.00495601, 0.01281571, 0.0111711 ]),
 'score_time': array([0.01175451, 0.00338268, 0.02259111, 0.00947595, 0.0103538 ,
        0.01029325, 0.00297976, 0.01099586, 0.01003385, 0.00338864]),
 'test_neg_root_mean_squared_error': array([-44622.40838673, -39772.65742074, -44832.52140754, -45450.03692245,
        -50212.56009308, -49287.98884697, -39787.31970143, -52631.12777371,
        -58187.6988452 , -38817.22221497]),
 'test_r2': array([0.68186786, 0.65778112, 0.71220595, 0.68753325, 0.62837042,
        0.6002097 , 0.69541257, 0.59591238, 0.61584596, 0.71330735]),
 'test_neg_mean_absolute_error': array([-30212.75246135, -28792.38398508, -31229.5572116 , -32461.80017946,
        -32897.03779732, -32580.57849056, -29550.97789663, -35188.52372995,
        -35547.25333851, -28150.39632891])}
In [52]:
Copied!
# Average every metric over the random-CV folds; the error scorers are negated,
# so their absolute value is taken before averaging.
rn_cv_r2 = random_n_scores["test_r2"].mean()
rn_cv_rmse = np.abs(random_n_scores["test_neg_root_mean_squared_error"]).mean()
rn_cv_mae = np.abs(random_n_scores["test_neg_mean_absolute_error"]).mean()
print('R2 :', rn_cv_r2)
print('RMSE :', rn_cv_rmse)
print('MAE :', rn_cv_mae)
# Average every metric over the random-CV folds; the error scorers are negated,
# so their absolute value is taken before averaging.
rn_cv_r2 = random_n_scores["test_r2"].mean()
rn_cv_rmse = np.abs(random_n_scores["test_neg_root_mean_squared_error"]).mean()
rn_cv_mae = np.abs(random_n_scores["test_neg_mean_absolute_error"]).mean()
print('R2 :', rn_cv_r2)
print('RMSE :', rn_cv_rmse)
print('MAE :', rn_cv_mae)
R2 : 0.6588446547591043 RMSE : 46360.15416128094 MAE : 31661.12614193703
Spatial CV:¶
Using a Leave-Region-Out cross-validation
In [53]:
Copied!
# Initiate a leave-one-group-out cross-validation: each spatial fold is held out once as the test set
group_cvs = LeaveOneGroupOut()
# Evaluate the model and collect the results.
# The splitter is passed as `cv` and the KMeans fold ids as `groups`, which is the documented way to drive a
# group-based splitter from cross_validate (instead of handing over a one-shot generator from `.split(...)`).
# The resulting train/test splits are the same.
# NOTE(review): this relies on ames_clusters having the same row order as X (both derive from ames_prj) -- confirm.
spatial_cluster_scores = cross_validate(
    reg, X, y.values.ravel(),
    groups=ames_clusters.folds.values,
    scoring=['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
    cv=group_cvs,  # spatial cross validation
    n_jobs=-1, error_score='raise')
# Initiate a leave-one-group-out cross-validation: each spatial fold is held out once as the test set
group_cvs = LeaveOneGroupOut()
# Evaluate the model and collect the results.
# The splitter is passed as `cv` and the KMeans fold ids as `groups`, which is the documented way to drive a
# group-based splitter from cross_validate (instead of handing over a one-shot generator from `.split(...)`).
# The resulting train/test splits are the same.
# NOTE(review): this relies on ames_clusters having the same row order as X (both derive from ames_prj) -- confirm.
spatial_cluster_scores = cross_validate(
    reg, X, y.values.ravel(),
    groups=ames_clusters.folds.values,
    scoring=['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
    cv=group_cvs,  # spatial cross validation
    n_jobs=-1, error_score='raise')
In [54]:
Copied!
spatial_cluster_scores
spatial_cluster_scores
Out[54]:
{'fit_time': array([0.01505804, 0.01767373, 0.04996634, 0.01555181, 0.02358913,
        0.02461791, 0.01460123, 0.03371215, 0.02450013, 0.01760197]),
 'score_time': array([0.01214123, 0.0113101 , 0.01480627, 0.0123055 , 0.00381994,
        0.00370812, 0.00353646, 0.01049113, 0.00463915, 0.00336456]),
 'test_neg_root_mean_squared_error': array([-41955.22892416, -49279.43810801, -45746.18536977, -32873.9319184 ,
        -53884.87755416, -83784.012911  , -54303.03783829, -37589.61688976,
        -29506.44691955, -32487.42890339]),
 'test_r2': array([0.50685433, 0.58938426, 0.42016374, 0.4765155 , 0.08934565,
        0.2981662 , 0.37778871, 0.4231063 , 0.17653683, 0.09032787]),
 'test_neg_mean_absolute_error': array([-31754.19616039, -35086.44507158, -33609.13258888, -23596.48589712,
        -30780.25017551, -57463.25887411, -41568.85258424, -28509.48447583,
        -21597.55002658, -25546.92881414])}
In [55]:
Copied!
# Average every metric over the spatial folds. The two error metrics are stored
# under "neg_" keys, so their absolute value is taken before averaging.
sp_cv_r2 = spatial_cluster_scores["test_r2"].mean()
sp_cv_rmse = np.abs(spatial_cluster_scores["test_neg_root_mean_squared_error"]).mean()
sp_cv_mae = np.abs(spatial_cluster_scores["test_neg_mean_absolute_error"]).mean()
# Report the three averages, one per line
for metric_label, metric_value in (('R2 :', sp_cv_r2),
                                   ('RMSE :', sp_cv_rmse),
                                   ('MAE :', sp_cv_mae)):
    print(metric_label, metric_value)
# Average every metric over the spatial folds. The two error metrics are stored
# under "neg_" keys, so their absolute value is taken before averaging.
sp_cv_r2 = spatial_cluster_scores["test_r2"].mean()
sp_cv_rmse = np.abs(spatial_cluster_scores["test_neg_root_mean_squared_error"]).mean()
sp_cv_mae = np.abs(spatial_cluster_scores["test_neg_mean_absolute_error"]).mean()
# Report the three averages, one per line
for metric_label, metric_value in (('R2 :', sp_cv_r2),
                                   ('RMSE :', sp_cv_rmse),
                                   ('MAE :', sp_cv_mae)):
    print(metric_label, metric_value)
R2 : 0.344818938658986 RMSE : 46141.02053364941 MAE : 32951.25846683899
In [56]:
Copied!
# Show the per-fold R2 values stored in random_n_scores
random_n_scores['test_r2']
random_n_scores['test_r2']
Out[56]:
array([0.68186786, 0.65778112, 0.71220595, 0.68753325, 0.62837042,
       0.6002097 , 0.69541257, 0.59591238, 0.61584596, 0.71330735])
In [57]:
Copied!
# Per-fold metrics of the random CV run as a table: one row per fold, with the
# negated error scores turned back into positive magnitudes and a constant
# cv_type label added as the last column.
df_rn_cv_scores = pd.DataFrame(
    {
        'r2': random_n_scores['test_r2'],
        'rmse': np.abs(random_n_scores['test_neg_root_mean_squared_error']),
        'mae': np.abs(random_n_scores["test_neg_mean_absolute_error"]),
    }
).assign(cv_type='Random CV')
# Per-fold metrics of the random CV run as a table: one row per fold, with the
# negated error scores turned back into positive magnitudes and a constant
# cv_type label added as the last column.
df_rn_cv_scores = pd.DataFrame(
    {
        'r2': random_n_scores['test_r2'],
        'rmse': np.abs(random_n_scores['test_neg_root_mean_squared_error']),
        'mae': np.abs(random_n_scores["test_neg_mean_absolute_error"]),
    }
).assign(cv_type='Random CV')
In [58]:
Copied!
# Display the random CV per-fold metrics table
df_rn_cv_scores
df_rn_cv_scores
Out[58]:
| | r2 | rmse | mae | cv_type |
|---|---|---|---|---|
| 0 | 0.681868 | 44622.408387 | 30212.752461 | Random CV | 
| 1 | 0.657781 | 39772.657421 | 28792.383985 | Random CV | 
| 2 | 0.712206 | 44832.521408 | 31229.557212 | Random CV | 
| 3 | 0.687533 | 45450.036922 | 32461.800179 | Random CV | 
| 4 | 0.628370 | 50212.560093 | 32897.037797 | Random CV | 
| 5 | 0.600210 | 49287.988847 | 32580.578491 | Random CV | 
| 6 | 0.695413 | 39787.319701 | 29550.977897 | Random CV | 
| 7 | 0.595912 | 52631.127774 | 35188.523730 | Random CV | 
| 8 | 0.615846 | 58187.698845 | 35547.253339 | Random CV | 
| 9 | 0.713307 | 38817.222215 | 28150.396329 | Random CV | 
In [59]:
Copied!
# Per-fold metrics of the spatial CV run as a table: one row per fold, with the
# negated error scores turned back into positive magnitudes and a constant
# cv_type label added as the last column.
df_sp_cv_scores = pd.DataFrame(
    {
        'r2': spatial_cluster_scores['test_r2'],
        'rmse': np.abs(spatial_cluster_scores['test_neg_root_mean_squared_error']),
        'mae': np.abs(spatial_cluster_scores["test_neg_mean_absolute_error"]),
    }
).assign(cv_type='Spatial CV')
# Per-fold metrics of the spatial CV run as a table: one row per fold, with the
# negated error scores turned back into positive magnitudes and a constant
# cv_type label added as the last column.
df_sp_cv_scores = pd.DataFrame(
    {
        'r2': spatial_cluster_scores['test_r2'],
        'rmse': np.abs(spatial_cluster_scores['test_neg_root_mean_squared_error']),
        'mae': np.abs(spatial_cluster_scores["test_neg_mean_absolute_error"]),
    }
).assign(cv_type='Spatial CV')
In [60]:
Copied!
# Display the spatial CV per-fold metrics table
df_sp_cv_scores
df_sp_cv_scores
Out[60]:
| | r2 | rmse | mae | cv_type |
|---|---|---|---|---|
| 0 | 0.506854 | 41955.228924 | 31754.196160 | Spatial CV | 
| 1 | 0.589384 | 49279.438108 | 35086.445072 | Spatial CV | 
| 2 | 0.420164 | 45746.185370 | 33609.132589 | Spatial CV | 
| 3 | 0.476515 | 32873.931918 | 23596.485897 | Spatial CV | 
| 4 | 0.089346 | 53884.877554 | 30780.250176 | Spatial CV | 
| 5 | 0.298166 | 83784.012911 | 57463.258874 | Spatial CV | 
| 6 | 0.377789 | 54303.037838 | 41568.852584 | Spatial CV | 
| 7 | 0.423106 | 37589.616890 | 28509.484476 | Spatial CV | 
| 8 | 0.176537 | 29506.446920 | 21597.550027 | Spatial CV | 
| 9 | 0.090328 | 32487.428903 | 25546.928814 | Spatial CV | 
In [61]:
Copied!
# Stack the random and spatial score tables into one long-form frame for plotting.
# ignore_index=True renumbers the rows so the result has unique index labels
# instead of repeating each source frame's own labels.
cv_metrics = pd.concat([df_rn_cv_scores, df_sp_cv_scores], ignore_index=True)
# Stack the random and spatial score tables into one long-form frame for plotting.
# ignore_index=True renumbers the rows so the result has unique index labels
# instead of repeating each source frame's own labels.
cv_metrics = pd.concat([df_rn_cv_scores, df_sp_cv_scores], ignore_index=True)
In [62]:
Copied!
# Display the combined random + spatial CV metrics table
cv_metrics
cv_metrics
Out[62]:
| | r2 | rmse | mae | cv_type |
|---|---|---|---|---|
| 0 | 0.681868 | 44622.408387 | 30212.752461 | Random CV | 
| 1 | 0.657781 | 39772.657421 | 28792.383985 | Random CV | 
| 2 | 0.712206 | 44832.521408 | 31229.557212 | Random CV | 
| 3 | 0.687533 | 45450.036922 | 32461.800179 | Random CV | 
| 4 | 0.628370 | 50212.560093 | 32897.037797 | Random CV | 
| 5 | 0.600210 | 49287.988847 | 32580.578491 | Random CV | 
| 6 | 0.695413 | 39787.319701 | 29550.977897 | Random CV | 
| 7 | 0.595912 | 52631.127774 | 35188.523730 | Random CV | 
| 8 | 0.615846 | 58187.698845 | 35547.253339 | Random CV | 
| 9 | 0.713307 | 38817.222215 | 28150.396329 | Random CV | 
| 0 | 0.506854 | 41955.228924 | 31754.196160 | Spatial CV | 
| 1 | 0.589384 | 49279.438108 | 35086.445072 | Spatial CV | 
| 2 | 0.420164 | 45746.185370 | 33609.132589 | Spatial CV | 
| 3 | 0.476515 | 32873.931918 | 23596.485897 | Spatial CV | 
| 4 | 0.089346 | 53884.877554 | 30780.250176 | Spatial CV | 
| 5 | 0.298166 | 83784.012911 | 57463.258874 | Spatial CV | 
| 6 | 0.377789 | 54303.037838 | 41568.852584 | Spatial CV | 
| 7 | 0.423106 | 37589.616890 | 28509.484476 | Spatial CV | 
| 8 | 0.176537 | 29506.446920 | 21597.550027 | Spatial CV | 
| 9 | 0.090328 | 32487.428903 | 25546.928814 | Spatial CV | 
In [63]:
Copied!
# One row of three box plots comparing random vs. spatial CV, one panel per metric.
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
my_colors = {'Random CV': '#1f77b4', 'Spatial CV': '#ff7f0e', 'C': 'gold'}
# Style of the marker drawn at each box's mean (showmeans=True)
mean_marker = {"markersize": 2.5, "markerfacecolor": "#ef3b2c",
               "markeredgecolor": "#ef3b2c"}
# (column in cv_metrics, panel title), in left-to-right panel order
panels = [('r2', r'$R^2$'), ('rmse', 'RMSE'), ('mae', 'MAE')]
for panel_ax, (metric, panel_title) in zip(ax, panels):
    sns.boxplot(data=cv_metrics,
                hue='cv_type',
                x='cv_type',
                y=metric,
                dodge=False, width=.3, linewidth=1, ax=panel_ax,
                palette=my_colors,
                # fresh dicts per call so no panel shares mutable style state
                showmeans=True, meanprops=dict(mean_marker),
                flierprops=dict(markersize=3))
    panel_ax.set_title(panel_title, fontsize=11)
    panel_ax.set_ylabel('')
    panel_ax.set_xlabel('')
    panel_ax.tick_params(labelsize=9)
    # hue duplicates the x axis, so hide the legend on this specific axes
    # (explicitly, rather than through pyplot's implicit current axes)
    panel_ax.legend([], [], frameon=False)
# Only the R2 panel gets a fixed 0-1 y-range
ax[0].set_ylim(0, 1)
plt.show()
# One row of three box plots comparing random vs. spatial CV, one panel per metric.
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
my_colors = {'Random CV': '#1f77b4', 'Spatial CV': '#ff7f0e', 'C': 'gold'}
# Style of the marker drawn at each box's mean (showmeans=True)
mean_marker = {"markersize": 2.5, "markerfacecolor": "#ef3b2c",
               "markeredgecolor": "#ef3b2c"}
# (column in cv_metrics, panel title), in left-to-right panel order
panels = [('r2', r'$R^2$'), ('rmse', 'RMSE'), ('mae', 'MAE')]
for panel_ax, (metric, panel_title) in zip(ax, panels):
    sns.boxplot(data=cv_metrics,
                hue='cv_type',
                x='cv_type',
                y=metric,
                dodge=False, width=.3, linewidth=1, ax=panel_ax,
                palette=my_colors,
                # fresh dicts per call so no panel shares mutable style state
                showmeans=True, meanprops=dict(mean_marker),
                flierprops=dict(markersize=3))
    panel_ax.set_title(panel_title, fontsize=11)
    panel_ax.set_ylabel('')
    panel_ax.set_xlabel('')
    panel_ax.tick_params(labelsize=9)
    # hue duplicates the x axis, so hide the legend on this specific axes
    # (explicitly, rather than through pyplot's implicit current axes)
    panel_ax.legend([], [], frameon=False)
# Only the R2 panel gets a fixed 0-1 y-range
ax[0].set_ylim(0, 1)
plt.show()