In [2]:
Copied!
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from spatialkfold.blocks import spatial_blocks
from spatialkfold.datasets import load_ames
from spatialkfold.clusters import spatial_kfold_clusters
from spatialkfold.plotting import spatial_kfold_plot
from spatialkfold.stats import spatial_kfold_stats
import numpy as np
import pandas as pd
import seaborn as sns
import geopandas as gpd
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from matplotlib import cm
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from spatialkfold.blocks import spatial_blocks
from spatialkfold.datasets import load_ames
from spatialkfold.clusters import spatial_kfold_clusters
from spatialkfold.plotting import spatial_kfold_plot
from spatialkfold.stats import spatial_kfold_stats
I. Spatial Resampling¶
In [3]:
Copied!
ames = load_ames()
ames = load_ames()
In [4]:
Copied!
ames.crs
ames.crs
Out[4]:
<Geographic 2D CRS: EPSG:4326> Name: WGS 84 Axis Info [ellipsoidal]: - Lat[north]: Geodetic latitude (degree) - Lon[east]: Geodetic longitude (degree) Area of Use: - name: World. - bounds: (-180.0, -90.0, 180.0, 90.0) Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
In [5]:
Copied!
ames
ames
Out[5]:
MS_SubClass | MS_Zoning | Lot_Frontage | Lot_Area | Street | Alley | Lot_Shape | Land_Contour | Utilities | Lot_Config | ... | Pool_QC | Fence | Misc_Feature | Misc_Val | Mo_Sold | Year_Sold | Sale_Type | Sale_Condition | Sale_Price | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 141.0 | 31770 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | None | 0 | 5 | 2010 | WD | Normal | 215000 | POINT (-93.61975 42.05403) |
1 | One_Story_1946_and_Newer_All_Styles | Residential_High_Density | 80.0 | 11622 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 6 | 2010 | WD | Normal | 105000 | POINT (-93.61976 42.05301) |
2 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 81.0 | 14267 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | Gar2 | 12500 | 6 | 2010 | WD | Normal | 172000 | POINT (-93.61939 42.05266) |
3 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 93.0 | 11160 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Corner | ... | No_Pool | No_Fence | None | 0 | 4 | 2010 | WD | Normal | 244000 | POINT (-93.61732 42.05125) |
4 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 13830 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 3 | 2010 | WD | Normal | 189900 | POINT (-93.63893 42.06090) |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2925 | Split_or_Multilevel | Residential_Low_Density | 37.0 | 7937 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | CulDSac | ... | No_Pool | Good_Privacy | None | 0 | 3 | 2006 | WD | Normal | 142500 | POINT (-93.60478 41.98896) |
2926 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 0.0 | 8885 | Pave | No_Alley_Access | Slightly_Irregular | Low | AllPub | Inside | ... | No_Pool | Minimum_Privacy | None | 0 | 6 | 2006 | WD | Normal | 131000 | POINT (-93.60268 41.98831) |
2927 | Split_Foyer | Residential_Low_Density | 62.0 | 10441 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | Minimum_Privacy | Shed | 700 | 7 | 2006 | WD | Normal | 132000 | POINT (-93.60685 41.98651) |
2928 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 77.0 | 10010 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | No_Fence | None | 0 | 4 | 2006 | WD | Normal | 170000 | POINT (-93.60019 41.99092) |
2929 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 9627 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | No_Pool | No_Fence | None | 0 | 11 | 2006 | WD | Normal | 188000 | POINT (-93.60000 41.98927) |
2930 rows × 73 columns
In [6]:
Copied!
# Reproject to the approriate coordinate reference system
# Reproject to the approriate coordinate reference system
In [7]:
Copied!
ames_prj = ames.copy().to_crs(ames.estimate_utm_crs())
ames_prj = ames.copy().to_crs(ames.estimate_utm_crs())
In [8]:
Copied!
ames_prj.crs
ames_prj.crs
Out[8]:
<Derived Projected CRS: EPSG:32615> Name: WGS 84 / UTM zone 15N Axis Info [cartesian]: - E[east]: Easting (metre) - N[north]: Northing (metre) Area of Use: - name: Between 96°W and 90°W, northern hemisphere between equator and 84°N, onshore and offshore. Canada - Manitoba; Nunavut; Ontario. Ecuador -Galapagos. Guatemala. Mexico. United States (USA). - bounds: (-96.0, 0.0, -90.0, 84.0) Coordinate Operation: - name: UTM zone 15N - method: Transverse Mercator Datum: World Geodetic System 1984 ensemble - Ellipsoid: WGS 84 - Prime Meridian: Greenwich
In [9]:
Copied!
# Add an id column for each data point
# Add an id column for each data point
In [10]:
Copied!
ames_prj['id'] = range(len(ames_prj))
ames_prj['id'] = range(len(ames_prj))
1. Spatial cluster resampling¶
Two clustering algorithms are supported:
- KMeans (By default)
- BisectingKMeans
1.1 Using KMeans¶
In [11]:
Copied!
ames_clusters = spatial_kfold_clusters(
gdf=ames_prj,
name='id',
nfolds= 10,
algorithm='kmeans',
n_init="auto",
random_state =569)
ames_clusters = spatial_kfold_clusters(
gdf=ames_prj,
name='id',
nfolds= 10,
algorithm='kmeans',
n_init="auto",
random_state =569)
In [12]:
Copied!
ames_clusters
ames_clusters
Out[12]:
MS_SubClass | MS_Zoning | Lot_Frontage | Lot_Area | Street | Alley | Lot_Shape | Land_Contour | Utilities | Lot_Config | ... | Misc_Feature | Misc_Val | Mo_Sold | Year_Sold | Sale_Type | Sale_Condition | Sale_Price | geometry | id | folds | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 141.0 | 31770 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | None | 0 | 5 | 2010 | WD | Normal | 215000 | POINT (448716.789 4655961.485) | 0 | 9 |
1 | One_Story_1946_and_Newer_All_Styles | Residential_High_Density | 80.0 | 11622 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 6 | 2010 | WD | Normal | 105000 | POINT (448715.802 4655848.124) | 1 | 9 |
2 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 81.0 | 14267 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Corner | ... | Gar2 | 12500 | 6 | 2010 | WD | Normal | 172000 | POINT (448746.026 4655808.487) | 2 | 9 |
3 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 93.0 | 11160 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Corner | ... | None | 0 | 4 | 2010 | WD | Normal | 244000 | POINT (448915.962 4655650.253) | 3 | 9 |
4 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 13830 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | Inside | ... | None | 0 | 3 | 2010 | WD | Normal | 189900 | POINT (447135.458 4656735.276) | 4 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2925 | Split_or_Multilevel | Residential_Low_Density | 37.0 | 7937 | Pave | No_Alley_Access | Slightly_Irregular | Lvl | AllPub | CulDSac | ... | None | 0 | 3 | 2006 | WD | Normal | 142500 | POINT (449905.137 4648727.785) | 2925 | 3 |
2926 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 0.0 | 8885 | Pave | No_Alley_Access | Slightly_Irregular | Low | AllPub | Inside | ... | None | 0 | 6 | 2006 | WD | Normal | 131000 | POINT (450078.246 4648654.392) | 2926 | 3 |
2927 | Split_Foyer | Residential_Low_Density | 62.0 | 10441 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | Shed | 700 | 7 | 2006 | WD | Normal | 132000 | POINT (449731.661 4648456.532) | 2927 | 3 |
2928 | One_Story_1946_and_Newer_All_Styles | Residential_Low_Density | 77.0 | 10010 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 4 | 2006 | WD | Normal | 170000 | POINT (450286.530 4648942.397) | 2928 | 3 |
2929 | Two_Story_1946_and_Newer | Residential_Low_Density | 74.0 | 9627 | Pave | No_Alley_Access | Regular | Lvl | AllPub | Inside | ... | None | 0 | 11 | 2006 | WD | Normal | 188000 | POINT (450301.311 4648758.419) | 2929 | 3 |
2930 rows × 75 columns
In [13]:
Copied!
# Get the 'tab20' colormap
cols_tab = cm.get_cmap('tab20', 10)
# Generate a list of colors from the colormap
cols = [cols_tab(i) for i in range(10)]
# create a color ramp
color_ramp = ListedColormap(cols)
# Get the 'tab20' colormap
cols_tab = cm.get_cmap('tab20', 10)
# Generate a list of colors from the colormap
cols = [cols_tab(i) for i in range(10)]
# create a color ramp
color_ramp = ListedColormap(cols)
In [14]:
Copied!
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_clusters.plot(column='folds', ax=ax, cmap=color_ramp, markersize=2, legend=True)
ax.set_title('Spatially Clustered Folds\nUsing KMeans')
plt.show()
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_clusters.plot(column='folds', ax=ax, cmap=color_ramp, markersize=2, legend=True)
ax.set_title('Spatially Clustered Folds\nUsing KMeans')
plt.show()
In [15]:
Copied!
# check the number of train and test of the dependent variable for each fold
# check the number of train and test of the dependent variable for each fold
In [16]:
Copied!
ames_clusters_stats = spatial_kfold_stats(X=ames_clusters, y=ames_clusters.Sale_Price, groups=ames_clusters.folds)
ames_clusters_stats = spatial_kfold_stats(X=ames_clusters, y=ames_clusters.Sale_Price, groups=ames_clusters.folds)
In [17]:
Copied!
ames_clusters_stats
ames_clusters_stats
Out[17]:
split | train | test | |
---|---|---|---|
0 | 1 | 2645 | 285 |
1 | 2 | 2427 | 503 |
2 | 3 | 2769 | 161 |
3 | 4 | 2645 | 285 |
4 | 5 | 2577 | 353 |
5 | 6 | 2568 | 362 |
6 | 7 | 2824 | 106 |
7 | 8 | 2658 | 272 |
8 | 9 | 2640 | 290 |
9 | 10 | 2617 | 313 |
1.2 Using BisectingKMeans¶
In [18]:
Copied!
ames_clusters_Biskmeans = spatial_kfold_clusters(
gdf=ames_prj,
name='id',
nfolds=10,
algorithm='bisectingkmeans',
random_state =569)
ames_clusters_Biskmeans = spatial_kfold_clusters(
gdf=ames_prj,
name='id',
nfolds=10,
algorithm='bisectingkmeans',
random_state =569)
In [19]:
Copied!
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_clusters_Biskmeans.plot(column='folds', ax=ax, cmap=color_ramp, markersize=2, legend=True)
ax.set_title('Spatially Clustered Folds\nUsing BisectingKMeans')
plt.show()
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_clusters_Biskmeans.plot(column='folds', ax=ax, cmap=color_ramp, markersize=2, legend=True)
ax.set_title('Spatially Clustered Folds\nUsing BisectingKMeans')
plt.show()
In [20]:
Copied!
ames_clusters_stats_ = spatial_kfold_stats(X=ames_clusters_Biskmeans, y=ames_clusters_Biskmeans.Sale_Price, groups=ames_clusters_Biskmeans.folds)
ames_clusters_stats_ = spatial_kfold_stats(X=ames_clusters_Biskmeans, y=ames_clusters_Biskmeans.Sale_Price, groups=ames_clusters_Biskmeans.folds)
In [21]:
Copied!
ames_clusters_stats_
ames_clusters_stats_
Out[21]:
split | train | test | |
---|---|---|---|
0 | 1 | 2561 | 369 |
1 | 2 | 2678 | 252 |
2 | 3 | 2633 | 297 |
3 | 4 | 2572 | 358 |
4 | 5 | 2457 | 473 |
5 | 6 | 2605 | 325 |
6 | 7 | 2759 | 171 |
7 | 8 | 2579 | 351 |
8 | 9 | 2769 | 161 |
9 | 10 | 2757 | 173 |
In [22]:
Copied!
# create 10 random blocks
ames_rnd_blocks = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="random", # "continuous"
orientation="tb-lr", # "bt-rl"
grid_type="rect", # "hex"
nfolds=10,
random_state=135
)
# create 10 random blocks
ames_rnd_blocks = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="random", # "continuous"
orientation="tb-lr", # "bt-rl"
grid_type="rect", # "hex"
nfolds=10,
random_state=135
)
In [23]:
Copied!
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax ,lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Random Blocks Folds')
fig, ax = plt.subplots(1,1 , figsize=(9, 4))
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax ,lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Random Blocks Folds')
Out[23]:
Text(0.5, 1.0, 'Random Blocks Folds')
In [24]:
Copied!
# resample the ames data with the prepared blocks
ames_res_rnd_blk = gpd.overlay(ames_prj, ames_rnd_blocks)
# resample the ames data with the prepared blocks
ames_res_rnd_blk = gpd.overlay(ames_prj, ames_rnd_blocks)
In [25]:
Copied!
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax, lw=0.7)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=3)
ax.set_title('Spatially Resampled\nRandom Blocks')
plt.show()
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax, lw=0.7)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=3)
ax.set_title('Spatially Resampled\nRandom Blocks')
plt.show()
In [26]:
Copied!
fig, ax = plt.subplots(1,2 , figsize=(10, 6))
# plot 1
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
im1 = ax[1].scatter(ames_res_rnd_blk.geometry.x , ames_res_rnd_blk.geometry.y, c=ames_res_rnd_blk['folds'],
cmap=color_ramp, s=5)
axins1 = inset_axes(
ax[1],
width="5%", # width: 5% of parent_bbox width
height="50%", # height: 50%
loc="lower left",
bbox_to_anchor=(1.05, 0, 1, 2),
bbox_transform=ax[1].transAxes,
borderpad=0
)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
fig, ax = plt.subplots(1,2 , figsize=(10, 6))
# plot 1
ames_rnd_blocks.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2
ames_rnd_blocks.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
im1 = ax[1].scatter(ames_res_rnd_blk.geometry.x , ames_res_rnd_blk.geometry.y, c=ames_res_rnd_blk['folds'],
cmap=color_ramp, s=5)
axins1 = inset_axes(
ax[1],
width="5%", # width: 5% of parent_bbox width
height="50%", # height: 50%
loc="lower left",
bbox_to_anchor=(1.05, 0, 1, 2),
bbox_transform=ax[1].transAxes,
borderpad=0
)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
In [27]:
Copied!
ames_res_rnd_blk_stats = spatial_kfold_stats(X=ames_res_rnd_blk, y=ames_res_rnd_blk.Sale_Price, groups=ames_res_rnd_blk.folds)
ames_res_rnd_blk_stats = spatial_kfold_stats(X=ames_res_rnd_blk, y=ames_res_rnd_blk.Sale_Price, groups=ames_res_rnd_blk.folds)
In [28]:
Copied!
ames_res_rnd_blk_stats
ames_res_rnd_blk_stats
Out[28]:
split | train | test | |
---|---|---|---|
0 | 1 | 2893 | 37 |
1 | 2 | 2371 | 559 |
2 | 3 | 2414 | 516 |
3 | 4 | 2472 | 458 |
4 | 5 | 2687 | 243 |
5 | 6 | 2608 | 322 |
6 | 7 | 2720 | 210 |
7 | 8 | 2589 | 341 |
8 | 9 | 2920 | 10 |
9 | 10 | 2696 | 234 |
- We could also use Hexagonal grid type instead
In [ ]:
Copied!
# create 10 random blocks
ames_rnd_blocks_hex = spatial_blocks(
gdf=ames_prj,
width=1500,
height=0, # If you use hex as grid_type, only the width is needed. If you provide height, it will be ignored
method="random", # "continuous"
orientation="tb-lr", # "bt-rl"
grid_type="hex",
nfolds=10,
random_state=135
)
ames_res_rnd_blk_hex = gpd.overlay(ames_prj, ames_rnd_blocks_hex)
# create 10 random blocks
ames_rnd_blocks_hex = spatial_blocks(
gdf=ames_prj,
width=1500,
height=0, # If you use hex as grid_type, only the width is needed. If you provide height, it will be ignored
method="random", # "continuous"
orientation="tb-lr", # "bt-rl"
grid_type="hex",
nfolds=10,
random_state=135
)
ames_res_rnd_blk_hex = gpd.overlay(ames_prj, ames_rnd_blocks_hex)
In [30]:
Copied!
fig, ax = plt.subplots(1,2 , figsize=(10, 6))
# plot 1
ames_rnd_blocks_hex.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2
ames_rnd_blocks_hex.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk_hex.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
im1 = ax[1].scatter(ames_res_rnd_blk_hex.geometry.x , ames_res_rnd_blk_hex.geometry.y, c=ames_res_rnd_blk_hex['folds'],
cmap=color_ramp, s=5)
axins1 = inset_axes(
ax[1],
width="5%", # width: 5% of parent_bbox width
height="50%", # height: 50%
loc="lower left",
bbox_to_anchor=(1.05, 0, 1, 2),
bbox_transform=ax[1].transAxes,
borderpad=0
)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
fig, ax = plt.subplots(1,2 , figsize=(10, 6))
# plot 1
ames_rnd_blocks_hex.plot(column='folds', cmap=color_ramp, ax=ax[0] , lw=0.7, legend=False)
ames_prj.plot(ax=ax[0], markersize=2, color='r')
ax[0].set_title('Random Blocks Folds')
# plot 2
ames_rnd_blocks_hex.plot(facecolor="none", edgecolor='grey', ax=ax[1], lw=0.7, legend=False)
ames_res_rnd_blk_hex.plot(column='folds', cmap=color_ramp, legend=False, ax=ax[1], markersize = 2)
ax[1].set_title('Spatially Resampled\nrandom blocks')
im1 = ax[1].scatter(ames_res_rnd_blk_hex.geometry.x , ames_res_rnd_blk_hex.geometry.y, c=ames_res_rnd_blk_hex['folds'],
cmap=color_ramp, s=5)
axins1 = inset_axes(
ax[1],
width="5%", # width: 5% of parent_bbox width
height="50%", # height: 50%
loc="lower left",
bbox_to_anchor=(1.05, 0, 1, 2),
bbox_transform=ax[1].transAxes,
borderpad=0
)
fig.colorbar(im1, cax=axins1, ticks= range(1,11))
plt.show()
2.2 Continuous spatial resampled blocks¶
Two option are availble with orientation :
- 'tb-lr' : top-bottom, left-right
- ''bt-rl' : bottom-top, right-left
2.2.1. 'tb-lr' : top-bottom, left-right¶
In [31]:
Copied!
ames_cont_blocks = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="continuous",
orientation="tb-lr",
grid_type="rect",
nfolds=10,
random_state=135
)
ames_cont_blocks = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="continuous",
orientation="tb-lr",
grid_type="rect",
nfolds=10,
random_state=135
)
In [32]:
Copied!
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_cont_blocks.plot(column='folds', cmap=color_ramp, ax=ax , lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Continuous Blocks Folds\norientation:"tb-lr"')
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_cont_blocks.plot(column='folds', cmap=color_ramp, ax=ax , lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Continuous Blocks Folds\norientation:"tb-lr"')
Out[32]:
Text(0.5, 1.0, 'Continuous Blocks Folds\norientation:"tb-lr"')
In [33]:
Copied!
# resample the ames data with the prepared blocks
ames_res_cont_blk = gpd.overlay(ames_prj, ames_cont_blocks)
# resample the ames data with the prepared blocks
ames_res_cont_blk = gpd.overlay(ames_prj, ames_cont_blocks)
In [34]:
Copied!
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_cont_blocks.plot(facecolor="none", edgecolor='grey', ax=ax ,lw=0.7)
ames_res_cont_blk.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=2)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "tb-lr"')
plt.show()
fig, ax = plt.subplots(1, 1, figsize=(9, 4))
ames_cont_blocks.plot(facecolor="none", edgecolor='grey', ax=ax ,lw=0.7)
ames_res_cont_blk.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=2)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "tb-lr"')
plt.show()
In [35]:
Copied!
ames_res_cont_blk_stats = spatial_kfold_stats(X=ames_res_cont_blk, y=ames_res_cont_blk.Sale_Price, groups=ames_res_cont_blk.folds)
ames_res_cont_blk_stats = spatial_kfold_stats(X=ames_res_cont_blk, y=ames_res_cont_blk.Sale_Price, groups=ames_res_cont_blk.folds)
In [36]:
Copied!
ames_res_cont_blk_stats
ames_res_cont_blk_stats
Out[36]:
split | train | test | |
---|---|---|---|
0 | 1 | 2446 | 484 |
1 | 2 | 2628 | 302 |
2 | 3 | 2801 | 129 |
3 | 4 | 2567 | 363 |
4 | 5 | 2367 | 563 |
5 | 6 | 2402 | 528 |
6 | 7 | 2894 | 36 |
7 | 8 | 2444 | 486 |
8 | 9 | 2896 | 34 |
9 | 10 | 2925 | 5 |
2.2.2. 'bt-rl' : bottom-top, right-left¶
In [37]:
Copied!
ames_cont_blocks_rev = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="continuous",
orientation='bt-rl',
grid_type="rect",
nfolds=10,
random_state=135
)
ames_cont_blocks_rev = spatial_blocks(
gdf=ames_prj,
width=1500,
height=1500,
method="continuous",
orientation='bt-rl',
grid_type="rect",
nfolds=10,
random_state=135
)
In [38]:
Copied!
fig, ax = plt.subplots(1, 1 , figsize=(9, 4))
ames_cont_blocks_rev.plot(column='folds',cmap=color_ramp, ax=ax , lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Continuous Blocks Folds\norientation:"bt-rl"')
fig, ax = plt.subplots(1, 1 , figsize=(9, 4))
ames_cont_blocks_rev.plot(column='folds',cmap=color_ramp, ax=ax , lw=0.7, legend=True)
ames_prj.plot(ax=ax, markersize=1, color='r')
ax.set_title('Continuous Blocks Folds\norientation:"bt-rl"')
Out[38]:
Text(0.5, 1.0, 'Continuous Blocks Folds\norientation:"bt-rl"')
In [39]:
Copied!
# resample the ames data with the prepared blocks
ames_res_cont_blk_rev = gpd.overlay(ames_prj, ames_cont_blocks_rev)
# resample the ames data with the prepared blocks
ames_res_cont_blk_rev = gpd.overlay(ames_prj, ames_cont_blocks_rev)
In [40]:
Copied!
fig, ax = plt.subplots(1, 1 , figsize=(9, 4))
ames_cont_blocks_rev.plot(facecolor="none", edgecolor='grey', ax=ax, lw=0.7)
ames_res_cont_blk_rev.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=2)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "bt-rl"')
plt.show()
fig, ax = plt.subplots(1, 1 , figsize=(9, 4))
ames_cont_blocks_rev.plot(facecolor="none", edgecolor='grey', ax=ax, lw=0.7)
ames_res_cont_blk_rev.plot(column='folds', cmap=color_ramp, legend=True, ax=ax, markersize=2)
ax.set_title('Spatially Resampled\nContinuous Blocks Folds. "bt-rl"')
plt.show()
In [41]:
Copied!
ames_res_cont_blk_rev_stats = spatial_kfold_stats(X=ames_res_cont_blk_rev, y=ames_res_cont_blk_rev.Sale_Price, groups=ames_res_cont_blk_rev.folds)
ames_res_cont_blk_rev_stats = spatial_kfold_stats(X=ames_res_cont_blk_rev, y=ames_res_cont_blk_rev.Sale_Price, groups=ames_res_cont_blk_rev.folds)
In [42]:
Copied!
ames_res_cont_blk_rev_stats
ames_res_cont_blk_rev_stats
Out[42]:
split | train | test | |
---|---|---|---|
0 | 1 | 2892 | 38 |
1 | 2 | 2443 | 487 |
2 | 3 | 2776 | 154 |
3 | 4 | 2350 | 580 |
4 | 5 | 2306 | 624 |
5 | 6 | 2758 | 172 |
6 | 7 | 2841 | 89 |
7 | 8 | 2772 | 158 |
8 | 9 | 2783 | 147 |
9 | 10 | 2449 | 481 |
3. Plotting function¶
Plot the partitioning of the data at each fold
In [43]:
Copied!
# check the folds number
np.unique(ames_clusters.folds.values)
# check the folds number
np.unique(ames_clusters.folds.values)
Out[43]:
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=int32)
In [44]:
Copied!
for i in range(1,11):
fig, ax = plt.subplots(1, 1 ,figsize=(9, 4))
spatial_kfold_plot(X=ames_clusters,
geometry=ames_clusters.geometry,
groups=ames_clusters.folds,
fold_num=i, cmap='viridis', ax=ax)
for i in range(1,11):
fig, ax = plt.subplots(1, 1 ,figsize=(9, 4))
spatial_kfold_plot(X=ames_clusters,
geometry=ames_clusters.geometry,
groups=ames_clusters.folds,
fold_num=i, cmap='viridis', ax=ax)
II. Comparaison : Random VS Spatial Cross validataion¶
In [45]:
Copied!
x = ames_prj[['Year_Built', 'Bldg_Type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
le = preprocessing.LabelEncoder()
le.fit(x["Bldg_Type"])
list(le.classes_)
le.transform(ames_prj["Bldg_Type"])
x = ames_prj[['Year_Built', 'Bldg_Type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
le = preprocessing.LabelEncoder()
le.fit(x["Bldg_Type"])
list(le.classes_)
le.transform(ames_prj["Bldg_Type"])
Out[45]:
array([1, 1, 1, ..., 1, 1, 1])
In [46]:
Copied!
# Create a copy of the DataFrame and transform the 'Bldg_Type' column to float values
x_copy = x.copy()
x_copy['type'] = le.transform(x_copy["Bldg_Type"])
# Create a copy of the DataFrame and transform the 'Bldg_Type' column to float values
x_copy = x.copy()
x_copy['type'] = le.transform(x_copy["Bldg_Type"])
In [47]:
Copied!
# Get the independent variables and the dependent variable
X = x_copy[['Year_Built', 'type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
# Get the independent variables and the dependent variable
X = x_copy[['Year_Built', 'type', 'Gr_Liv_Area']]
y = ames_prj[['Sale_Price']]
In [48]:
Copied!
X
X
Out[48]:
Year_Built | type | Gr_Liv_Area | |
---|---|---|---|
0 | 1960 | 1 | 1656 |
1 | 1961 | 1 | 896 |
2 | 1958 | 1 | 1329 |
3 | 1968 | 1 | 2110 |
4 | 1997 | 1 | 1629 |
... | ... | ... | ... |
2925 | 1984 | 1 | 1003 |
2926 | 1983 | 1 | 902 |
2927 | 1992 | 1 | 970 |
2928 | 1974 | 1 | 1389 |
2929 | 1993 | 1 | 2000 |
2930 rows × 3 columns
In [49]:
Copied!
y
y
Out[49]:
Sale_Price | |
---|---|
0 | 215000 |
1 | 105000 |
2 | 172000 |
3 | 244000 |
4 | 189900 |
... | ... |
2925 | 142500 |
2926 | 131000 |
2927 | 132000 |
2928 | 170000 |
2929 | 188000 |
2930 rows × 1 columns
1. Random CV¶
In [50]:
Copied!
# Initiate a linear regressor
reg = LinearRegression()
# Initiate a random CV
kf = KFold(n_splits = 10, shuffle=True, random_state=123)
# evaluate the model and collect the results
random_n_scores = cross_validate(reg, X, y.values.ravel(),
scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
cv=kf, n_jobs=-1, error_score='raise')
# Initiate a linear regressor
reg = LinearRegression()
# Initiate a random CV
kf = KFold(n_splits = 10, shuffle=True, random_state=123)
# evaluate the model and collect the results
random_n_scores = cross_validate(reg, X, y.values.ravel(),
scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
cv=kf, n_jobs=-1, error_score='raise')
In [51]:
Copied!
random_n_scores
random_n_scores
Out[51]:
{'fit_time': array([0.0150876 , 0.01461196, 0.07606864, 0.01363134, 0.01567721, 0.0122335 , 0.01297998, 0.00495601, 0.01281571, 0.0111711 ]), 'score_time': array([0.01175451, 0.00338268, 0.02259111, 0.00947595, 0.0103538 , 0.01029325, 0.00297976, 0.01099586, 0.01003385, 0.00338864]), 'test_neg_root_mean_squared_error': array([-44622.40838673, -39772.65742074, -44832.52140754, -45450.03692245, -50212.56009308, -49287.98884697, -39787.31970143, -52631.12777371, -58187.6988452 , -38817.22221497]), 'test_r2': array([0.68186786, 0.65778112, 0.71220595, 0.68753325, 0.62837042, 0.6002097 , 0.69541257, 0.59591238, 0.61584596, 0.71330735]), 'test_neg_mean_absolute_error': array([-30212.75246135, -28792.38398508, -31229.5572116 , -32461.80017946, -32897.03779732, -32580.57849056, -29550.97789663, -35188.52372995, -35547.25333851, -28150.39632891])}
In [52]:
Copied!
rn_cv_r2 = np.mean(random_n_scores["test_r2"])
rn_cv_rmse = np.mean(abs(random_n_scores["test_neg_root_mean_squared_error"]))
rn_cv_mae = np.mean(abs(random_n_scores["test_neg_mean_absolute_error"]))
print('R2 :',rn_cv_r2)
print('RMSE :',rn_cv_rmse)
print('MAE :',rn_cv_mae)
rn_cv_r2 = np.mean(random_n_scores["test_r2"])
rn_cv_rmse = np.mean(abs(random_n_scores["test_neg_root_mean_squared_error"]))
rn_cv_mae = np.mean(abs(random_n_scores["test_neg_mean_absolute_error"]))
print('R2 :',rn_cv_r2)
print('RMSE :',rn_cv_rmse)
print('MAE :',rn_cv_mae)
R2 : 0.6588446547591043 RMSE : 46360.15416128094 MAE : 31661.12614193703
Spatial CV:¶
Using a Leave Region Out Cross vlidation
In [53]:
Copied!
# Initiate a leave group out cross valiadtion
group_cvs = LeaveOneGroupOut()
# Evaluate the model and collect the results
spatial_cluster_scores = cross_validate(reg, X, y.values.ravel(),
scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
cv= group_cvs.split(X, y, ames_clusters.folds.values.ravel()), # spatial cross validation
n_jobs=-1, error_score='raise')
# Initiate a leave group out cross valiadtion
group_cvs = LeaveOneGroupOut()
# Evaluate the model and collect the results
spatial_cluster_scores = cross_validate(reg, X, y.values.ravel(),
scoring= ['neg_root_mean_squared_error', 'r2', 'neg_mean_absolute_error'],
cv= group_cvs.split(X, y, ames_clusters.folds.values.ravel()), # spatial cross validation
n_jobs=-1, error_score='raise')
In [54]:
Copied!
spatial_cluster_scores
spatial_cluster_scores
Out[54]:
{'fit_time': array([0.01505804, 0.01767373, 0.04996634, 0.01555181, 0.02358913, 0.02461791, 0.01460123, 0.03371215, 0.02450013, 0.01760197]), 'score_time': array([0.01214123, 0.0113101 , 0.01480627, 0.0123055 , 0.00381994, 0.00370812, 0.00353646, 0.01049113, 0.00463915, 0.00336456]), 'test_neg_root_mean_squared_error': array([-41955.22892416, -49279.43810801, -45746.18536977, -32873.9319184 , -53884.87755416, -83784.012911 , -54303.03783829, -37589.61688976, -29506.44691955, -32487.42890339]), 'test_r2': array([0.50685433, 0.58938426, 0.42016374, 0.4765155 , 0.08934565, 0.2981662 , 0.37778871, 0.4231063 , 0.17653683, 0.09032787]), 'test_neg_mean_absolute_error': array([-31754.19616039, -35086.44507158, -33609.13258888, -23596.48589712, -30780.25017551, -57463.25887411, -41568.85258424, -28509.48447583, -21597.55002658, -25546.92881414])}
In [55]:
Copied!
sp_cv_r2 = np.mean(spatial_cluster_scores["test_r2"])
sp_cv_rmse = np.mean(abs(spatial_cluster_scores["test_neg_root_mean_squared_error"]))
sp_cv_mae = np.mean(abs(spatial_cluster_scores["test_neg_mean_absolute_error"]))
print('R2 :',sp_cv_r2)
print('RMSE :',sp_cv_rmse)
print('MAE :',sp_cv_mae)
sp_cv_r2 = np.mean(spatial_cluster_scores["test_r2"])
sp_cv_rmse = np.mean(abs(spatial_cluster_scores["test_neg_root_mean_squared_error"]))
sp_cv_mae = np.mean(abs(spatial_cluster_scores["test_neg_mean_absolute_error"]))
print('R2 :',sp_cv_r2)
print('RMSE :',sp_cv_rmse)
print('MAE :',sp_cv_mae)
R2 : 0.344818938658986 RMSE : 46141.02053364941 MAE : 32951.25846683899
In [56]:
Copied!
random_n_scores['test_r2']
random_n_scores['test_r2']
Out[56]:
array([0.68186786, 0.65778112, 0.71220595, 0.68753325, 0.62837042, 0.6002097 , 0.69541257, 0.59591238, 0.61584596, 0.71330735])
In [57]:
Copied!
df_rn_cv_scores = pd.DataFrame({'r2': random_n_scores['test_r2'],
'rmse' :abs(random_n_scores['test_neg_root_mean_squared_error']),
'mae' : abs(random_n_scores["test_neg_mean_absolute_error"]),
'cv_type' : 'Random CV'})
df_rn_cv_scores = pd.DataFrame({'r2': random_n_scores['test_r2'],
'rmse' :abs(random_n_scores['test_neg_root_mean_squared_error']),
'mae' : abs(random_n_scores["test_neg_mean_absolute_error"]),
'cv_type' : 'Random CV'})
In [58]:
Copied!
df_rn_cv_scores
df_rn_cv_scores
Out[58]:
r2 | rmse | mae | cv_type | |
---|---|---|---|---|
0 | 0.681868 | 44622.408387 | 30212.752461 | Random CV |
1 | 0.657781 | 39772.657421 | 28792.383985 | Random CV |
2 | 0.712206 | 44832.521408 | 31229.557212 | Random CV |
3 | 0.687533 | 45450.036922 | 32461.800179 | Random CV |
4 | 0.628370 | 50212.560093 | 32897.037797 | Random CV |
5 | 0.600210 | 49287.988847 | 32580.578491 | Random CV |
6 | 0.695413 | 39787.319701 | 29550.977897 | Random CV |
7 | 0.595912 | 52631.127774 | 35188.523730 | Random CV |
8 | 0.615846 | 58187.698845 | 35547.253339 | Random CV |
9 | 0.713307 | 38817.222215 | 28150.396329 | Random CV |
In [59]:
Copied!
df_sp_cv_scores = pd.DataFrame({'r2': spatial_cluster_scores['test_r2'],
'rmse' :abs(spatial_cluster_scores['test_neg_root_mean_squared_error']),
'mae' : abs(spatial_cluster_scores["test_neg_mean_absolute_error"]),
'cv_type' : 'Spatial CV'})
df_sp_cv_scores = pd.DataFrame({'r2': spatial_cluster_scores['test_r2'],
'rmse' :abs(spatial_cluster_scores['test_neg_root_mean_squared_error']),
'mae' : abs(spatial_cluster_scores["test_neg_mean_absolute_error"]),
'cv_type' : 'Spatial CV'})
In [60]:
Copied!
df_sp_cv_scores
df_sp_cv_scores
Out[60]:
r2 | rmse | mae | cv_type | |
---|---|---|---|---|
0 | 0.506854 | 41955.228924 | 31754.196160 | Spatial CV |
1 | 0.589384 | 49279.438108 | 35086.445072 | Spatial CV |
2 | 0.420164 | 45746.185370 | 33609.132589 | Spatial CV |
3 | 0.476515 | 32873.931918 | 23596.485897 | Spatial CV |
4 | 0.089346 | 53884.877554 | 30780.250176 | Spatial CV |
5 | 0.298166 | 83784.012911 | 57463.258874 | Spatial CV |
6 | 0.377789 | 54303.037838 | 41568.852584 | Spatial CV |
7 | 0.423106 | 37589.616890 | 28509.484476 | Spatial CV |
8 | 0.176537 | 29506.446920 | 21597.550027 | Spatial CV |
9 | 0.090328 | 32487.428903 | 25546.928814 | Spatial CV |
In [61]:
Copied!
cv_metrics = pd.concat([df_rn_cv_scores, df_sp_cv_scores])
cv_metrics = pd.concat([df_rn_cv_scores, df_sp_cv_scores])
In [62]:
Copied!
cv_metrics
cv_metrics
Out[62]:
r2 | rmse | mae | cv_type | |
---|---|---|---|---|
0 | 0.681868 | 44622.408387 | 30212.752461 | Random CV |
1 | 0.657781 | 39772.657421 | 28792.383985 | Random CV |
2 | 0.712206 | 44832.521408 | 31229.557212 | Random CV |
3 | 0.687533 | 45450.036922 | 32461.800179 | Random CV |
4 | 0.628370 | 50212.560093 | 32897.037797 | Random CV |
5 | 0.600210 | 49287.988847 | 32580.578491 | Random CV |
6 | 0.695413 | 39787.319701 | 29550.977897 | Random CV |
7 | 0.595912 | 52631.127774 | 35188.523730 | Random CV |
8 | 0.615846 | 58187.698845 | 35547.253339 | Random CV |
9 | 0.713307 | 38817.222215 | 28150.396329 | Random CV |
0 | 0.506854 | 41955.228924 | 31754.196160 | Spatial CV |
1 | 0.589384 | 49279.438108 | 35086.445072 | Spatial CV |
2 | 0.420164 | 45746.185370 | 33609.132589 | Spatial CV |
3 | 0.476515 | 32873.931918 | 23596.485897 | Spatial CV |
4 | 0.089346 | 53884.877554 | 30780.250176 | Spatial CV |
5 | 0.298166 | 83784.012911 | 57463.258874 | Spatial CV |
6 | 0.377789 | 54303.037838 | 41568.852584 | Spatial CV |
7 | 0.423106 | 37589.616890 | 28509.484476 | Spatial CV |
8 | 0.176537 | 29506.446920 | 21597.550027 | Spatial CV |
9 | 0.090328 | 32487.428903 | 25546.928814 | Spatial CV |
In [63]:
Copied!
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
my_colors = {'Random CV': '#1f77b4', 'Spatial CV': '#ff7f0e', 'C': 'gold'}
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'r2',
dodge=False, width = .3, linewidth = 1, ax = ax[0],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"} ,flierprops=dict(markersize=3))
ax[0].set_title( r'$R^2$',fontsize=11)
ax[0].set_ylabel('')
ax[0].set_xlabel('')
ax[0].set_ylim(0, 1)
ax[0].tick_params(labelsize = 9)
ax[0].legend([],[], frameon=False)
#
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'rmse',
dodge=False, width = .3, linewidth = 1, ax = ax[1],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"}, flierprops=dict(markersize=3))
ax[1].set_title('RMSE',fontsize=11)
ax[1].set_ylabel('')
ax[1].set_xlabel('')
ax[1].tick_params(labelsize = 9)
ax[1].legend([],[], frameon=False)
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'mae',
dodge=False, width = .3, linewidth = 1, ax = ax[2],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"}, flierprops=dict(markersize=3))
ax[2].set_title('MAE',fontsize=11)
ax[2].set_ylabel('')
ax[2].set_xlabel('')
ax[2].tick_params(labelsize = 9)
plt.legend([],[], frameon=False)
plt.show()
fig, ax = plt.subplots(1, 3, figsize=(12, 4))
my_colors = {'Random CV': '#1f77b4', 'Spatial CV': '#ff7f0e', 'C': 'gold'}
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'r2',
dodge=False, width = .3, linewidth = 1, ax = ax[0],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"} ,flierprops=dict(markersize=3))
ax[0].set_title( r'$R^2$',fontsize=11)
ax[0].set_ylabel('')
ax[0].set_xlabel('')
ax[0].set_ylim(0, 1)
ax[0].tick_params(labelsize = 9)
ax[0].legend([],[], frameon=False)
#
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'rmse',
dodge=False, width = .3, linewidth = 1, ax = ax[1],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"}, flierprops=dict(markersize=3))
ax[1].set_title('RMSE',fontsize=11)
ax[1].set_ylabel('')
ax[1].set_xlabel('')
ax[1].tick_params(labelsize = 9)
ax[1].legend([],[], frameon=False)
sns.boxplot(data = cv_metrics,
hue = 'cv_type',
x = 'cv_type',
y = 'mae',
dodge=False, width = .3, linewidth = 1, ax = ax[2],
#palette= 'coolwarm',
palette = my_colors,
showmeans=True, meanprops={"markersize": "2.5", "markerfacecolor" : "#ef3b2c",
"markeredgecolor" : "#ef3b2c"}, flierprops=dict(markersize=3))
ax[2].set_title('MAE',fontsize=11)
ax[2].set_ylabel('')
ax[2].set_xlabel('')
ax[2].tick_params(labelsize = 9)
plt.legend([],[], frameon=False)
plt.show()