sPlotOpen Preprocessing#

sPlotOpen (Sabatini et al, 2021) is an open-access and environmentally and spatially balanced subset of the global sPlot vegetation plots data set v2.1 (Bruelheide et al, 2019).

This section covers:

Link plot coordinates with community wighted means (cwm)
Visualize plot density

Download#

sPlotOpen Data is available at the iDiv Data Repository. For this study we used version 52.

https://idata.idiv.de/ddm/Data/ShowData/3474

Packages#

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import LogNorm, Normalize
import cartopy.crs as ccrs
import cartopy as cart
from matplotlib.colors import BoundaryNorm
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1 import make_axes_locatable

Link plot coordinates to cmw data#

The data is stored in various tab-separated files:

sPlotOpen_header(2).txt : contains information on each plot, such as coordinates, date, biome, country, etc.
sPlotOpen_DT(1).txt : contains information per plot and species with abundance and relative cover
sPlotOpen_CWM_CWV(1).txt : contains information on trait community weighted means and variances for each plot and 18 traits (ln-transformed)

# load the community weighted means

cwm = pd.read_csv("./sPlotOpen/sPlotOpen_CWM_CWV(1).txt", sep= "\t")

plots = pd.read_csv("./sPlotOpen/sPlotOpen_header(2).txt", sep= "\t")

/net/home/swolf/.conda/envs/cartopy/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3172: DtypeWarning: Columns (16) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,

sPlot = pd.merge(cwm, plots, on='PlotObservationID', how='inner')

sPlot.head()

	PlotObservationID	TraitCoverage_cover	Species_richness	TraitCoverage_pa	LeafArea_CWM	StemDens_CWM	SLA_CWM	LeafC_perdrymass_CWM	LeafN_CWM	LeafP_CWM	...	Height_shrubs_lowest	Height_herbs_average	Height_herbs_lowest	Height_herbs_highest	SoilClim_PC1	SoilClim_PC2	Resample_1	Resample_2	Resample_3	Resample_1_consensus
0	16	0.277778	3	0.333333	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True
1	17	0.038462	2	0.500000	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True
2	18	0.047619	4	0.250000	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	False	False	True	False
3	20	0.666667	3	0.333333	3.686063	-0.907135	2.903715	6.136791	2.929729	0.739181	...	NaN	NaN	NaN	NaN	-3.66	0.546	False	True	False	False
4	22	0.538462	7	0.571429	3.899842	-0.900514	2.917708	6.131968	2.955072	0.733698	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True

5 rows × 86 columns

Change columns name#

sPlot.columns

Index(['PlotObservationID', 'TraitCoverage_cover', 'Species_richness',
       'TraitCoverage_pa', 'LeafArea_CWM', 'StemDens_CWM', 'SLA_CWM',
       'LeafC_perdrymass_CWM', 'LeafN_CWM', 'LeafP_CWM', 'PlantHeight_CWM',
       'SeedMass_CWM', 'Seed_length_CWM', 'LDMC_CWM', 'LeafNperArea_CWM',
       'LeafNPratio_CWM', 'Leaf_delta_15N_CWM', 'Seed_num_rep_unit_CWM',
       'Leaffreshmass_CWM', 'Stem_cond_dens_CWM', 'Disp_unit_leng_CWM',
       'Wood_vessel_length_CWM', 'LeafArea_CWV', 'StemDens_CWV', 'SLA_CWV',
       'LeafC_perdrymass_CWV', 'LeafN_CWV', 'LeafP_CWV', 'PlantHeight_CWV',
       'SeedMass_CWV', 'Seed_length_CWV', 'LDMC_CWV', 'LeafNperArea_CWV',
       'LeafNPratio_CWV', 'Leaf_delta_15N_CWV', 'Seed_num_rep_unit_CWV',
       'Leaffreshmass_CWV', 'Stem_cond_dens_CWV', 'Disp_unit_leng_CWV',
       'Wood_vessel_length_CWV', 'GIVD_ID', 'Dataset', 'Continent', 'Country',
       'Biome', 'Date_of_recording', 'Latitude', 'Longitude',
       'Location_uncertainty', 'Releve_area', 'Plant_recorded', 'Elevation',
       'Aspect', 'Slope', 'is_forest', 'ESY', 'Naturalness', 'Forest',
       'Shrubland', 'Grassland', 'Wetland', 'Sparse_vegetation', 'Cover_total',
       'Cover_tree_layer', 'Cover_shrub_layer', 'Cover_herb_layer',
       'Cover_moss_layer', 'Cover_lichen_layer', 'Cover_algae_layer',
       'Cover_litter_layer', 'Cover_bare_rocks', 'Cover_cryptogams',
       'Cover_bare_soil', 'Height_trees_highest', 'Height_trees_lowest',
       'Height_shrubs_highest', 'Height_shrubs_lowest', 'Height_herbs_average',
       'Height_herbs_lowest', 'Height_herbs_highest', 'SoilClim_PC1',
       'SoilClim_PC2', 'Resample_1', 'Resample_2', 'Resample_3',
       'Resample_1_consensus'],
      dtype='object')

# only variables not of interest have mixed data types
types = sPlot.applymap(type).apply(set)
types[types.apply(len) > 1]

Date_of_recording     {<class 'str'>, <class 'float'>}
is_forest            {<class 'float'>, <class 'bool'>}
ESY                   {<class 'str'>, <class 'float'>}
Naturalness           {<class 'str'>, <class 'float'>}
Forest               {<class 'float'>, <class 'bool'>}
Shrubland            {<class 'float'>, <class 'bool'>}
Grassland            {<class 'float'>, <class 'bool'>}
Wetland              {<class 'float'>, <class 'bool'>}
Sparse_vegetation    {<class 'float'>, <class 'bool'>}
dtype: object

Rename the trait columns to match the TRY summary stats:

sPlot.rename(columns = {'StemDens_CWM':'SSD'}, inplace = True)
sPlot.rename(columns = {'LeafC_perdrymass_CWM':'Leaf C'}, inplace = True)
sPlot.rename(columns = {'LeafN_CWM':'Leaf N per mass'}, inplace = True)
sPlot.rename(columns = {'LeafP_CWM':'Leaf P'}, inplace = True)
sPlot.rename(columns = {'LDMC_CWM':'LDMC'}, inplace = True)
sPlot.rename(columns = {'SeedMass_CWM':'Seed mass'}, inplace = True)
sPlot.rename(columns = {'Seed_length_CWM':'Seed length'}, inplace = True)
sPlot.rename(columns = {'LeafNperArea_CWM':'Leaf N per area'}, inplace = True)
sPlot.rename(columns = {'LeafNPratio_CWM':'Leaf N P ratio'}, inplace = True)
sPlot.rename(columns = {'Leaf_delta_15N_CWM':'Leaf delta15N'}, inplace = True)
sPlot.rename(columns = {'Leaffreshmass_CWM':'Leaf fresh mass'}, inplace = True)
sPlot.rename(columns = {'Seed_num_rep_unit_CWM':'Seeds per rep. unit'}, inplace = True)
sPlot.rename(columns = {'Stem_cond_dens_CWM':'Stem conduit density'}, inplace = True)
sPlot.rename(columns = {'Disp_unit_leng_CWM':'Dispersal unit length'}, inplace = True)
sPlot.rename(columns = {'Wood_vessel_length_CWM':'Conduit element length'}, inplace = True)
sPlot.rename(columns = {'PlantHeight_CWM':'Plant Height'}, inplace = True)
sPlot.rename(columns = {'LeafArea_CWM':'Leaf Area'}, inplace = True)
sPlot.rename(columns = {'SLA_CWM':'SLA'}, inplace = True)

Replace infinite values with NaN

sPlot = sPlot.replace(-np.inf, np.nan)
sPlot = sPlot.replace(np.inf, np.nan)

sPlot.head()

	PlotObservationID	TraitCoverage_cover	Species_richness	TraitCoverage_pa	Leaf Area	SSD	SLA	Leaf C	Leaf N per mass	Leaf P	...	Height_shrubs_lowest	Height_herbs_average	Height_herbs_lowest	Height_herbs_highest	SoilClim_PC1	SoilClim_PC2	Resample_1	Resample_2	Resample_3	Resample_1_consensus
0	16	0.277778	3	0.333333	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True
1	17	0.038462	2	0.500000	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True
2	18	0.047619	4	0.250000	3.678311	-1.047293	2.890748	6.128157	2.873263	1.114036	...	NaN	NaN	NaN	NaN	-3.66	0.546	False	False	True	False
3	20	0.666667	3	0.333333	3.686063	-0.907135	2.903715	6.136791	2.929729	0.739181	...	NaN	NaN	NaN	NaN	-3.66	0.546	False	True	False	False
4	22	0.538462	7	0.571429	3.899842	-0.900514	2.917708	6.131968	2.955072	0.733698	...	NaN	NaN	NaN	NaN	-3.66	0.546	True	False	False	True

5 rows × 86 columns

Save this dataframe as csv.

sPlot.to_csv("sPlotOpen/cwm_loc.csv", index=False)

Plot density#

def gridmap(long, lat, label, projection, colorbar=True):
    
    plt.rcParams.update({'font.size': 15})

    Z, xedges, yedges = np.histogram2d(np.array(long,dtype=float),
                                   np.array(lat),bins = [181, 91])

    #https://stackoverflow.com/questions/67801227/color-a-2d-histogram-not-by-density-but-by-the-mean-of-a-third-column
    #https://medium.com/analytics-vidhya/custom-strava-heatmap-231267dcd084
    
    #let function know what projection provided data is in:
    data_crs = ccrs.PlateCarree()
    
    #for colorbar
    cmap = plt.get_cmap('cool')
    im_ratio = Z.shape[0]/Z.shape[1]

    #plot map
    #create base plot of a world map
    ax = fig.add_subplot(1, 1, 1, projection=projection) # I used the PlateCarree projection from cartopy
    
    # set figure to map global extent (-180,180,-90,90)
    ax.set_global()
    
    #add coastlines
    ax.coastlines(resolution='110m', color='orange', linewidth=1.3)
    
    #add grid with values
    im = ax.pcolormesh(xedges, yedges, Z.T, cmap="cool", norm=LogNorm(), transform=data_crs, vmax = 400000)
    
    #add color bar
    if colorbar==True:
        fig.colorbar(im,fraction=0.046*im_ratio, pad=0.04, shrink=0.3, location="left", label=label)

Apply the gridmap function to plot a 2D histogramm of the sPlotOpen plots and save output as .pdf. You can also experiment with other projections. See https://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html for inspiration.

fig = plt.figure(figsize=(12, 12))
gridmap(sPlot['Longitude'], sPlot['Latitude'], "Number of sPlotOpen Plots", projection = ccrs.Robinson())
plt.savefig('../Figures/sPlot_density_Robinson_all.pdf', bbox_inches='tight')

/net/home/swolf/.conda/envs/cartopy/lib/python3.8/site-packages/cartopy/mpl/geoaxes.py:1797: MatplotlibDeprecationWarning: Passing parameters norm and vmin/vmax simultaneously is deprecated since 3.3 and will become an error two minor releases later. Please pass vmin/vmax directly to the norm when creating it.
  result = matplotlib.axes.Axes.pcolormesh(self, *args, **kwargs)

_images/Chapter_4_sPlotOpen_Preprocessing_25_1.png

Citizen science plant observations encode global trait patterns

sPlotOpen Preprocessing

Contents