{ "cells": [ { "cell_type": "markdown", "id": "771b28cd", "metadata": {}, "source": [ "# Link iNaturalist observations to TRY" ] }, { "cell_type": "markdown", "id": "88fc5732", "metadata": {}, "source": [ "Link iNaturalist vascular plant observations to the previously created TRY trait summary statistics.\n", "\n", "This section covers:\n", "\n", "- Load data\n", "- Link iNat and TRY\n", "- Fuzzy merge\n", "- Log trait values\n", "- Number of observations per trait\n", "- Plot observation density after linking" ] }, { "cell_type": "markdown", "id": "624c2ea8", "metadata": {}, "source": [ "## Packages" ] }, { "cell_type": "code", "execution_count": null, "id": "89e742b2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "import numpy as np\n", "\n", "# fuzzy matching\n", "#import rapidfuzz\n", "from rapidfuzz import process, fuzz\n", "\n", "# plotting\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from matplotlib.colors import LogNorm, Normalize\n", "import cartopy.crs as ccrs # plot maps\n", "from matplotlib.colors import BoundaryNorm\n", "from matplotlib.ticker import MaxNLocator\n", "from mpl_toolkits.axes_grid1 import make_axes_locatable" ] }, { "cell_type": "markdown", "id": "c24eac1e", "metadata": {}, "source": [ "## Load data" ] }, { "cell_type": "markdown", "id": "39b5bbf6", "metadata": {}, "source": [ "We load the iNaturalist vascular plant observations and the TRY summary stats per species." ] }, { "cell_type": "code", "execution_count": 2, "id": "1a1e8705", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentified
01229615436Commelina communis35.987483-79.0575462013-07-07T00:00:002013-07-07T20:33:11
11802610589Blitum capitatum40.320259-105.6048562013-08-24T13:30:002019-09-02T01:11:54
21212005116Passiflora vitifolia23.189257-106.4049242014-03-18T12:49:372017-02-23T17:24:07
\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1229615436 Commelina communis 35.987483 -79.057546 \n", "1 1802610589 Blitum capitatum 40.320259 -105.604856 \n", "2 1212005116 Passiflora vitifolia 23.189257 -106.404924 \n", "\n", " eventDate dateIdentified \n", "0 2013-07-07T00:00:00 2013-07-07T20:33:11 \n", "1 2013-08-24T13:30:00 2019-09-02T01:11:54 \n", "2 2014-03-18T12:49:37 2017-02-23T17:24:07 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat = pd.read_csv(\"Data/iNat/observations.csv\")\n", "iNat.head(3)\n" ] }, { "cell_type": "markdown", "id": "a1a87c9e", "metadata": {}, "source": [ "Load trait measurements with consolidated species name:" ] }, { "cell_type": "code", "execution_count": 3, "id": "bbe8cd42", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccSpeciesNameDispersal unit lengthLeaf AreaSLALeaf CLDMCLeaf fresh massLeaf N per areaLeaf N per massLeaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
0AaNaNNaN9.433962NaNNaNNaN2.798426.4NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1Aaronsohnia pubescensNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.2NaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " AccSpeciesName Dispersal unit length Leaf Area SLA Leaf C \\\n", "0 Aa NaN NaN 9.433962 NaN \n", "1 Aaronsohnia pubescens NaN NaN NaN NaN \n", "\n", " LDMC Leaf fresh mass Leaf N per area Leaf N per mass Leaf delta15N \\\n", "0 NaN NaN 2.7984 26.4 NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", " Leaf N P ratio Leaf P Plant Height Seed mass Seed length \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN 0.2 NaN NaN \n", "\n", " Seeds per rep. unit Stem conduit density SSD Conduit element length \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TRY = pd.read_csv(\"Data/TRY/TRY_summary_stats.csv\")\n", "TRY.head(2)" ] }, { "cell_type": "code", "execution_count": 4, "id": "4803367e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(51908, 19)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TRY.shape" ] }, { "cell_type": "code", "execution_count": 5, "id": "efeca2cc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(14019405, 6)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat.shape" ] }, { "cell_type": "code", "execution_count": 6, "id": "c8fea929", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14019405" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# check that we have only unique observation ID's\n", "iNat[\"gbifID\"].nunique()" ] }, { "cell_type": "markdown", "id": "c9f283e8", "metadata": {}, "source": [ "## Link iNaturalist and TRY" ] }, { "cell_type": "markdown", "id": "6f386db6", "metadata": {}, "source": [ "Non-fuzzy merge with TRY summary stats on **consolidated TRY species name**:\n" ] }, { "cell_type": "code", "execution_count": 7, "id": "a07593ff", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentifiedAccSpeciesNameDispersal unit lengthLeaf AreaSLA...Leaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
01229615436Commelina communis35.987483-79.0575462013-07-07T00:00:002013-07-07T20:33:11Commelina communisNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
13384000233Commelina communis42.093762-75.9236602021-08-23T13:06:062021-09-17T21:15:37Commelina communisNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
21807276585Commelina communis40.787636-73.9337282017-09-04T12:47:582017-09-04T21:58:57Commelina communisNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
\n", "

3 rows × 25 columns

\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1229615436 Commelina communis 35.987483 -79.057546 \n", "1 3384000233 Commelina communis 42.093762 -75.923660 \n", "2 1807276585 Commelina communis 40.787636 -73.933728 \n", "\n", " eventDate dateIdentified AccSpeciesName \\\n", "0 2013-07-07T00:00:00 2013-07-07T20:33:11 Commelina communis \n", "1 2021-08-23T13:06:06 2021-09-17T21:15:37 Commelina communis \n", "2 2017-09-04T12:47:58 2017-09-04T21:58:57 Commelina communis \n", "\n", " Dispersal unit length Leaf Area SLA ... Leaf delta15N Leaf N P ratio \\\n", "0 NaN NaN NaN ... NaN 12.631579 \n", "1 NaN NaN NaN ... NaN 12.631579 \n", "2 NaN NaN NaN ... NaN 12.631579 \n", "\n", " Leaf P Plant Height Seed mass Seed length Seeds per rep. unit \\\n", "0 1.71 NaN 8.48 NaN NaN \n", "1 1.71 NaN 8.48 NaN NaN \n", "2 1.71 NaN 8.48 NaN NaN \n", "\n", " Stem conduit density SSD Conduit element length \n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "\n", "[3 rows x 25 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY = pd.merge(iNat, TRY, \n", " left_on= ['scientificName'],\n", " right_on= ['AccSpeciesName'], \n", " how='inner')\n", "iNat_TRY.head(3)" ] }, { "cell_type": "markdown", "id": "7f7635fd", "metadata": {}, "source": [ "We repeat the same with the **'original' species name** in TRY:" ] }, { "cell_type": "code", "execution_count": 8, "id": "4e459e9c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SpeciesNameDispersal unit lengthLeaf AreaSLALeaf CLDMCLeaf fresh massLeaf N per areaLeaf N per massLeaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
0(fabaceae)NaNNaN21.3385NaNNaNNaN1.57815733.150000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1(fabaceae) 20-25oblongNaNNaNNaNNaNNaNNaN1.76145332.513864NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " SpeciesName Dispersal unit length Leaf Area SLA Leaf C \\\n", "0 (fabaceae) NaN NaN 21.3385 NaN \n", "1 (fabaceae) 20-25oblong NaN NaN NaN NaN \n", "\n", " LDMC Leaf fresh mass Leaf N per area Leaf N per mass Leaf delta15N \\\n", "0 NaN NaN 1.578157 33.150000 NaN \n", "1 NaN NaN 1.761453 32.513864 NaN \n", "\n", " Leaf N P ratio Leaf P Plant Height Seed mass Seed length \\\n", "0 NaN NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN NaN \n", "\n", " Seeds per rep. unit Stem conduit density SSD Conduit element length \n", "0 NaN NaN NaN NaN \n", "1 NaN NaN NaN NaN " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TRY_syn = pd.read_csv(\"Data/TRY/TRY_summary_stats_syn.csv\")\n", "TRY_syn.head(2)" ] }, { "cell_type": "markdown", "id": "215ceb74", "metadata": {}, "source": [ "Extract from iNaturalist those observations that have not been matched yet:" ] }, { "cell_type": "code", "execution_count": 9, "id": "99e79d42", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2541013, 6)" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# filter for observations not in merged dataframe:\n", "iNat_rest = iNat[~iNat.gbifID.isin(iNat_TRY['gbifID'])]\n", "iNat_rest.shape" ] }, { "cell_type": "code", "execution_count": 10, "id": "8d75596f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentifiedSpeciesNameDispersal unit lengthLeaf AreaSLA...Leaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
01802610589Blitum capitatum40.320259-105.6048562013-08-24T13:30:002019-09-02T01:11:54Blitum capitatumNaNNaNNaN...NaNNaNNaN0.45NaNNaNNaNNaNNaNNaN
12283078677Blitum capitatum50.744232-120.5113032019-06-29T17:50:282019-09-02T01:16:41Blitum capitatumNaNNaNNaN...NaNNaNNaN0.45NaNNaNNaNNaNNaNNaN
22864818488Blitum capitatum53.938056-106.0685532020-08-22T12:22:092020-08-22T19:13:24Blitum capitatumNaNNaNNaN...NaNNaNNaN0.45NaNNaNNaNNaNNaNNaN
\n", "

3 rows × 25 columns

\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1802610589 Blitum capitatum 40.320259 -105.604856 \n", "1 2283078677 Blitum capitatum 50.744232 -120.511303 \n", "2 2864818488 Blitum capitatum 53.938056 -106.068553 \n", "\n", " eventDate dateIdentified SpeciesName \\\n", "0 2013-08-24T13:30:00 2019-09-02T01:11:54 Blitum capitatum \n", "1 2019-06-29T17:50:28 2019-09-02T01:16:41 Blitum capitatum \n", "2 2020-08-22T12:22:09 2020-08-22T19:13:24 Blitum capitatum \n", "\n", " Dispersal unit length Leaf Area SLA ... Leaf delta15N Leaf N P ratio \\\n", "0 NaN NaN NaN ... NaN NaN \n", "1 NaN NaN NaN ... NaN NaN \n", "2 NaN NaN NaN ... NaN NaN \n", "\n", " Leaf P Plant Height Seed mass Seed length Seeds per rep. unit \\\n", "0 NaN 0.45 NaN NaN NaN \n", "1 NaN 0.45 NaN NaN NaN \n", "2 NaN 0.45 NaN NaN NaN \n", "\n", " Stem conduit density SSD Conduit element length \n", "0 NaN NaN NaN \n", "1 NaN NaN NaN \n", "2 NaN NaN NaN \n", "\n", "[3 rows x 25 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# non-fuzzy merge with TRY summary stats on original TRY species name:\n", "\n", "iNat_TRY_syn = pd.merge(iNat_rest, TRY_syn, \n", " left_on= ['scientificName'],\n", " right_on= ['SpeciesName'], \n", " how='inner')\n", "iNat_TRY_syn.head(3)" ] }, { "cell_type": "code", "execution_count": 11, "id": "497c8e56", "metadata": {}, "outputs": [], "source": [ "subsets = [iNat_TRY, iNat_TRY_syn]\n", "\n", "iNat_TRY_all = pd.concat(subsets)" ] }, { "cell_type": "code", "execution_count": 12, "id": "95f3411a", "metadata": {}, "outputs": [], "source": [ "iNat_TRY_all = iNat_TRY_all.drop(['AccSpeciesName', 'SpeciesName'], axis = 1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "622ea69e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentifiedDispersal unit lengthLeaf AreaSLALeaf C...Leaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
01229615436Commelina communis35.987483-79.0575462013-07-07T00:00:002013-07-07T20:33:11NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
13384000233Commelina communis42.093762-75.9236602021-08-23T13:06:062021-09-17T21:15:37NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
21807276585Commelina communis40.787636-73.9337282017-09-04T12:47:582017-09-04T21:58:57NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
\n", "

3 rows × 24 columns

\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1229615436 Commelina communis 35.987483 -79.057546 \n", "1 3384000233 Commelina communis 42.093762 -75.923660 \n", "2 1807276585 Commelina communis 40.787636 -73.933728 \n", "\n", " eventDate dateIdentified Dispersal unit length Leaf Area \\\n", "0 2013-07-07T00:00:00 2013-07-07T20:33:11 NaN NaN \n", "1 2021-08-23T13:06:06 2021-09-17T21:15:37 NaN NaN \n", "2 2017-09-04T12:47:58 2017-09-04T21:58:57 NaN NaN \n", "\n", " SLA Leaf C ... Leaf delta15N Leaf N P ratio Leaf P Plant Height \\\n", "0 NaN NaN ... NaN 12.631579 1.71 NaN \n", "1 NaN NaN ... NaN 12.631579 1.71 NaN \n", "2 NaN NaN ... NaN 12.631579 1.71 NaN \n", "\n", " Seed mass Seed length Seeds per rep. unit Stem conduit density SSD \\\n", "0 8.48 NaN NaN NaN NaN \n", "1 8.48 NaN NaN NaN NaN \n", "2 8.48 NaN NaN NaN NaN \n", "\n", " Conduit element length \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "\n", "[3 rows x 24 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_all.head(3)" ] }, { "cell_type": "code", "execution_count": 14, "id": "1305f0a7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(11806220, 24)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_all.shape" ] }, { "cell_type": "code", "execution_count": 15, "id": "2ccbb4f1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11806220" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_all[\"gbifID\"].nunique()" ] }, { "cell_type": "code", "execution_count": 16, "id": "dff172e1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(2213185, 6)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# again filter for observations not in merged dataframe:\n", "iNat_rest_2 = iNat[~iNat.gbifID.isin(iNat_TRY_all['gbifID'])]\n", 
"iNat_rest_2.shape" ] }, { "cell_type": "markdown", "id": "aa4af241", "metadata": {}, "source": [ "Check how much was matched:" ] }, { "cell_type": "code", "execution_count": 17, "id": "7052c26d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "iNat species:\n", "90820\n", "TRY consolidated species:\n", "51908\n", "TRY original species:\n", "61180\n", "species merged:\n", "27783\n", "iNat species not merged:\n", "63037\n", "percentage of iNat observations linked with at least one TRY trait:\n", "0.8421341704587321\n" ] } ], "source": [ "print('iNat species:')\n", "print(iNat[\"scientificName\"].nunique())\n", "print('TRY consolidated species:')\n", "print(TRY[\"AccSpeciesName\"].nunique())\n", "print('TRY original species:')\n", "print(TRY_syn[\"SpeciesName\"].nunique())\n", "print('species merged:')\n", "print(iNat_TRY_all[\"scientificName\"].nunique())\n", "print('iNat species not merged:')\n", "print(iNat_rest_2[\"scientificName\"].nunique())\n", "\n", "# percentage of iNat observations linked with at least one TRY trait\n", "print('percentage of iNat observations linked with at least one TRY trait:')\n", "print(len(iNat_TRY_all)/len(iNat))" ] }, { "cell_type": "markdown", "id": "afde0655", "metadata": {}, "source": [ "## Fuzzy merge" ] }, { "cell_type": "markdown", "id": "e7034a8b", "metadata": {}, "source": [ "Get only unique species names left in iNaturalist unmatched observations:" ] }, { "cell_type": "code", "execution_count": 18, "id": "8bb3172d", "metadata": {}, "outputs": [], "source": [ "iNat_rest_unique = iNat_rest_2.drop_duplicates(subset=['scientificName'])" ] }, { "cell_type": "markdown", "id": "98790e9a", "metadata": {}, "source": [ "Get only unique unmatched TRY species names:" ] }, { "cell_type": "code", "execution_count": 19, "id": "2e2a876b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AccSpeciesNameDispersal unit lengthLeaf AreaSLALeaf CLDMCLeaf fresh massLeaf N per areaLeaf N per massLeaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
0AaNaNNaN9.433962NaNNaNNaN2.798426.4NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
2Abacaba (palm)NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN15.0NaNNaNNaNNaNNaNNaN
4Abarema adenophorumNaN3038.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN0.360000NaN
5Abarema alexandriNaN675.000000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
6Abarema barbourianaNaN29.811258NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN18.4968430.456346NaN
\n", "
" ], "text/plain": [ " AccSpeciesName Dispersal unit length Leaf Area SLA Leaf C \\\n", "0 Aa NaN NaN 9.433962 NaN \n", "2 Abacaba (palm) NaN NaN NaN NaN \n", "4 Abarema adenophorum NaN 3038.000000 NaN NaN \n", "5 Abarema alexandri NaN 675.000000 NaN NaN \n", "6 Abarema barbouriana NaN 29.811258 NaN NaN \n", "\n", " LDMC Leaf fresh mass Leaf N per area Leaf N per mass Leaf delta15N \\\n", "0 NaN NaN 2.7984 26.4 NaN \n", "2 NaN NaN NaN NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "5 NaN NaN NaN NaN NaN \n", "6 NaN NaN NaN NaN NaN \n", "\n", " Leaf N P ratio Leaf P Plant Height Seed mass Seed length \\\n", "0 NaN NaN NaN NaN NaN \n", "2 NaN NaN 15.0 NaN NaN \n", "4 NaN NaN NaN NaN NaN \n", "5 NaN NaN NaN NaN NaN \n", "6 NaN NaN NaN NaN NaN \n", "\n", " Seeds per rep. unit Stem conduit density SSD Conduit element length \n", "0 NaN NaN NaN NaN \n", "2 NaN NaN NaN NaN \n", "4 NaN NaN 0.360000 NaN \n", "5 NaN NaN NaN NaN \n", "6 NaN 18.496843 0.456346 NaN " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.options.mode.chained_assignment = None\n", "\n", "TRY = pd.read_csv(\"Data/TRY/TRY_summary_stats.csv\")\n", "TRY_alt = pd.read_csv(\"Data/TRY/TRY_summary_stats_syn.csv\")\n", "\n", "TRY_rest = TRY[~TRY.AccSpeciesName.isin(iNat_TRY_all['scientificName'])]\n", "TRY_alt_rest = TRY_alt[~TRY_alt.SpeciesName.isin(iNat_TRY_all['scientificName'])]\n", "\n", "TRY_alt_rest.rename(columns = {'SpeciesName':'AccSpeciesName'}, inplace = True)\n", "\n", "\n", "TRY_R = pd.concat([TRY_rest, TRY_alt_rest])\n", "TRY_rest_unique = TRY_R.drop_duplicates(subset=['AccSpeciesName'])\n", "\n", "TRY_rest_unique.head()" ] }, { "cell_type": "code", "execution_count": 20, "id": "9ded70d8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0Sambucus ceruleaSambucus caerulea96.96969742779
1Anemonoides sylvestrisAnemone sylvestris90.0000003146
2Elymus hystrixElymus histrix92.85714320225
3Euphorbia enterophoraEuphorbia eriophora90.00000019508
4Tanacetum partheniifoliumTanacetum parthenifolium97.95918447633
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 Sambucus cerulea Sambucus caerulea 96.969697 42779\n", "1 Anemonoides sylvestris Anemone sylvestris 90.000000 3146\n", "2 Elymus hystrix Elymus histrix 92.857143 20225\n", "3 Euphorbia enterophora Euphorbia eriophora 90.000000 19508\n", "4 Tanacetum partheniifolium Tanacetum parthenifolium 97.959184 47633" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# define choices and queries\n", "# this might take a little while\n", "\n", "choices = TRY_rest_unique[\"AccSpeciesName\"].apply(str)\n", "queries = iNat_rest_unique[\"scientificName\"]\n", "\n", "\n", "score_sort = [(x,) + i\n", " for x in queries\n", " for i in process.extract(x, choices, score_cutoff=90, scorer=fuzz.token_sort_ratio) ]\n", "\n", "fuzzy_matches = pd.DataFrame(score_sort)\n", "fuzzy_matches.head()" ] }, { "cell_type": "markdown", "id": "4a814f9d", "metadata": {}, "source": [ "Save fuzzy match to ```.csv```:" ] }, { "cell_type": "code", "execution_count": 21, "id": "b90a739b", "metadata": {}, "outputs": [], "source": [ "fuzzy_matches.to_csv(\"Data/fuzzy_matches.csv\", sep = \"\\t\",index=False)" ] }, { "cell_type": "markdown", "id": "9b5e8e1c", "metadata": {}, "source": [ "Reload fuzzy matches:" ] }, { "cell_type": "code", "execution_count": 22, "id": "8f7ef84b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123
0Sambucus ceruleaSambucus caerulea96.96969742779
1Anemonoides sylvestrisAnemone sylvestris90.0000003146
2Elymus hystrixElymus histrix92.85714320225
3Euphorbia enterophoraEuphorbia eriophora90.00000019508
4Tanacetum partheniifoliumTanacetum parthenifolium97.95918447633
\n", "
" ], "text/plain": [ " 0 1 2 3\n", "0 Sambucus cerulea Sambucus caerulea 96.969697 42779\n", "1 Anemonoides sylvestris Anemone sylvestris 90.000000 3146\n", "2 Elymus hystrix Elymus histrix 92.857143 20225\n", "3 Euphorbia enterophora Euphorbia eriophora 90.000000 19508\n", "4 Tanacetum partheniifolium Tanacetum parthenifolium 97.959184 47633" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fuzzy_matches = pd.read_csv(\"Data/fuzzy_matches.csv\", sep = \"\\t\")\n", "fuzzy_matches.head()" ] }, { "cell_type": "markdown", "id": "e216cc43", "metadata": {}, "source": [ "Add new names to unmatched iNaturalist observations: ```iNat_rest_2``` with ```fuzzy_matches```\n" ] }, { "cell_type": "code", "execution_count": 23, "id": "0a3154ef", "metadata": {}, "outputs": [], "source": [ "fuzzy_matches.rename(columns = {'0':'scientificName'}, inplace = True)\n", "fuzzy_matches.rename(columns = {'1':'fuzzyName'}, inplace = True)\n", "iNat_rest_fuzzy = pd.merge(iNat_rest_2, fuzzy_matches, on='scientificName', how='inner')" ] }, { "cell_type": "markdown", "id": "dbe44fd5", "metadata": {}, "source": [ "Merge with TRY:" ] }, { "cell_type": "code", "execution_count": 24, "id": "28e0b01c", "metadata": {}, "outputs": [], "source": [ "TRY = pd.read_csv(\"Data/TRY/TRY_summary_stats.csv\")\n", "TRY_alt = pd.read_csv(\"Data/TRY/TRY_summary_stats_syn.csv\")\n", "\n", "TRY.rename(columns = {'AccSpeciesName':'fuzzyName'}, inplace = True)\n", "iNat_TRY_fuzzy_1 = pd.merge(iNat_rest_fuzzy, TRY, on='fuzzyName', how='inner')\n", "iNat_TRY_fuzzy_rest = iNat_rest_fuzzy[~iNat_rest_fuzzy.gbifID.isin(iNat_TRY_fuzzy_1['gbifID'])]\n", "iNat_TRY_fuzzy_1= iNat_TRY_fuzzy_1.drop(columns=[\"fuzzyName\", \"2\", \"3\"])\n", "\n", "TRY_alt.rename(columns = {'SpeciesName':'fuzzyName'}, inplace = True)\n", "iNat_TRY_fuzzy_2 = pd.merge(iNat_TRY_fuzzy_rest, TRY_alt, on='fuzzyName', how='inner')\n", "iNat_TRY_fuzzy_2= iNat_TRY_fuzzy_2.drop(columns=[\"fuzzyName\", \"2\", 
\"3\"])" ] }, { "cell_type": "code", "execution_count": 25, "id": "2fc3e073", "metadata": {}, "outputs": [], "source": [ "# merge fuzzy-consolidated species name match and fuzzy-original match\n", "frames = [iNat_TRY_fuzzy_1, iNat_TRY_fuzzy_2]\n", "\n", "iNat_TRY_fuzzy_merge = pd.concat(frames)" ] }, { "cell_type": "code", "execution_count": 26, "id": "10267a5c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9933762301286904" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_fuzzy_merge['gbifID'].nunique()/len(iNat_TRY_fuzzy_merge['gbifID'])" ] }, { "cell_type": "markdown", "id": "c4d1090d", "metadata": {}, "source": [ "Drop iNat observation duplicates in fuzzy matches, keeping the row with the least NaN" ] }, { "cell_type": "code", "execution_count": 27, "id": "94d8d467", "metadata": {}, "outputs": [], "source": [ "iNat_TRY_fuzzy_merge_2 = (iNat_TRY_fuzzy_merge.assign(counts=iNat_TRY_fuzzy_merge.count(axis=1))\n", " .sort_values(['gbifID', 'counts'])\n", " .drop_duplicates('gbifID', keep='last')\n", " .drop('counts', axis=1))" ] }, { "cell_type": "code", "execution_count": 28, "id": "3bbbbc6c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(89828, 24)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_fuzzy_merge.shape" ] }, { "cell_type": "code", "execution_count": 29, "id": "daba3143", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(89233, 24)" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_fuzzy_merge_2.shape" ] }, { "cell_type": "markdown", "id": "5cfce3e9", "metadata": {}, "source": [ "Concatenate to make final dataframe:" ] }, { "cell_type": "code", "execution_count": 30, "id": "b696e1f9", "metadata": {}, "outputs": [], "source": [ "frames = [iNat_TRY_all, iNat_TRY_fuzzy_merge_2]\n", "\n", "iNat_TRY_final = pd.concat(frames)" ] }, { "cell_type": "markdown", "id": 
"dcb49444", "metadata": {}, "source": [ "Compare shape to number of unique gbif ID's, check that they are the same. We want each observation represented only once:" ] }, { "cell_type": "code", "execution_count": 31, "id": "082acd2f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(11895453, 24)" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_final.shape" ] }, { "cell_type": "code", "execution_count": 32, "id": "0decb304", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "11895453" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_final['gbifID'].nunique()" ] }, { "cell_type": "code", "execution_count": 33, "id": "f05024bd", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentifiedDispersal unit lengthLeaf AreaSLALeaf C...Leaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
01229615436Commelina communis35.987483-79.0575462013-07-07T00:00:002013-07-07T20:33:11NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
13384000233Commelina communis42.093762-75.9236602021-08-23T13:06:062021-09-17T21:15:37NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
21807276585Commelina communis40.787636-73.9337282017-09-04T12:47:582017-09-04T21:58:57NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
33355124418Commelina communis39.643158-76.7642452020-08-26T10:19:562020-08-27T13:21:22NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
41802638502Commelina communis43.1095051.6225432017-10-21T10:01:002017-10-21T09:02:42NaNNaNNaNNaN...NaN12.6315791.71NaN8.48NaNNaNNaNNaNNaN
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1229615436 Commelina communis 35.987483 -79.057546 \n", "1 3384000233 Commelina communis 42.093762 -75.923660 \n", "2 1807276585 Commelina communis 40.787636 -73.933728 \n", "3 3355124418 Commelina communis 39.643158 -76.764245 \n", "4 1802638502 Commelina communis 43.109505 1.622543 \n", "\n", " eventDate dateIdentified Dispersal unit length Leaf Area \\\n", "0 2013-07-07T00:00:00 2013-07-07T20:33:11 NaN NaN \n", "1 2021-08-23T13:06:06 2021-09-17T21:15:37 NaN NaN \n", "2 2017-09-04T12:47:58 2017-09-04T21:58:57 NaN NaN \n", "3 2020-08-26T10:19:56 2020-08-27T13:21:22 NaN NaN \n", "4 2017-10-21T10:01:00 2017-10-21T09:02:42 NaN NaN \n", "\n", " SLA Leaf C ... Leaf delta15N Leaf N P ratio Leaf P Plant Height \\\n", "0 NaN NaN ... NaN 12.631579 1.71 NaN \n", "1 NaN NaN ... NaN 12.631579 1.71 NaN \n", "2 NaN NaN ... NaN 12.631579 1.71 NaN \n", "3 NaN NaN ... NaN 12.631579 1.71 NaN \n", "4 NaN NaN ... NaN 12.631579 1.71 NaN \n", "\n", " Seed mass Seed length Seeds per rep. unit Stem conduit density SSD \\\n", "0 8.48 NaN NaN NaN NaN \n", "1 8.48 NaN NaN NaN NaN \n", "2 8.48 NaN NaN NaN NaN \n", "3 8.48 NaN NaN NaN NaN \n", "4 8.48 NaN NaN NaN NaN \n", "\n", " Conduit element length \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_final.head()" ] }, { "cell_type": "markdown", "id": "27a2451a", "metadata": {}, "source": [ "After matching with alternate name and a conservative fuzzy match, we were able to match about 85% of the iNaturalist observations with trait information. 
Many rare species seem to be absent in either one of the two databases.\n" ] }, { "cell_type": "code", "execution_count": 34, "id": "b136932f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "fraction of iNat observations linked with at least one TRY trait:\n", "0.8484991338790769\n", "fraction of species in iNaturalist matched with TRY:\n", "0.3161528297731777\n", "fraction of species in TRY matched with iNaturalist:\n", "0.5531517299838176\n" ] } ], "source": [ "# NOTE: the values printed below are fractions in [0, 1], not percentages.\n", "print('fraction of iNat observations linked with at least one TRY trait:')\n", "print(len(iNat_TRY_final)/len(iNat))\n", "\n", "print('fraction of species in iNaturalist matched with TRY:')\n", "print(iNat_TRY_final[\"scientificName\"].nunique()/iNat[\"scientificName\"].nunique())\n", "\n", "print('fraction of species in TRY matched with iNaturalist:')\n", "print(iNat_TRY_final[\"scientificName\"].nunique()/TRY[\"fuzzyName\"].nunique())" ] }, { "cell_type": "code", "execution_count": 35, "id": "0502bf4b", "metadata": {}, "outputs": [], "source": [ "iNat_TRY_final.to_csv(\"Data/iNat_TRY.csv\", index=False)" ] }, { "cell_type": "markdown", "id": "1bd68b38", "metadata": {}, "source": [ "## Log trait values" ] }, { "cell_type": "markdown", "id": "30e8d999", "metadata": {}, "source": [ "The CWMs in sPlot were calculated after being log e transformed, so we must log e transform the iNat data as well:" ] }, { "cell_type": "code", "execution_count": 36, "id": "624a3225", "metadata": {}, "outputs": [], "source": [ "# Columns 0-5 hold the observation metadata (ID, name, coordinates, dates);\n", "# columns 6-23 hold the 18 trait values.\n", "trait = iNat_TRY_final.columns[6:24]\n", "# NOTE: this overwrites the trait columns in place, so re-running this cell\n", "# applies the natural log a second time.\n", "iNat_TRY_final.loc[:, trait] = np.log(iNat_TRY_final[trait])" ] }, { "cell_type": "code", "execution_count": 37, "id": "56a1b56b", "metadata": {}, "outputs": [], "source": [ "# np.log returns -inf for zeros; store infinite values as missing.\n", "iNat_TRY_final = iNat_TRY_final.replace([np.inf, -np.inf], np.nan)" ] }, { "cell_type": "code", "execution_count": 38, "id": "17303325", "metadata": {}, "outputs": [], "source": [ 
"iNat_TRY_final.to_csv(\"Data/iNat_TRY_log.csv\", index=False)" ] }, { "cell_type": "markdown", "id": "9b7d6011", "metadata": {}, "source": [ "## Number of observations per trait" ] }, { "cell_type": "code", "execution_count": 39, "id": "d978a349", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
gbifIDscientificNamedecimalLatitudedecimalLongitudeeventDatedateIdentifiedDispersal unit lengthLeaf AreaSLALeaf C...Leaf delta15NLeaf N P ratioLeaf PPlant HeightSeed massSeed lengthSeeds per rep. unitStem conduit densitySSDConduit element length
01229615436Commelina communis35.987483-79.0575462013-07-07T00:00:002013-07-07T20:33:11NaNNaNNaNNaN...NaN2.53620.536493NaN2.13771NaNNaNNaNNaNNaN
13384000233Commelina communis42.093762-75.9236602021-08-23T13:06:062021-09-17T21:15:37NaNNaNNaNNaN...NaN2.53620.536493NaN2.13771NaNNaNNaNNaNNaN
21807276585Commelina communis40.787636-73.9337282017-09-04T12:47:582017-09-04T21:58:57NaNNaNNaNNaN...NaN2.53620.536493NaN2.13771NaNNaNNaNNaNNaN
33355124418Commelina communis39.643158-76.7642452020-08-26T10:19:562020-08-27T13:21:22NaNNaNNaNNaN...NaN2.53620.536493NaN2.13771NaNNaNNaNNaNNaN
41802638502Commelina communis43.1095051.6225432017-10-21T10:01:002017-10-21T09:02:42NaNNaNNaNNaN...NaN2.53620.536493NaN2.13771NaNNaNNaNNaNNaN
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " gbifID scientificName decimalLatitude decimalLongitude \\\n", "0 1229615436 Commelina communis 35.987483 -79.057546 \n", "1 3384000233 Commelina communis 42.093762 -75.923660 \n", "2 1807276585 Commelina communis 40.787636 -73.933728 \n", "3 3355124418 Commelina communis 39.643158 -76.764245 \n", "4 1802638502 Commelina communis 43.109505 1.622543 \n", "\n", " eventDate dateIdentified Dispersal unit length Leaf Area \\\n", "0 2013-07-07T00:00:00 2013-07-07T20:33:11 NaN NaN \n", "1 2021-08-23T13:06:06 2021-09-17T21:15:37 NaN NaN \n", "2 2017-09-04T12:47:58 2017-09-04T21:58:57 NaN NaN \n", "3 2020-08-26T10:19:56 2020-08-27T13:21:22 NaN NaN \n", "4 2017-10-21T10:01:00 2017-10-21T09:02:42 NaN NaN \n", "\n", " SLA Leaf C ... Leaf delta15N Leaf N P ratio Leaf P Plant Height \\\n", "0 NaN NaN ... NaN 2.5362 0.536493 NaN \n", "1 NaN NaN ... NaN 2.5362 0.536493 NaN \n", "2 NaN NaN ... NaN 2.5362 0.536493 NaN \n", "3 NaN NaN ... NaN 2.5362 0.536493 NaN \n", "4 NaN NaN ... NaN 2.5362 0.536493 NaN \n", "\n", " Seed mass Seed length Seeds per rep. 
unit Stem conduit density SSD \\\n", "0 2.13771 NaN NaN NaN NaN \n", "1 2.13771 NaN NaN NaN NaN \n", "2 2.13771 NaN NaN NaN NaN \n", "3 2.13771 NaN NaN NaN NaN \n", "4 2.13771 NaN NaN NaN NaN \n", "\n", " Conduit element length \n", "0 NaN \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY_final.head()" ] }, { "cell_type": "code", "execution_count": 40, "id": "0aed0657", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "gbifID 11500000\n", "scientificName 11500000\n", "decimalLatitude 11500000\n", "decimalLongitude 11500000\n", "eventDate 11500000\n", "dateIdentified 11400000\n", "AccSpeciesName 11500000\n", "Dispersal unit length 4700000\n", "Leaf Area 4800000\n", "SLA 7600000\n", "Leaf C 5000000\n", "LDMC 6700000\n", "Leaf fresh mass 2700000\n", "Leaf N per area 5700000\n", "Leaf N per mass 7000000\n", "Leaf delta15N 2400000\n", "Leaf N P ratio 3800000\n", "Leaf P 5100000\n", "Plant Height 9500000\n", "Seed mass 10200000\n", "Seed length 3500000\n", "Seeds per rep. unit 4000000\n", "Stem conduit density 1200000\n", "SSD 3400000\n", "Conduit element length 300000\n", "dtype: int64" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "iNat_TRY.count().round(decimals=-5)" ] }, { "cell_type": "markdown", "id": "8192ca52", "metadata": {}, "source": [ "## Density of observations after linking" ] }, { "cell_type": "code", "execution_count": 41, "id": "a7615ee9", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.rcParams.update({'font.size': 15})\n", "\n", "Z, xedges, yedges = np.histogram2d(np.array(iNat_TRY['decimalLongitude'],dtype=float),\n", " np.array(iNat_TRY['decimalLatitude']),bins = [181, 91])\n", "\n", "data_crs = ccrs.PlateCarree()\n", "#for colorbar\n", "cmap = plt.get_cmap('cool')\n", "im_ratio = Z.shape[0]/Z.shape[1]\n", "\n", "#plot map\n", "fig = plt.figure(figsize=(12, 12)) # I created a new figure and set up its size\n", "\n", "#create base plot of a world map\n", "ax = fig.add_subplot(1, 1, 1, projection=ccrs.Robinson()) # I used the PlateCarree projection from cartopy\n", "ax.set_global()\n", "#add coastlines\n", "ax.coastlines(resolution='110m', color='orange', linewidth=1.3)\n", "#add grid with values\n", "im = ax.pcolormesh(xedges, yedges, Z.T, cmap=\"cool\", norm=LogNorm(), transform=data_crs)\n", "#add color bar\n", "#divider = make_axes_locatable(ax)\n", "#cax = divider.append_axes(\"right\", size=\"3%\", pad=0.05)\n", "#fig.colorbar(im, cax=cax)\n", "fig.colorbar(im,fraction=0.046*im_ratio, pad=0.04, shrink=0.3, location=\"left\", label=\"iNaturalist observations vascular plants\")\n", "\n", "\n", "plt.savefig('Figures/iNat_density_Robinson_TRY.pdf', bbox_inches='tight')\n", "\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" } }, "nbformat": 4, "nbformat_minor": 5 }