diff --git a/transit_provider_dashboard/01_prepare_acs_data.ipynb b/transit_provider_dashboard/01_prepare_acs_data.ipynb index c2519c9c8..892f8ef8a 100644 --- a/transit_provider_dashboard/01_prepare_acs_data.ipynb +++ b/transit_provider_dashboard/01_prepare_acs_data.ipynb @@ -4,12 +4,7 @@ "cell_type": "code", "execution_count": 1, "id": "77e2b1ea-6883-4d17-8d0f-2557048cf485", - "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - } - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -17,26 +12,26 @@ "text": [ "Requirement already satisfied: pygris in /opt/conda/lib/python3.11/site-packages (0.2.1)\n", "Requirement already satisfied: geopandas>=0.9 in /opt/conda/lib/python3.11/site-packages (from pygris) (0.14.4)\n", - "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from pygris) (2.32.5)\n", - "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.11/site-packages (from pygris) (4.3.8)\n", + "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from pygris) (2.32.3)\n", + "Requirement already satisfied: platformdirs in /opt/conda/lib/python3.11/site-packages (from pygris) (4.3.6)\n", "Requirement already satisfied: fiona>=1.8.21 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.10.1)\n", "Requirement already satisfied: numpy>=1.22 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (1.26.4)\n", - "Requirement already satisfied: packaging in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (25.0)\n", - "Requirement already satisfied: pandas>=1.4.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.3.3)\n", + "Requirement already satisfied: packaging in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (26.0)\n", + "Requirement already satisfied: pandas>=1.4.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.2.3)\n", 
"Requirement already satisfied: pyproj>=3.3.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (3.7.2)\n", - "Requirement already satisfied: shapely>=1.8.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.1.1)\n", - "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.4.3)\n", + "Requirement already satisfied: shapely>=1.8.0 in /opt/conda/lib/python3.11/site-packages (from geopandas>=0.9->pygris) (2.1.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.4.0)\n", "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2.6.1)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2025.8.3)\n", - "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (25.3.0)\n", - "Requirement already satisfied: click~=8.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (8.2.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests->pygris) (2024.8.30)\n", + "Requirement already satisfied: attrs>=19.2.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (24.2.0)\n", + "Requirement already satisfied: click~=8.0 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (8.1.7)\n", "Requirement already satisfied: click-plugins>=1.0 in 
/opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (1.1.1.2)\n", "Requirement already satisfied: cligj>=0.5 in /opt/conda/lib/python3.11/site-packages (from fiona>=1.8.21->geopandas>=0.9->pygris) (0.7.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2025.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2025.2)\n", - "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.4.0->geopandas>=0.9->pygris) (1.17.0)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2.9.0)\n", + "Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /opt/conda/lib/python3.11/site-packages (from pandas>=1.4.0->geopandas>=0.9->pygris) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.11/site-packages (from python-dateutil>=2.8.2->pandas>=1.4.0->geopandas>=0.9->pygris) (1.16.0)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -97,7 +92,7 @@ "variables = [\n", " \"B01003_001E\", # Total Population\n", " \"B17001_002E\", # Population with Income in the past 12 months below poverty level\n", - " \"B16008_037E\", # Non US Citizen Population\n", + " \"B05001_006E\", # Non US Citizen Population\n", " \"B01001_020E\", \"B01001_021E\", \"B01001_022E\", \"B01001_023E\", \"B01001_024E\", \"B01001_025E\", # Male senior population : 65 and above\n", " 
\"B01001_044E\", \"B01001_045E\", \"B01001_046E\", \"B01001_047E\", \"B01001_048E\", \"B01001_049E\", # Female senior population : 65 and above\n", " \"B01001_006E\", \"B01001_007E\", \"B01001_008E\", \"B01001_009E\", \"B01001_010E\", # Male population : 15-24\n", @@ -107,8 +102,8 @@ " \"B06010_007E\", \"B06010_008E\", # Population with very low income\n", " \"B06010_009E\", \"B06010_010E\", # Population with low income \n", " \"B08014_002E\", \"B08201_002E\", # Workers and Households with no cars\n", - " \"B18101_001E\", # Total Population with Disability\n", - " \"B19058_001E\", # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", + " # \"B18101_001E\", # Total Population with Disability\n", + " \"B19058_002E\", # Public Assistance Income or Food Stamps/SNAP in past 12 months for Households\n", " \"B21001_002E\", # Population with veteran status: 18 and above\n", " \"B18101_004E\", \"B18101_007E\", \"B18101_010E\", \"B18101_013E\", \"B18101_016E\", # Population with disability (Male and Female)\n", " \"B18101_019E\", \"B18101_023E\", \"B18101_026E\", \"B18101_029E\", \"B18101_032E\", \n", @@ -126,7 +121,7 @@ "def fetch_acs(vars_subset, api_key):\n", " var_str = \"NAME,\" + \",\".join(vars_subset)\n", " url = (\n", - " \"https://api.census.gov/data/2023/acs/acs5\"\n", + " \"https://api.census.gov/data/2024/acs/acs5\"\n", " f\"?get={var_str}&for=tract:*&in=state:06&key={api_key}\"\n", " )\n", "\n", @@ -202,7 +197,7 @@ "census_data = census_data.rename(columns = {\n", " 'B01003_001E': 'total_pop',\n", " 'B17001_002E': 'poverty_pop',\n", - " 'B16008_037E': 'non_us_citizen',\n", + " 'B05001_006E': 'non_us_citizen',\n", " 'B01001_020E': 'male_65_to_66', 'B01001_021E': 'male_67_to_69', 'B01001_022E': 'male_70_to_74', \n", " 'B01001_023E': 'male_75_to_79', 'B01001_024E': 'male_80_to_84', 'B01001_025E': 'male_85_and_over',\n", " 'B01001_044E': 'female_65_to_66', 'B01001_045E': 'female_67_to_69', 'B01001_046E': 'female_70_to_74', \n", @@ 
-216,7 +211,7 @@ " 'B06010_009E': 'income_50000_64999', 'B06010_010E': 'income_65000_74999',\n", " 'B08014_002E': 'workers_with_no_car', 'B08201_002E': 'households_with_no_cars',\n", " # 'B18101_001E': 'disabled_pop',\n", - " 'B19058_001E': 'public_asst_pop',\n", + " 'B19058_002E': 'public_asst_pop',\n", " 'B21001_002E': 'veteran_pop',\n", " 'B18101_004E': 'male_under5_with_disability',\n", " 'B18101_007E': 'male_5_17_with_disability',\n", @@ -356,7 +351,6 @@ " workers_with_no_car\n", " households_with_no_cars\n", " GEOID\n", - " B18101_001E\n", " public_asst_pop\n", " veteran_pop\n", " county_name\n", @@ -373,258 +367,253 @@ " \n", " \n", " 0\n", - " 3094\n", - " 134\n", - " 264\n", - " 47\n", - " 84\n", - " 119\n", + " 3132\n", + " 131\n", + " 259\n", + " 58\n", + " 140\n", + " 141\n", + " 44\n", + " 55\n", " 49\n", - " 46\n", + " 26\n", " 78\n", - " 52\n", - " 70\n", - " 72\n", - " 85\n", - " 105\n", - " 107\n", - " 19\n", + " 103\n", + " 83\n", + " 51\n", + " 116\n", + " 28\n", " 0\n", " 0\n", " 0\n", - " 26\n", - " 26\n", - " 13\n", + " 31\n", + " 28\n", + " 7\n", " 0\n", - " 14\n", - " 19\n", + " 0\n", + " 21\n", " 250001\n", - " 188\n", - " 75\n", - " 134\n", - " 157\n", - " 87\n", - " 129\n", - " 70\n", - " 28\n", - " 85\n", + " 161\n", + " 111\n", + " 76\n", + " 163\n", + " 107\n", + " 112\n", + " 41\n", + " 89\n", + " 107\n", " 06001400100\n", - " 3094\n", - " 1316\n", - " 129\n", + " 62\n", + " 55\n", " Alameda\n", - " 397\n", - " 244\n", - " 199\n", - " 423\n", - " 491\n", - " 45\n", - " 72\n", - " 313\n", + " 348\n", + " 270\n", + " 153\n", + " 487\n", + " 457\n", + " 59\n", + " 56\n", + " 206\n", " \n", " \n", " 1\n", - " 2093\n", - " 164\n", - " 96\n", - " 18\n", - " 60\n", - " 59\n", + " 2203\n", + " 170\n", + " 123\n", + " 22\n", + " 58\n", " 58\n", + " 59\n", " 28\n", - " 26\n", - " 40\n", - " 35\n", - " 67\n", - " 96\n", + " 23\n", + " 36\n", " 34\n", - " 13\n", - " 33\n", + " 74\n", + " 105\n", + " 32\n", + " 19\n", + " 25\n", " 0\n", - " 3\n", 
+ " 4\n", " 0\n", " 10\n", + " 24\n", " 28\n", - " 9\n", - " 0\n", " 0\n", " 0\n", - " 225880\n", - " 75\n", - " 70\n", - " 89\n", - " 12\n", - " 207\n", - " 77\n", - " 32\n", - " 92\n", - " 95\n", + " 22\n", + " 208438\n", + " 110\n", + " 68\n", + " 108\n", + " 30\n", + " 226\n", + " 121\n", + " 34\n", + " 99\n", + " 110\n", " 06001400200\n", - " 2093\n", - " 861\n", - " 38\n", + " 42\n", + " 35\n", " Alameda\n", - " 234\n", - " 219\n", - " 109\n", - " 249\n", - " 285\n", - " 46\n", - " 37\n", - " 168\n", + " 286\n", + " 256\n", + " 155\n", + " 248\n", + " 300\n", + " 39\n", + " 74\n", + " 167\n", " \n", " \n", " 2\n", - " 5727\n", - " 310\n", - " 306\n", - " 23\n", - " 47\n", - " 113\n", - " 100\n", - " 24\n", - " 25\n", - " 108\n", - " 62\n", - " 194\n", - " 158\n", - " 13\n", - " 142\n", - " 94\n", - " 88\n", + " 5990\n", + " 256\n", + " 280\n", + " 43\n", + " 49\n", + " 68\n", + " 130\n", + " 83\n", + " 22\n", + " 87\n", + " 85\n", + " 205\n", + " 193\n", + " 33\n", + " 109\n", + " 98\n", + " 144\n", " 0\n", " 6\n", - " 93\n", - " 55\n", - " 33\n", + " 91\n", + " 89\n", + " 31\n", " 0\n", - " 26\n", - " 43\n", - " 157731\n", - " 383\n", - " 201\n", - " 300\n", + " 0\n", + " 50\n", + " 176618\n", + " 350\n", + " 210\n", + " 324\n", + " 308\n", + " 324\n", " 251\n", - " 400\n", - " 148\n", - " 291\n", - " 157\n", - " 416\n", + " 181\n", + " 237\n", + " 466\n", " 06001400300\n", - " 5727\n", - " 2713\n", - " 80\n", + " 203\n", + " 74\n", " Alameda\n", " 884\n", - " 651\n", - " 439\n", - " 332\n", - " 677\n", - " 281\n", - " 157\n", - " 459\n", + " 632\n", + " 432\n", + " 395\n", + " 712\n", + " 339\n", + " 170\n", + " 479\n", " \n", " \n", " 3\n", - " 4395\n", - " 343\n", - " 185\n", - " 31\n", - " 70\n", - " 89\n", - " 19\n", - " 26\n", - " 36\n", - " 55\n", - " 105\n", - " 104\n", - " 43\n", - " 23\n", - " 30\n", - " 30\n", - " 41\n", - " 0\n", + " 4399\n", + " 345\n", + " 221\n", + " 73\n", + " 90\n", + " 73\n", + " 39\n", + " 48\n", + " 21\n", + " 72\n", + " 
83\n", + " 109\n", + " 47\n", + " 22\n", + " 17\n", + " 28\n", + " 38\n", " 0\n", " 0\n", - " 27\n", + " 26\n", + " 21\n", " 9\n", " 0\n", " 0\n", - " 38\n", - " 159612\n", - " 187\n", - " 105\n", - " 287\n", - " 215\n", - " 207\n", - " 178\n", - " 87\n", - " 134\n", - " 204\n", + " 20\n", + " 165134\n", + " 192\n", + " 108\n", + " 321\n", + " 185\n", + " 246\n", + " 186\n", + " 55\n", + " 133\n", + " 212\n", " 06001400400\n", - " 4376\n", - " 1803\n", + " 168\n", " 88\n", " Alameda\n", - " 579\n", - " 422\n", - " 265\n", - " 271\n", - " 360\n", - " 71\n", - " 74\n", - " 339\n", + " 621\n", + " 431\n", + " 241\n", + " 344\n", + " 350\n", + " 92\n", + " 50\n", + " 366\n", " \n", " \n", " 4\n", - " 3822\n", - " 397\n", - " 231\n", - " 41\n", - " 32\n", - " 56\n", - " 41\n", - " 4\n", + " 3579\n", + " 395\n", + " 117\n", + " 37\n", + " 45\n", + " 43\n", + " 46\n", + " 10\n", " 0\n", - " 19\n", - " 47\n", - " 51\n", - " 50\n", " 60\n", - " 203\n", - " 26\n", - " 81\n", - " 9\n", + " 48\n", + " 90\n", + " 27\n", + " 31\n", + " 197\n", + " 23\n", + " 79\n", + " 24\n", " 0\n", - " 115\n", - " 7\n", + " 46\n", + " 18\n", " 0\n", " 0\n", - " 98\n", - " 46\n", - " 96250\n", - " 256\n", - " 91\n", - " 244\n", - " 213\n", - " 385\n", - " 387\n", - " 244\n", - " 74\n", - " 169\n", - " 06001400500\n", - " 3822\n", - " 1655\n", + " 94\n", + " 20\n", + " 96641\n", + " 236\n", + " 125\n", + " 234\n", " 115\n", - " Alameda\n", - " 591\n", - " 598\n", - " 631\n", - " 174\n", " 430\n", - " 231\n", - " 151\n", - " 270\n", + " 383\n", + " 141\n", + " 78\n", + " 227\n", + " 06001400500\n", + " 188\n", + " 100\n", + " Alameda\n", + " 595\n", + " 545\n", + " 524\n", + " 181\n", + " 453\n", + " 172\n", + " 132\n", + " 291\n", " \n", " \n", "\n", @@ -632,88 +621,81 @@ ], "text/plain": [ " total_pop poverty_pop non_us_citizen male_65_to_66 male_67_to_69 \\\n", - "0 3094 134 264 47 84 \n", - "1 2093 164 96 18 60 \n", - "2 5727 310 306 23 47 \n", - "3 4395 343 185 31 70 \n", - "4 3822 397 231 
41 32 \n", + "0 3132 131 259 58 140 \n", + "1 2203 170 123 22 58 \n", + "2 5990 256 280 43 49 \n", + "3 4399 345 221 73 90 \n", + "4 3579 395 117 37 45 \n", "\n", " male_70_to_74 male_75_to_79 male_80_to_84 male_85_and_over \\\n", - "0 119 49 46 78 \n", - "1 59 58 28 26 \n", - "2 113 100 24 25 \n", - "3 89 19 26 36 \n", - "4 56 41 4 0 \n", + "0 141 44 55 49 \n", + "1 58 59 28 23 \n", + "2 68 130 83 22 \n", + "3 73 39 48 21 \n", + "4 43 46 10 0 \n", "\n", " female_65_to_66 female_67_to_69 female_70_to_74 female_75_to_79 \\\n", - "0 52 70 72 85 \n", - "1 40 35 67 96 \n", - "2 108 62 194 158 \n", - "3 55 105 104 43 \n", - "4 19 47 51 50 \n", + "0 26 78 103 83 \n", + "1 36 34 74 105 \n", + "2 87 85 205 193 \n", + "3 72 83 109 47 \n", + "4 60 48 90 27 \n", "\n", " female_80_to_84 female_85_and_over male_15_17 male_18_19 male_20 \\\n", - "0 105 107 19 0 0 \n", - "1 34 13 33 0 3 \n", - "2 13 142 94 88 0 \n", - "3 23 30 30 41 0 \n", - "4 60 203 26 81 9 \n", + "0 51 116 28 0 0 \n", + "1 32 19 25 0 4 \n", + "2 33 109 98 144 0 \n", + "3 22 17 28 38 0 \n", + "4 31 197 23 79 24 \n", "\n", " male_21 male_22_24 female_15_17 female_18_19 female_20 female_21 \\\n", - "0 0 26 26 13 0 14 \n", - "1 0 10 28 9 0 0 \n", - "2 6 93 55 33 0 26 \n", - "3 0 0 27 9 0 0 \n", - "4 0 115 7 0 0 98 \n", + "0 0 31 28 7 0 0 \n", + "1 0 10 24 28 0 0 \n", + "2 6 91 89 31 0 0 \n", + "3 0 26 21 9 0 0 \n", + "4 0 46 18 0 0 94 \n", "\n", " female_22_24 median_household_income income_less_10000 \\\n", - "0 19 250001 188 \n", - "1 0 225880 75 \n", - "2 43 157731 383 \n", - "3 38 159612 187 \n", - "4 46 96250 256 \n", + "0 21 250001 161 \n", + "1 22 208438 110 \n", + "2 50 176618 350 \n", + "3 20 165134 192 \n", + "4 20 96641 236 \n", "\n", " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", - "0 75 134 157 \n", - "1 70 89 12 \n", - "2 201 300 251 \n", - "3 105 287 215 \n", - "4 91 244 213 \n", + "0 111 76 163 \n", + "1 68 108 30 \n", + "2 210 324 308 \n", + "3 108 321 185 \n", + "4 125 234 115 
\n", "\n", " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", - "0 87 129 70 \n", - "1 207 77 32 \n", - "2 400 148 291 \n", - "3 207 178 87 \n", - "4 385 387 244 \n", - "\n", - " workers_with_no_car households_with_no_cars GEOID B18101_001E \\\n", - "0 28 85 06001400100 3094 \n", - "1 92 95 06001400200 2093 \n", - "2 157 416 06001400300 5727 \n", - "3 134 204 06001400400 4376 \n", - "4 74 169 06001400500 3822 \n", + "0 107 112 41 \n", + "1 226 121 34 \n", + "2 324 251 181 \n", + "3 246 186 55 \n", + "4 430 383 141 \n", "\n", - " public_asst_pop veteran_pop county_name inc_extremelylow inc_verylow \\\n", - "0 1316 129 Alameda 397 244 \n", - "1 861 38 Alameda 234 219 \n", - "2 2713 80 Alameda 884 651 \n", - "3 1803 88 Alameda 579 422 \n", - "4 1655 115 Alameda 591 598 \n", + " workers_with_no_car households_with_no_cars GEOID public_asst_pop \\\n", + "0 89 107 06001400100 62 \n", + "1 99 110 06001400200 42 \n", + "2 237 466 06001400300 203 \n", + "3 133 212 06001400400 168 \n", + "4 78 227 06001400500 188 \n", "\n", - " inc_low male_seniors female_seniors male_youth female_youth \\\n", - "0 199 423 491 45 72 \n", - "1 109 249 285 46 37 \n", - "2 439 332 677 281 157 \n", - "3 265 271 360 71 74 \n", - "4 631 174 430 231 151 \n", + " veteran_pop county_name inc_extremelylow inc_verylow inc_low \\\n", + "0 55 Alameda 348 270 153 \n", + "1 35 Alameda 286 256 155 \n", + "2 74 Alameda 884 632 432 \n", + "3 88 Alameda 621 431 241 \n", + "4 100 Alameda 595 545 524 \n", "\n", - " disabled_pop \n", - "0 313 \n", - "1 168 \n", - "2 459 \n", - "3 339 \n", - "4 270 " + " male_seniors female_seniors male_youth female_youth disabled_pop \n", + "0 487 457 59 56 206 \n", + "1 248 300 39 74 167 \n", + "2 395 712 339 170 479 \n", + "3 344 350 92 50 366 \n", + "4 181 453 172 132 291 " ] }, "execution_count": 17, @@ -728,6 +710,191 @@ { "cell_type": "code", "execution_count": 18, + "id": "c2de4bb9-9635-464b-a8ab-e8c199e429c1", + "metadata": {}, + "outputs": [], + "source": 
[ + "ca_totals = census_data.drop(columns=[\"GEOID\", \"county_name\"]).sum().to_frame().T\n", + "# Add GEOID column for California\n", + "ca_totals['GEOID'] = '06'" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "5493bb9c-1664-4217-a695-0635487ba63c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
total_poppoverty_popnon_us_citizenmale_65_to_66male_67_to_69male_70_to_74male_75_to_79male_80_to_84male_85_and_overfemale_65_to_66female_67_to_69female_70_to_74female_75_to_79female_80_to_84female_85_and_overmale_15_17male_18_19male_20male_21male_22_24female_15_17female_18_19female_20female_21female_22_24median_household_incomeincome_less_10000income_10000_14999income_15000_24999income_25000_34999income_35000_49999income_50000_64999income_65000_74999workers_with_no_carhouseholds_with_no_carspublic_asst_popveteran_popinc_extremelylowinc_verylowinc_lowmale_seniorsfemale_seniorsmale_youthfemale_youthdisabled_popGEOID
03928737746322484841377409523545127745881495290304583281225442680605627861854603018400676462887799993527574266214268195771750761419506077251891245912743008-803721287603329873218608730868732797045356136825719871387561636868959291191402513054538602833635841339595482781629337674226337262508307449755206
\n", + "
" + ], + "text/plain": [ + " total_pop poverty_pop non_us_citizen male_65_to_66 male_67_to_69 \\\n", + "0 39287377 4632248 4841377 409523 545127 \n", + "\n", + " male_70_to_74 male_75_to_79 male_80_to_84 male_85_and_over \\\n", + "0 745881 495290 304583 281225 \n", + "\n", + " female_65_to_66 female_67_to_69 female_70_to_74 female_75_to_79 \\\n", + "0 442680 605627 861854 603018 \n", + "\n", + " female_80_to_84 female_85_and_over male_15_17 male_18_19 male_20 \\\n", + "0 400676 462887 799993 527574 266214 \n", + "\n", + " male_21 male_22_24 female_15_17 female_18_19 female_20 female_21 \\\n", + "0 268195 771750 761419 506077 251891 245912 \n", + "\n", + " female_22_24 median_household_income income_less_10000 \\\n", + "0 743008 -80372128760 3329873 \n", + "\n", + " income_10000_14999 income_15000_24999 income_25000_34999 \\\n", + "0 2186087 3086873 2797045 \n", + "\n", + " income_35000_49999 income_50000_64999 income_65000_74999 \\\n", + "0 3561368 2571987 1387561 \n", + "\n", + " workers_with_no_car households_with_no_cars public_asst_pop veteran_pop \\\n", + "0 636868 959291 1914025 1305453 \n", + "\n", + " inc_extremelylow inc_verylow inc_low male_seniors female_seniors \\\n", + "0 8602833 6358413 3959548 2781629 3376742 \n", + "\n", + " male_youth female_youth disabled_pop GEOID \n", + "0 2633726 2508307 4497552 06 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ca_totals.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "id": "e2341578-5d79-4c2f-a6a5-ee556abcbc17", "metadata": {}, "outputs": [ @@ -737,7 +904,7 @@ "text": [ "\n", "RangeIndex: 9129 entries, 0 to 9128\n", - "Data columns (total 48 columns):\n", + "Data columns (total 47 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 total_pop 9129 non-null int64 \n", @@ -776,19 +943,18 @@ " 33 workers_with_no_car 9129 non-null int64 \n", " 34 households_with_no_cars 9129 non-null 
int64 \n", " 35 GEOID 9129 non-null object\n", - " 36 B18101_001E 9129 non-null int64 \n", - " 37 public_asst_pop 9129 non-null int64 \n", - " 38 veteran_pop 9129 non-null int64 \n", - " 39 county_name 9129 non-null object\n", - " 40 inc_extremelylow 9129 non-null int64 \n", - " 41 inc_verylow 9129 non-null int64 \n", - " 42 inc_low 9129 non-null int64 \n", - " 43 male_seniors 9129 non-null int64 \n", - " 44 female_seniors 9129 non-null int64 \n", - " 45 male_youth 9129 non-null int64 \n", - " 46 female_youth 9129 non-null int64 \n", - " 47 disabled_pop 9129 non-null int64 \n", - "dtypes: int64(46), object(2)\n", + " 36 public_asst_pop 9129 non-null int64 \n", + " 37 veteran_pop 9129 non-null int64 \n", + " 38 county_name 9129 non-null object\n", + " 39 inc_extremelylow 9129 non-null int64 \n", + " 40 inc_verylow 9129 non-null int64 \n", + " 41 inc_low 9129 non-null int64 \n", + " 42 male_seniors 9129 non-null int64 \n", + " 43 female_seniors 9129 non-null int64 \n", + " 44 male_youth 9129 non-null int64 \n", + " 45 female_youth 9129 non-null int64 \n", + " 46 disabled_pop 9129 non-null int64 \n", + "dtypes: int64(45), object(2)\n", "memory usage: 3.3+ MB\n" ] } @@ -799,7 +965,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "6806f203-fc02-402a-97de-ed6ef73df0dc", "metadata": {}, "outputs": [ @@ -821,7 +987,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "d55b0985-e8f2-45e4-829f-5cc4f7f51241", "metadata": {}, "outputs": [ @@ -840,7 +1006,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "id": "6c31979d-b0b4-4e12-93e4-7ace297b5c8b", "metadata": {}, "outputs": [], @@ -851,7 +1017,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "2ed3d76d-d51b-4e10-8045-6ce49845da53", "metadata": {}, "outputs": [], @@ -862,13 +1028,13 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "id": 
"e3374031-fae9-4711-8407-b59aef3093dd", "metadata": {}, "outputs": [], "source": [ "# Store data in warehouse\n", - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data.parquet\", \"wb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data_2024.parquet\", \"wb\") as f:\n", " tracts_ca_acs.to_parquet(f, index=False)" ] } diff --git a/transit_provider_dashboard/02_prepare_orgs_ridership_data.ipynb b/transit_provider_dashboard/02_prepare_orgs_ridership_data.ipynb index 40efe638a..708639e02 100644 --- a/transit_provider_dashboard/02_prepare_orgs_ridership_data.ipynb +++ b/transit_provider_dashboard/02_prepare_orgs_ridership_data.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: shared_utils in /opt/conda/lib/python3.11/site-packages (4.2)\n", + "Requirement already satisfied: shared_utils in /home/jovyan/data-analyses/_shared_utils (4.4)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -35,21 +35,7 @@ "execution_count": 3, "id": "a4b318d7-230c-4a36-9406-87d3e541b6bc", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.11/site-packages/dask/dataframe/__init__.py:31: FutureWarning: \n", - "Dask dataframe query planning is disabled because dask-expr is not installed.\n", - "\n", - "You can install it with `pip install dask[dataframe]` or `conda install dask`.\n", - "This will raise in a future version.\n", - "\n", - " warnings.warn(msg, FutureWarning)\n" - ] - } - ], + "outputs": [], "source": [ "import pandas as pd \n", "import geopandas as gpd\n", @@ -130,25 +116,25 @@ "output_type": "stream", "text": [ "\n", - "RangeIndex: 218 entries, 0 to 217\n", + "RangeIndex: 199 entries, 0 to 198\n", "Data columns (total 13 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 key 218 non-null object \n", - " 1 name 
218 non-null object \n", - " 2 source_record_id 218 non-null object \n", - " 3 organization_type 218 non-null object \n", - " 4 ntd_id 179 non-null object \n", - " 5 ntd_id_2022 181 non-null object \n", - " 6 ntd_agency_info_key 159 non-null object \n", - " 7 public_currently_operating 218 non-null object \n", - " 8 _is_current 218 non-null bool \n", - " 9 _valid_from 218 non-null datetime64[ns, UTC]\n", - " 10 _valid_to 218 non-null datetime64[ns, UTC]\n", - " 11 _valid_from_local 218 non-null datetime64[ns] \n", - " 12 _valid_to_local 218 non-null datetime64[ns] \n", + " 0 key 199 non-null object \n", + " 1 name 199 non-null object \n", + " 2 source_record_id 199 non-null object \n", + " 3 organization_type 199 non-null object \n", + " 4 ntd_id 165 non-null object \n", + " 5 ntd_id_2022 167 non-null object \n", + " 6 ntd_agency_info_key 147 non-null object \n", + " 7 public_currently_operating 199 non-null object \n", + " 8 _is_current 199 non-null bool \n", + " 9 _valid_from 199 non-null datetime64[ns, UTC]\n", + " 10 _valid_to 199 non-null datetime64[ns, UTC]\n", + " 11 _valid_from_local 199 non-null datetime64[ns] \n", + " 12 _valid_to_local 199 non-null datetime64[ns] \n", "dtypes: bool(1), datetime64[ns, UTC](2), datetime64[ns](2), object(8)\n", - "memory usage: 20.8+ KB\n" + "memory usage: 19.0+ KB\n" ] } ], @@ -158,7 +144,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "56dc63b7-cd87-4d59-98fb-b5dc601012a6", "metadata": {}, "outputs": [], @@ -171,14 +157,14 @@ " FROM \n", " cal-itp-data-infra.mart_ntd.dim_annual_service_agencies\n", " WHERE \n", - " state = 'CA' AND report_year = 2023\n", + " state = 'CA' AND report_year = 2024\n", " \"\"\"\n", " ridership_data= pd.read_sql(query, connection)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "db017035-7b98-48a0-9a63-5fcf144e63d0", "metadata": {}, "outputs": [], @@ -195,6 +181,104 @@ " }).sort_values(by=\"ntd_id\").reset_index()" ] }, + 
{ + "cell_type": "code", + "execution_count": 10, + "id": "2fbaf115-871e-44da-9620-bd3da2123925", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
agencyntd_idunlinked_passenger_trips_uptagency_voms
0San Francisco Bay Area Rapid Transit District,...9000354927366.0582.0
1Golden Empire Transit District900043835221.093.0
2Santa Cruz Metropolitan Transit District900063911718.097.0
3City of Santa Monica, dba: Big Blue Bus900088633459.0162.0
4San Mateo County Transit District, dba: SamTrans9000910255600.0361.0
\n", + "
" + ], + "text/plain": [ + " agency ntd_id \\\n", + "0 San Francisco Bay Area Rapid Transit District,... 90003 \n", + "1 Golden Empire Transit District 90004 \n", + "2 Santa Cruz Metropolitan Transit District 90006 \n", + "3 City of Santa Monica, dba: Big Blue Bus 90008 \n", + "4 San Mateo County Transit District, dba: SamTrans 90009 \n", + "\n", + " unlinked_passenger_trips_upt agency_voms \n", + "0 54927366.0 582.0 \n", + "1 3835221.0 93.0 \n", + "2 3911718.0 97.0 \n", + "3 8633459.0 162.0 \n", + "4 10255600.0 361.0 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ridership_data_grouped.head(5)" + ] + }, { "cell_type": "code", "execution_count": 11, @@ -231,13 +315,13 @@ "outputs": [], "source": [ "#Store data in warehouse\n", - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data.parquet\", \"wb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data_2024.parquet\", \"wb\") as f:\n", " ridership_data_grouped.to_parquet(f, index=False)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "id": "38dd8fb5-5d8a-43e1-b21a-0f05ecfe70fd", "metadata": {}, "outputs": [], diff --git a/transit_provider_dashboard/04_data_processing.ipynb b/transit_provider_dashboard/04_data_processing.ipynb index a28529bc4..bc2f5fcf9 100644 --- a/transit_provider_dashboard/04_data_processing.ipynb +++ b/transit_provider_dashboard/04_data_processing.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: shared_utils in /opt/conda/lib/python3.11/site-packages (4.2)\n", + "Requirement already satisfied: shared_utils in /home/jovyan/data-analyses/_shared_utils (4.4)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -66,7 +66,7 @@ "outputs": [], "source": [ "# Load the stored ACS dataset from the specified GCS file path.\n", - "with 
fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data.parquet\", \"rb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data_2024.parquet\", \"rb\") as f:\n", " tracts_ca_acs = gpd.read_parquet(f)" ] }, @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data.parquet\", \"rb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data_2024.parquet\", \"rb\") as f:\n", " ridership_data_grouped = pd.read_parquet(f)" ] }, @@ -106,6 +106,17 @@ " orgs_stops_clean = gpd.read_parquet(f)" ] }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9055d0ac-413b-4e0a-9eea-1d5e1f131571", + "metadata": {}, + "outputs": [], + "source": [ + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_ca_data_2024.parquet\", \"rb\") as f:\n", + " ca_totals_acs = gpd.read_parquet(f)" + ] + }, { "cell_type": "markdown", "id": "703d3c9d-c755-45d9-aa74-467b315549dc", @@ -116,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 9, "id": "93efdfc1-8b2e-4c4f-959d-0303fea1104e", "metadata": {}, "outputs": [ @@ -214,7 +225,7 @@ "8 Sacramento Area Council of Governments " ] }, - "execution_count": 56, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -232,7 +243,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "4e240dfe-906f-40f7-846c-85fdfb163124", "metadata": {}, "outputs": [], @@ -248,7 +259,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "5b7f9095-e99c-4fb2-8f01-c57691e6707e", "metadata": {}, "outputs": [], @@ -265,7 +276,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "fb689771-40fe-4770-b598-bc8efad4ae4d", "metadata": {}, "outputs": [], @@ -277,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": 
"b26ac3a7-29c4-4d05-94ea-a932e01d5df9", "metadata": {}, "outputs": [], @@ -300,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "f842e8db-0236-48fc-b232-f04bbc4b07b3", "metadata": {}, "outputs": [], @@ -312,7 +323,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "60918016-ff2f-42c7-a803-77777f0c2e97", "metadata": {}, "outputs": [], @@ -326,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "fa5da612-30d5-4f92-81f6-4b679583c956", "metadata": {}, "outputs": [], @@ -342,7 +353,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "00428ed7-333d-4657-a3d0-b2664e2533ef", "metadata": {}, "outputs": [ @@ -351,7 +362,7 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 107671 entries, 0 to 107670\n", + "RangeIndex: 107671 entries, 0 to 107670\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", @@ -365,13 +376,13 @@ " 7 geometry 107671 non-null geometry\n", " 8 organization_name 94959 non-null object \n", " 9 name_clean 107671 non-null object \n", - " 10 source_record_id 88562 non-null object \n", - " 11 key 88562 non-null object \n", - " 12 organization_type 88562 non-null object \n", - " 13 ntd_id_y 84115 non-null object \n", - " 14 ntd_id_2022_y 84215 non-null object \n", + " 10 source_record_id 85607 non-null object \n", + " 11 key 85607 non-null object \n", + " 12 organization_type 85607 non-null object \n", + " 13 ntd_id_y 81301 non-null object \n", + " 14 ntd_id_2022_y 81401 non-null object \n", "dtypes: geometry(1), object(14)\n", - "memory usage: 13.1+ MB\n" + "memory usage: 12.3+ MB\n" ] } ], @@ -381,7 +392,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "7b4a2b5a-46cc-4f62-8387-70c61e9473f1", "metadata": {}, "outputs": [ @@ -512,7 +523,7 @@ "8 recPJULRJk1Yn824N " ] }, - "execution_count": 16, + 
"execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -536,7 +547,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "ab6e4ec8-2a80-49e3-a87e-825d9674967e", "metadata": {}, "outputs": [ @@ -567,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 20, "id": "aadfd3ec-fd6d-41f6-84f4-bca7a7817253", "metadata": {}, "outputs": [], @@ -600,7 +611,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "id": "2396a8c1-483b-4c5f-a4ab-06ccccd07932", "metadata": {}, "outputs": [ @@ -609,7 +620,7 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 107671 entries, 0 to 107670\n", + "RangeIndex: 107671 entries, 0 to 107670\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", @@ -623,13 +634,13 @@ " 7 geometry 107671 non-null geometry\n", " 8 organization_name 95351 non-null object \n", " 9 name_clean 107671 non-null object \n", - " 10 source_record_id 88562 non-null object \n", - " 11 key 88954 non-null object \n", - " 12 organization_type 88954 non-null object \n", - " 13 ntd_id_y 84115 non-null object \n", - " 14 ntd_id_2022_y 84215 non-null object \n", + " 10 source_record_id 85607 non-null object \n", + " 11 key 85999 non-null object \n", + " 12 organization_type 85999 non-null object \n", + " 13 ntd_id_y 81301 non-null object \n", + " 14 ntd_id_2022_y 81401 non-null object \n", "dtypes: geometry(1), object(14)\n", - "memory usage: 13.1+ MB\n" + "memory usage: 12.3+ MB\n" ] } ], @@ -639,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "id": "afa2cd5c-05ad-4bc5-826b-570723c38b1f", "metadata": {}, "outputs": [ @@ -649,7 +660,7 @@ "208" ] }, - "execution_count": 20, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -660,7 +671,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "id": 
"d5ca8df5-d40b-4e71-82bd-c30cbaa97acc", "metadata": {}, "outputs": [], @@ -680,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "id": "7351e332-0ed9-4739-bb5c-d76edc8d1bb7", "metadata": {}, "outputs": [], @@ -694,7 +705,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "id": "29888876-97a7-4462-9149-7a19ba540104", "metadata": {}, "outputs": [], @@ -705,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "id": "806da12f-9cf6-4306-81c0-dd057f31d8a9", "metadata": { "tags": [] @@ -718,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "id": "4bafb6fb-f42e-4e70-81aa-9dec5bce7991", "metadata": {}, "outputs": [], @@ -728,7 +739,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "id": "53f8fd3d-7d0a-4570-aa02-3df6df277380", "metadata": {}, "outputs": [ @@ -737,7 +748,7 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 107671 entries, 0 to 107670\n", + "RangeIndex: 107671 entries, 0 to 107670\n", "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", @@ -751,13 +762,13 @@ " 7 geometry 107671 non-null geometry\n", " 8 organization_name 95351 non-null object \n", " 9 name_clean 107671 non-null object \n", - " 10 source_record_id 88562 non-null object \n", - " 11 key 88954 non-null object \n", - " 12 organization_type 88954 non-null object \n", - " 13 ntd_id_y 84115 non-null object \n", - " 14 ntd_id_2022_y 84215 non-null object \n", + " 10 source_record_id 85607 non-null object \n", + " 11 key 85999 non-null object \n", + " 12 organization_type 85999 non-null object \n", + " 13 ntd_id_y 81301 non-null object \n", + " 14 ntd_id_2022_y 81401 non-null object \n", "dtypes: geometry(1), object(14)\n", - "memory usage: 13.1+ MB\n" + "memory usage: 12.3+ MB\n" ] } ], @@ -767,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 27, + 
"execution_count": 29, "id": "9de1e91a-aa0b-4eea-b6c2-05537cf21aea", "metadata": {}, "outputs": [], @@ -777,7 +788,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 30, "id": "538d7740-f3b7-477e-a2ec-81560e3966c5", "metadata": {}, "outputs": [ @@ -800,11 +811,11 @@ " 7 organization_source_record_id 197 non-null object \n", " 8 organization_name 197 non-null object \n", " 9 name_clean 207 non-null object \n", - " 10 source_record_id 179 non-null object \n", - " 11 key 182 non-null object \n", - " 12 organization_type 182 non-null object \n", - " 13 ntd_id_y 150 non-null object \n", - " 14 ntd_id_2022_y 152 non-null object \n", + " 10 source_record_id 161 non-null object \n", + " 11 key 164 non-null object \n", + " 12 organization_type 164 non-null object \n", + " 13 ntd_id_y 138 non-null object \n", + " 14 ntd_id_2022_y 140 non-null object \n", "dtypes: geometry(1), object(14)\n", "memory usage: 24.4+ KB\n" ] @@ -816,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 31, "id": "98dd17d1-56e0-4dfc-b566-d5ae3eb66881", "metadata": {}, "outputs": [ @@ -925,11 +936,11 @@ " recsrIZdx5Wt6n3ol\n", " Anaheim Transportation Network\n", " Anaheim Resort\n", - " recsrIZdx5Wt6n3ol\n", - " a49a3c2c1b56a748da002e3f343fa1c5\n", - " Independent Agency\n", - " 90211\n", - " 90211\n", + " None\n", + " None\n", + " None\n", + " None\n", + " None\n", " \n", " \n", " 4\n", @@ -993,25 +1004,25 @@ "0 Alhambra recNaKvzVQhGX1puu \n", "1 Amador recSBFiK95hJnJuYx \n", "2 Amtrak recKsb5FnJy70up78 \n", - "3 Anaheim Resort recsrIZdx5Wt6n3ol \n", + "3 Anaheim Resort None \n", "4 Antelope Valley Transit Authority recxsWR0KRrQTdjmg \n", "\n", " key organization_type ntd_id_y \\\n", "0 897ce086b03388bc914f5c239298fb85 City/Town 90247 \n", "1 71b58e792726688aef31d4712480d350 Independent Agency 9R02-91000 \n", "2 7225e6e33a67f74bc42fe137d9f9be23 Federal Government None \n", - "3 a49a3c2c1b56a748da002e3f343fa1c5 Independent Agency 90211 \n", + 
"3 None None None \n", "4 86e23a9a896696a1d14e026b3f17843b Independent Agency 90121 \n", "\n", " ntd_id_2022_y \n", "0 90247 \n", "1 91000 \n", "2 None \n", - "3 90211 \n", + "3 None \n", "4 90121 " ] }, - "execution_count": 29, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1022,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "id": "2e231b7d-d744-419d-b442-810b166fcd3d", "metadata": { "tags": [] @@ -1039,7 +1050,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "id": "fa657799-42aa-46d7-a789-c4d86b226c13", "metadata": { "tags": [] @@ -1052,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "id": "59ce3254-f021-40cd-869b-3c7a21f9002e", "metadata": { "tags": [] @@ -1141,7 +1152,6 @@ " income_65000_74999\n", " workers_with_no_car\n", " households_with_no_cars\n", - " B18101_001E\n", " public_asst_pop\n", " veteran_pop\n", " county_name\n", @@ -1188,224 +1198,218 @@ " CT\n", " 1413607\n", " 0\n", - " 5315\n", - " 879\n", - " 1017\n", - " 19\n", - " 79\n", - " 79\n", - " 20\n", + " 5489\n", + " 779\n", + " 921\n", + " 23\n", " 46\n", - " 76\n", - " 81\n", + " 69\n", + " 33\n", + " 115\n", + " 65\n", + " 67\n", + " 62\n", + " 59\n", + " 86\n", + " 97\n", + " 221\n", + " 165\n", + " 8\n", + " 12\n", + " 9\n", + " 48\n", + " 73\n", + " 126\n", + " 16\n", + " 0\n", " 85\n", - " 50\n", - " 76\n", - " 89\n", + " 77565\n", + " 523\n", + " 395\n", + " 653\n", + " 486\n", + " 740\n", + " 424\n", + " 178\n", + " 126\n", " 185\n", - " 121\n", - " 14\n", - " 0\n", - " 9\n", - " 80\n", - " 93\n", - " 86\n", - " 18\n", - " 13\n", - " 82\n", - " 76930\n", - " 509\n", - " 347\n", - " 565\n", - " 553\n", - " 844\n", - " 353\n", - " 217\n", - " 119\n", - " 186\n", - " 5284\n", - " 1876\n", - " 57\n", + " 123\n", + " 66\n", " Los Angeles\n", - " 1421\n", - " 1397\n", - " 570\n", - " 319\n", - " 566\n", - " 224\n", - " 292\n", - " 498\n", + " 
1571\n", + " 1226\n", + " 602\n", + " 351\n", + " 592\n", + " 242\n", + " 300\n", + " 618\n", " 1.406019e+06\n", " POLYGON ((174546.535 -434262.136, 174543.285 -...\n", " 23.630888\n", " \n", " \n", " 1\n", - " LA Metro Bus Schedule\n", - " 90154\n", - " 90154\n", - " 142294\n", - " Candlewood / Hayter\n", - " LA Metro Bus Schedule\n", - " recPnGkwdpnr8jmHB\n", - " Los Angeles County Metropolitan Transportation...\n", - " LA Metro Bus\n", - " recPnGkwdpnr8jmHB\n", - " 9e96bde610e80d71f500eea119c4723c\n", - " Independent Agency\n", - " 90154\n", - " 90154\n", + " Alhambra Schedule\n", + " 90247\n", + " 90247\n", + " 2619788\n", + " Alamansor St & Los Higos St\n", + " Alhambra Schedule\n", + " recNaKvzVQhGX1puu\n", + " City of Alhambra\n", + " Alhambra\n", + " recNaKvzVQhGX1puu\n", + " 897ce086b03388bc914f5c239298fb85\n", + " City/Town\n", + " 90247\n", + " 90247\n", " 06\n", " 037\n", - " 481103\n", - " 1400000US06037481103\n", - " 06037481103\n", - " 4811.03\n", - " Census Tract 4811.03\n", + " 482301\n", + " 1400000US06037482301\n", + " 06037482301\n", + " 4823.01\n", + " Census Tract 4823.01\n", " CA\n", " Los Angeles County\n", " California\n", " CT\n", - " 1413607\n", + " 873603\n", " 0\n", - " 5315\n", - " 879\n", - " 1017\n", - " 19\n", - " 79\n", - " 79\n", - " 20\n", - " 46\n", - " 76\n", - " 81\n", - " 85\n", - " 50\n", - " 76\n", - " 89\n", - " 185\n", - " 121\n", - " 14\n", - " 0\n", - " 9\n", + " 4974\n", + " 698\n", + " 1085\n", " 80\n", - " 93\n", + " 123\n", + " 11\n", + " 71\n", + " 15\n", + " 37\n", + " 36\n", + " 106\n", + " 85\n", + " 24\n", " 86\n", - " 18\n", - " 13\n", - " 82\n", - " 76930\n", - " 509\n", - " 347\n", - " 565\n", - " 553\n", - " 844\n", - " 353\n", - " 217\n", + " 200\n", + " 151\n", + " 72\n", + " 11\n", + " 49\n", + " 92\n", + " 118\n", + " 23\n", + " 0\n", + " 19\n", + " 50\n", + " 85720\n", + " 474\n", + " 701\n", + " 649\n", + " 461\n", + " 292\n", + " 279\n", + " 170\n", + " 55\n", + " 134\n", + " 332\n", " 119\n", - " 
186\n", - " 5284\n", - " 1876\n", - " 57\n", " Los Angeles\n", - " 1421\n", - " 1397\n", - " 570\n", - " 319\n", - " 566\n", - " 224\n", - " 292\n", - " 498\n", - " 1.406019e+06\n", - " POLYGON ((175957.387 -433744.848, 175878.515 -...\n", - " 747957.619083\n", + " 1824\n", + " 753\n", + " 449\n", + " 337\n", + " 537\n", + " 375\n", + " 210\n", + " 445\n", + " 8.743129e+05\n", + " POLYGON ((175016.153 -436000.911, 174975.556 -...\n", + " 122884.944234\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name ntd_id_x ntd_id_2022_x stop_id \\\n", - "0 Alhambra Schedule 90247 90247 2619788 \n", - "1 LA Metro Bus Schedule 90154 90154 142294 \n", + " name ntd_id_x ntd_id_2022_x stop_id \\\n", + "0 Alhambra Schedule 90247 90247 2619788 \n", + "1 Alhambra Schedule 90247 90247 2619788 \n", "\n", " stop_name schedule_gtfs_dataset_name \\\n", "0 Alamansor St & Los Higos St Alhambra Schedule \n", - "1 Candlewood / Hayter LA Metro Bus Schedule \n", - "\n", - " organization_source_record_id \\\n", - "0 recNaKvzVQhGX1puu \n", - "1 recPnGkwdpnr8jmHB \n", + "1 Alamansor St & Los Higos St Alhambra Schedule \n", "\n", - " organization_name name_clean \\\n", - "0 City of Alhambra Alhambra \n", - "1 Los Angeles County Metropolitan Transportation... 
LA Metro Bus \n", + " organization_source_record_id organization_name name_clean \\\n", + "0 recNaKvzVQhGX1puu City of Alhambra Alhambra \n", + "1 recNaKvzVQhGX1puu City of Alhambra Alhambra \n", "\n", - " source_record_id key organization_type \\\n", - "0 recNaKvzVQhGX1puu 897ce086b03388bc914f5c239298fb85 City/Town \n", - "1 recPnGkwdpnr8jmHB 9e96bde610e80d71f500eea119c4723c Independent Agency \n", + " source_record_id key organization_type \\\n", + "0 recNaKvzVQhGX1puu 897ce086b03388bc914f5c239298fb85 City/Town \n", + "1 recNaKvzVQhGX1puu 897ce086b03388bc914f5c239298fb85 City/Town \n", "\n", " ntd_id_y ntd_id_2022_y STATEFP COUNTYFP TRACTCE GEOIDFQ \\\n", "0 90247 90247 06 037 481103 1400000US06037481103 \n", - "1 90154 90154 06 037 481103 1400000US06037481103 \n", + "1 90247 90247 06 037 482301 1400000US06037482301 \n", "\n", " GEOID NAME NAMELSAD STUSPS NAMELSADCO \\\n", "0 06037481103 4811.03 Census Tract 4811.03 CA Los Angeles County \n", - "1 06037481103 4811.03 Census Tract 4811.03 CA Los Angeles County \n", + "1 06037482301 4823.01 Census Tract 4823.01 CA Los Angeles County \n", "\n", " STATE_NAME LSAD ALAND AWATER total_pop poverty_pop non_us_citizen \\\n", - "0 California CT 1413607 0 5315 879 1017 \n", - "1 California CT 1413607 0 5315 879 1017 \n", + "0 California CT 1413607 0 5489 779 921 \n", + "1 California CT 873603 0 4974 698 1085 \n", "\n", " male_65_to_66 male_67_to_69 male_70_to_74 male_75_to_79 male_80_to_84 \\\n", - "0 19 79 79 20 46 \n", - "1 19 79 79 20 46 \n", + "0 23 46 69 33 115 \n", + "1 80 123 11 71 15 \n", "\n", " male_85_and_over female_65_to_66 female_67_to_69 female_70_to_74 \\\n", - "0 76 81 85 50 \n", - "1 76 81 85 50 \n", + "0 65 67 62 59 \n", + "1 37 36 106 85 \n", "\n", " female_75_to_79 female_80_to_84 female_85_and_over male_15_17 \\\n", - "0 76 89 185 121 \n", - "1 76 89 185 121 \n", + "0 86 97 221 165 \n", + "1 24 86 200 151 \n", "\n", " male_18_19 male_20 male_21 male_22_24 female_15_17 female_18_19 \\\n", - "0 14 0 9 80 
93 86 \n", - "1 14 0 9 80 93 86 \n", + "0 8 12 9 48 73 126 \n", + "1 72 11 49 92 118 23 \n", "\n", " female_20 female_21 female_22_24 median_household_income \\\n", - "0 18 13 82 76930 \n", - "1 18 13 82 76930 \n", + "0 16 0 85 77565 \n", + "1 0 19 50 85720 \n", "\n", " income_less_10000 income_10000_14999 income_15000_24999 \\\n", - "0 509 347 565 \n", - "1 509 347 565 \n", + "0 523 395 653 \n", + "1 474 701 649 \n", "\n", " income_25000_34999 income_35000_49999 income_50000_64999 \\\n", - "0 553 844 353 \n", - "1 553 844 353 \n", + "0 486 740 424 \n", + "1 461 292 279 \n", "\n", " income_65000_74999 workers_with_no_car households_with_no_cars \\\n", - "0 217 119 186 \n", - "1 217 119 186 \n", + "0 178 126 185 \n", + "1 170 55 134 \n", "\n", - " B18101_001E public_asst_pop veteran_pop county_name inc_extremelylow \\\n", - "0 5284 1876 57 Los Angeles 1421 \n", - "1 5284 1876 57 Los Angeles 1421 \n", + " public_asst_pop veteran_pop county_name inc_extremelylow inc_verylow \\\n", + "0 123 66 Los Angeles 1571 1226 \n", + "1 332 119 Los Angeles 1824 753 \n", "\n", - " inc_verylow inc_low male_seniors female_seniors male_youth \\\n", - "0 1397 570 319 566 224 \n", - "1 1397 570 319 566 224 \n", + " inc_low male_seniors female_seniors male_youth female_youth \\\n", + "0 602 351 592 242 300 \n", + "1 449 337 537 375 210 \n", "\n", - " female_youth disabled_pop area_m2 \\\n", - "0 292 498 1.406019e+06 \n", - "1 292 498 1.406019e+06 \n", + " disabled_pop area_m2 \\\n", + "0 618 1.406019e+06 \n", + "1 445 8.743129e+05 \n", "\n", " geometry area_2 \n", "0 POLYGON ((174546.535 -434262.136, 174543.285 -... 23.630888 \n", - "1 POLYGON ((175957.387 -433744.848, 175878.515 -... 747957.619083 " + "1 POLYGON ((175016.153 -436000.911, 174975.556 -... 
122884.944234 " ] }, - "execution_count": 32, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1420,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "id": "9c706362-1ebf-408f-8ae2-b119a9bccd5c", "metadata": {}, "outputs": [ @@ -1426,7 +1430,7 @@ "text": [ "\n", "RangeIndex: 20839 entries, 0 to 20838\n", - "Data columns (total 77 columns):\n", + "Data columns (total 76 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 name 20839 non-null object \n", @@ -1438,11 +1442,11 @@ " 6 organization_source_record_id 18848 non-null object \n", " 7 organization_name 18848 non-null object \n", " 8 name_clean 20839 non-null object \n", - " 9 source_record_id 17485 non-null object \n", - " 10 key 18001 non-null object \n", - " 11 organization_type 18001 non-null object \n", - " 12 ntd_id_y 15508 non-null object \n", - " 13 ntd_id_2022_y 15570 non-null object \n", + " 9 source_record_id 16916 non-null object \n", + " 10 key 17432 non-null object \n", + " 11 organization_type 17432 non-null object \n", + " 12 ntd_id_y 15004 non-null object \n", + " 13 ntd_id_2022_y 15066 non-null object \n", " 14 STATEFP 20839 non-null object \n", " 15 COUNTYFP 20839 non-null object \n", " 16 TRACTCE 20839 non-null object \n", @@ -1491,23 +1495,22 @@ " 59 income_65000_74999 20839 non-null int64 \n", " 60 workers_with_no_car 20839 non-null int64 \n", " 61 households_with_no_cars 20839 non-null int64 \n", - " 62 B18101_001E 20839 non-null int64 \n", - " 63 public_asst_pop 20839 non-null int64 \n", - " 64 veteran_pop 20839 non-null int64 \n", - " 65 county_name 20839 non-null object \n", - " 66 inc_extremelylow 20839 non-null int64 \n", - " 67 inc_verylow 20839 non-null int64 \n", - " 68 inc_low 20839 non-null int64 \n", - " 69 male_seniors 20839 non-null int64 \n", - " 70 female_seniors 20839 non-null int64 \n", - " 71 male_youth 20839 non-null int64 \n", - " 72 female_youth 20839 non-null 
int64 \n", - " 73 disabled_pop 20839 non-null int64 \n", - " 74 area_m2 20839 non-null float64 \n", - " 75 geometry 20839 non-null geometry\n", - " 76 area_2 20839 non-null float64 \n", - "dtypes: float64(2), geometry(1), int64(48), object(26)\n", - "memory usage: 12.2+ MB\n" + " 62 public_asst_pop 20839 non-null int64 \n", + " 63 veteran_pop 20839 non-null int64 \n", + " 64 county_name 20839 non-null object \n", + " 65 inc_extremelylow 20839 non-null int64 \n", + " 66 inc_verylow 20839 non-null int64 \n", + " 67 inc_low 20839 non-null int64 \n", + " 68 male_seniors 20839 non-null int64 \n", + " 69 female_seniors 20839 non-null int64 \n", + " 70 male_youth 20839 non-null int64 \n", + " 71 female_youth 20839 non-null int64 \n", + " 72 disabled_pop 20839 non-null int64 \n", + " 73 area_m2 20839 non-null float64 \n", + " 74 geometry 20839 non-null geometry\n", + " 75 area_2 20839 non-null float64 \n", + "dtypes: float64(2), geometry(1), int64(47), object(26)\n", + "memory usage: 12.1+ MB\n" ] } ], @@ -1525,7 +1528,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 35, "id": "ed44c28b-0eb9-4d7c-b9a5-3739ea9f5133", "metadata": { "tags": [] @@ -1539,7 +1542,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "id": "597c67ad-9913-435f-8c9b-ddc1a9fbd297", "metadata": { "tags": [] @@ -1562,7 +1565,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "id": "e1e4d484-1748-4027-83b2-bbb8dc441ed9", "metadata": {}, "outputs": [ @@ -1627,93 +1630,89 @@ " 1400000US06037481103\n", " POLYGON ((174546.535 -434262.136, 174543.285 -...\n", " 23.630888\n", - " 0.089329\n", - " 0.014773\n", - " 0.017093\n", - " 0.002000\n", - " 0.003126\n", - " 0.008370\n", - " 0.031530\n", - " 0.023883\n", - " 0.023479\n", - " 0.009580\n", - " 0.005361\n", - " 0.009513\n", - " 0.003765\n", - " 0.004908\n", - " 0.000958\n", + " 0.092253\n", + " 0.013093\n", + " 0.015479\n", + " 0.002118\n", + " 0.003109\n", + " 
0.010387\n", + " 0.002067\n", + " 0.026404\n", + " 0.020605\n", + " 0.010118\n", + " 0.005899\n", + " 0.009950\n", + " 0.004067\n", + " 0.005042\n", + " 0.001109\n", " \n", " \n", " 1\n", - " LA Metro Bus Schedule\n", - " Independent Agency\n", - " Los Angeles County Metropolitan Transportation...\n", - " 90154\n", - " 90154\n", - " 142294\n", - " Candlewood / Hayter\n", - " 1400000US06037481103\n", - " POLYGON ((175957.387 -433744.848, 175878.515 -...\n", - " 747957.619083\n", - " 2827.411664\n", - " 467.600160\n", - " 541.011790\n", - " 63.304231\n", - " 98.946109\n", - " 264.920227\n", - " 997.972584\n", - " 755.926994\n", - " 743.159754\n", - " 303.221947\n", - " 169.697897\n", - " 301.094074\n", - " 119.160905\n", - " 155.334752\n", - " 30.322195\n", + " Alhambra Schedule\n", + " City/Town\n", + " City of Alhambra\n", + " 90247\n", + " 90247\n", + " 2619788\n", + " Alamansor St & Los Higos St\n", + " 1400000US06037482301\n", + " POLYGON ((175016.153 -436000.911, 174975.556 -...\n", + " 122884.944234\n", + " 699.097229\n", + " 98.104115\n", + " 152.497084\n", + " 7.730267\n", + " 18.833741\n", + " 62.544887\n", + " 46.662702\n", + " 256.363761\n", + " 105.834381\n", + " 63.107088\n", + " 47.365454\n", + " 75.475515\n", + " 52.706365\n", + " 29.515565\n", + " 16.725487\n", " \n", " \n", "\n", "" ], "text/plain": [ - " name organization_type \\\n", - "0 Alhambra Schedule City/Town \n", - "1 LA Metro Bus Schedule Independent Agency \n", - "\n", - " organization_name ntd_id_y ntd_id_2022_y \\\n", - "0 City of Alhambra 90247 90247 \n", - "1 Los Angeles County Metropolitan Transportation... 
90154 90154 \n", + " name organization_type organization_name ntd_id_y \\\n", + "0 Alhambra Schedule City/Town City of Alhambra 90247 \n", + "1 Alhambra Schedule City/Town City of Alhambra 90247 \n", "\n", - " stop_id stop_name GEOIDFQ \\\n", - "0 2619788 Alamansor St & Los Higos St 1400000US06037481103 \n", - "1 142294 Candlewood / Hayter 1400000US06037481103 \n", + " ntd_id_2022_y stop_id stop_name GEOIDFQ \\\n", + "0 90247 2619788 Alamansor St & Los Higos St 1400000US06037481103 \n", + "1 90247 2619788 Alamansor St & Los Higos St 1400000US06037482301 \n", "\n", " geometry area_2 \\\n", "0 POLYGON ((174546.535 -434262.136, 174543.285 -... 23.630888 \n", - "1 POLYGON ((175957.387 -433744.848, 175878.515 -... 747957.619083 \n", + "1 POLYGON ((175016.153 -436000.911, 174975.556 -... 122884.944234 \n", "\n", " total_pop_adj poverty_pop_adj non_us_citizen_adj \\\n", - "0 0.089329 0.014773 0.017093 \n", - "1 2827.411664 467.600160 541.011790 \n", + "0 0.092253 0.013093 0.015479 \n", + "1 699.097229 98.104115 152.497084 \n", "\n", " workers_with_no_car_adj households_with_no_cars_adj disabled_pop_adj \\\n", - "0 0.002000 0.003126 0.008370 \n", - "1 63.304231 98.946109 264.920227 \n", + "0 0.002118 0.003109 0.010387 \n", + "1 7.730267 18.833741 62.544887 \n", "\n", " public_asst_pop_adj inc_extremelylow_adj inc_verylow_adj inc_low_adj \\\n", - "0 0.031530 0.023883 0.023479 0.009580 \n", - "1 997.972584 755.926994 743.159754 303.221947 \n", + "0 0.002067 0.026404 0.020605 0.010118 \n", + "1 46.662702 256.363761 105.834381 63.107088 \n", "\n", " male_seniors_adj female_seniors_adj male_youth_adj female_youth_adj \\\n", - "0 0.005361 0.009513 0.003765 0.004908 \n", - "1 169.697897 301.094074 119.160905 155.334752 \n", + "0 0.005899 0.009950 0.004067 0.005042 \n", + "1 47.365454 75.475515 52.706365 29.515565 \n", "\n", " veteran_pop_adj \n", - "0 0.000958 \n", - "1 30.322195 " + "0 0.001109 \n", + "1 16.725487 " ] }, - "execution_count": 36, + "execution_count": 37, 
"metadata": {}, "output_type": "execute_result" } @@ -1739,7 +1738,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "id": "893d1d91-1f1d-43af-870d-abfba2ccccdf", "metadata": {}, "outputs": [], @@ -1773,7 +1772,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "id": "d4712014-ac05-4411-b48e-8ff985371e9c", "metadata": { "tags": [] @@ -1790,7 +1789,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 40, "id": "23e6568e-b163-40d3-999b-e05416c2b6e6", "metadata": {}, "outputs": [], @@ -1807,7 +1806,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 41, "id": "a7fd704c-6a7b-4573-85d1-8d27bbaf43a7", "metadata": { "tags": [] @@ -1818,7 +1817,7 @@ "output_type": "stream", "text": [ "\n", - "Int64Index: 207 entries, 0 to 206\n", + "RangeIndex: 207 entries, 0 to 206\n", "Data columns (total 28 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", @@ -1830,7 +1829,7 @@ " 5 organization_source_record_id 197 non-null object \n", " 6 organization_name 197 non-null object \n", " 7 name_clean 207 non-null object \n", - " 8 source_record_id 179 non-null object \n", + " 8 source_record_id 161 non-null object \n", " 9 total_pop_adj 207 non-null float64 \n", " 10 poverty_pop_adj 207 non-null float64 \n", " 11 non_us_citizen_adj 207 non-null float64 \n", @@ -1846,12 +1845,12 @@ " 21 veteran_pop_adj 207 non-null float64 \n", " 22 male_youth_adj 207 non-null float64 \n", " 23 female_youth_adj 207 non-null float64 \n", - " 24 organization_type 182 non-null object \n", - " 25 ntd_id 150 non-null object \n", - " 26 ntd_id_2022 152 non-null object \n", - " 27 key 182 non-null object \n", + " 24 organization_type 164 non-null object \n", + " 25 ntd_id 138 non-null object \n", + " 26 ntd_id_2022 140 non-null object \n", + " 27 key 164 non-null object \n", "dtypes: float64(15), geometry(1), object(12)\n", - "memory usage: 46.9+ KB\n" + "memory usage: 
45.4+ KB\n" ] } ], @@ -1861,7 +1860,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 42, "id": "f6060e9a-6fd1-48a0-80d0-84f4f8f402a7", "metadata": {}, "outputs": [], @@ -1881,7 +1880,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 43, "id": "55e5088d-0ccd-4a53-be4a-b88d90e91afd", "metadata": {}, "outputs": [], @@ -1895,7 +1894,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 44, "id": "4b87f77d-56cf-4bb5-81e6-687ae8e304e7", "metadata": {}, "outputs": [], @@ -1909,7 +1908,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 45, "id": "3dd76252-8511-4ca1-a32b-792c4ef20ab4", "metadata": {}, "outputs": [], @@ -1922,7 +1921,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 46, "id": "366986a8-b17b-4e19-b7c2-82608e14d250", "metadata": {}, "outputs": [], @@ -1961,7 +1960,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 47, "id": "e4f586a2-1591-45fe-9170-ea7df1ac1177", "metadata": {}, "outputs": [ @@ -1981,7 +1980,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 48, "id": "d6cc3be8-b52b-4ec0-97c6-1fc3c7e108d9", "metadata": {}, "outputs": [ @@ -2001,7 +2000,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 49, "id": "b7d230c1-b9ec-4bb7-80c7-f356470bf1e8", "metadata": {}, "outputs": [], @@ -2029,7 +2028,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 50, "id": "44cd8c32-05c5-4852-b58c-a8dff66cfba2", "metadata": {}, "outputs": [], @@ -2098,7 +2097,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 51, "id": "83cbfb70-c4d7-4bb5-870f-3e3fbb3eef43", "metadata": {}, "outputs": [ @@ -2106,18 +2105,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "Saved GeoJSON: gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_data.geojson\n", - "Saved Parquet: 
gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_data.parquet\n" + "Saved GeoJSON: gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_2024_data.geojson\n", + "Saved Parquet: gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_2024_data.parquet\n" ] } ], "source": [ - "export_gdf_public(merged_agency_ntd, \"transitprovider_acs_ridership_data\")" + "export_gdf_public(merged_agency_ntd, \"transitprovider_acs_ridership_2024_data\")" ] }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 52, "id": "d0549fb6-e2cb-4e1a-b8ee-67ba22e6c295", "metadata": {}, "outputs": [ @@ -2125,12 +2124,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Saved XLSX with metadata: gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_data.xlsx\n" + "Saved XLSX with metadata: gs://calitp-publish-data-analysis/transit_provider_dashboard/transitprovider_acs_ridership_2024_data.xlsx\n" ] } ], "source": [ - "export_gdf_public_with_metadata(merged_agency_ntd, \"transitprovider_acs_ridership_data\")" + "export_gdf_public_with_metadata(merged_agency_ntd, \"transitprovider_acs_ridership_2024_data\")" ] }, { diff --git a/transit_provider_dashboard/05_reconciliation_processing.ipynb b/transit_provider_dashboard/05_reconciliation_processing.ipynb index bba380a30..92a5a317a 100644 --- a/transit_provider_dashboard/05_reconciliation_processing.ipynb +++ b/transit_provider_dashboard/05_reconciliation_processing.ipynb @@ -10,7 +10,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: shared_utils in /home/jovyan/data-analyses/_shared_utils (4.3)\n", + "Requirement already satisfied: shared_utils in /home/jovyan/data-analyses/_shared_utils (4.4)\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -72,7 +72,7 @@ "outputs": [], "source": [ "# Load the stored ACS 
dataset from the specified GCS file path.\n", - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data.parquet\", \"rb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/census_tracts_data_2024.parquet\", \"rb\") as f:\n", " tracts_ca_acs = gpd.read_parquet(f)" ] }, @@ -84,7 +84,7 @@ "outputs": [], "source": [ "# Load Ridership Grouped Data \n", - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data.parquet\", \"rb\") as f:\n", + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/ridership_data_2024.parquet\", \"rb\") as f:\n", " ridership_data_grouped = pd.read_parquet(f)" ] }, @@ -181,11 +181,11 @@ " 7 geometry 107671 non-null geometry\n", " 8 organization_name 95351 non-null object \n", " 9 name_clean 107671 non-null object \n", - " 10 source_record_id 88562 non-null object \n", - " 11 key 88954 non-null object \n", - " 12 organization_type 88954 non-null object \n", - " 13 ntd_id_y 84115 non-null object \n", - " 14 ntd_id_2022_y 84215 non-null object \n", + " 10 source_record_id 85607 non-null object \n", + " 11 key 85999 non-null object \n", + " 12 organization_type 85999 non-null object \n", + " 13 ntd_id_y 81301 non-null object \n", + " 14 ntd_id_2022_y 81401 non-null object \n", "dtypes: geometry(1), object(14)\n", "memory usage: 12.3+ MB\n" ] @@ -204,117 +204,206 @@ "source": [ "# Reconciliation groups for different organization subset \n", "RECONCILIATION_GROUPS = {\n", - " \"hubspot_payments\": [\n", - " \"Sonoma County\", \n", + " \"hubspot_payments_with_added_agencies\": [\n", + " \"Alameda-Contra Costa Transit District\",\n", + " \"Anaheim Transportation Network\",\n", + " \"Capitol Corridor Joint Powers Authority\",\n", + " \"Central Contra Costa Transit Authority\",\n", " \"City and County of San Francisco\",\n", - " \"City of Vacaville\",\n", - " \"Cloverdale Transit\",\n", + " \"City of Alameda\",\n", + " \"City of Arcata\",\n", + " \"City of Camarillo\",\n", " \"City 
of Fairfield\",\n", - " \"Western Contra Costa Transit Authority\",\n", - " \"San Francisco Bay Area Rapid Transit District\",\n", + " \"City of Petaluma\",\n", + " \"City of Rancho Cordova\",\n", " \"City of Santa Rosa\",\n", - " \"Golden Gate Bridge, Highway and Transportation District\",\n", " \"City of Union City\",\n", - " \"City of Alameda\",\n", + " \"City of Vacaville\",\n", + " \"Cloverdale Transit\",\n", " \"Eastern Contra Costa Transit Authority\",\n", - " \"San Mateo County Transit District\",\n", - " \"Napa Valley Transportation Authority\",\n", - " \"Alameda-Contra Costa Transit District\",\n", + " \"El Dorado County Transit Authority\",\n", + " \"Flagship Cruises and Events Inc.\", #Organization names for San Diego Schedule\n", + " \"Golden Gate Bridge, Highway and Transportation District\",\n", + " \"Lake Transit Authority\",\n", " \"Livermore-Amador Valley Transit Authority\",\n", - " \"Santa Clara Valley Transportation Authority\",\n", - " #\"Metropolitan Tulsa Transit Authority\",\n", - " #\"River Valley Transit\",\n", - " \"Ventura County Transportation Commission\",\n", - " # \"MCTA (Monroe County Transportation Authority)\",\n", - " \"City of Petaluma\",\n", " \"Marin County Transit District\",\n", + " \"Mendocino Transit Authority\",\n", + " \"Monterey-Salinas Transit\",\n", + " \"Napa Valley Transportation Authority\",\n", + " \"Nevada County\",\n", + " \"North County Transit District\",\n", + " \"Orange County Transportation Authority\",\n", " \"Peninsula Corridor Joint Powers Board\",\n", - " \"Sonoma-Marin Area Rail Transit District\",\n", " \"Redwood Coast Transit Authority\",\n", - " \"Lake Transit Authority\",\n", - " \"City of Arcata\",\n", - " \"Flagship Cruises and Events Inc.\", #Organization names for San Diego Schedule\n", + " \"Redding Area Bus Authority\",\n", " \"San Diego International Airport\",\n", - " # \"Connecticut DOT/ CTTransit\"\n", - " \"City of Rancho Cordova\",\n", - " \"Orange County Transportation 
Authority\",\n", - " \"North County Transit District\",\n", - " \"Solano County Transit\",\n", - " \"Central Contra Costa Transit Authority\",\n", - " \"Capitol Corridor Joint Powers Authority\",\n", - " \"Santa Barbara Metropolitan Transit District\",\n", + " \"San Francisco Bay Area Rapid Transit District\",\n", + " \"San Mateo County Transit District\",\n", " \"Santa Barbara County Association of Governments\",\n", - " \"Monterey-Salinas Transit\",\n", - " \"Mendocino Transit Authority\",\n", - " \"Anaheim Transportation Network\",\n", - " # \"Waccamaw Regional Transportation Authority\", \n", - " \"Nevada County\",\n", + " \"Santa Barbara Metropolitan Transit District\",\n", + " \"Santa Clara Valley Transportation Authority\",\n", + " \"Solano County Transit\",\n", + " \"Sonoma County\",\n", + " \"Sonoma-Marin Area Rail Transit District\",\n", + " \"Ventura County Transportation Commission\",\n", + " \"Western Contra Costa Transit Authority\" \n", + " #\"Metropolitan Tulsa Transit Authority\",\n", + " #\"River Valley Transit\",\n", + " # \"MCTA (Monroe County Transportation Authority)\",\n", + " # \"Connecticut DOT/ CTTransit\"\n", " ],\n", - " \n", - " \"contactless_payments_june_2026\": [\n", - " # \"Ventura County Transportation Commission Valley Express\",\n", + "\n", + " \"contactless_payments_next_six_months\": [\n", + " \"City of Roseville\",\n", " \"City of Moorpark\",\n", - " \"City of Camarillo\",\n", + " #\"City of Ojai\",\n", " \"City of Thousand Oaks\",\n", " \"San Luis Obispo Regional Transit Authority\",\n", " \"City of Morro Bay\",\n", + " \"Gold Coast Transit District\",\n", + " \"City of San Luis Obispo\",\n", " \"Glenn County\",\n", " \"Southern California Regional Rail Authority\",\n", - " \"Redding Area Bus Authority\",\n", - " \"Gold Coast Transit District\",\n", - " \"City of Roseville\",\n", " \"Santa Cruz Metropolitan Transit District\",\n", + " \"Yolo County Transportation District\",\n", + " \"Trinity County\",\n", + " \"Sacramento 
County\",\n", " \"City of Simi Valley\",\n", - " # \"City of Wasco\",\n", - " \"City of San Luis Obispo\",\n", + " \"Yuba-Sutter Transit Authority\",\n", + " #\"City of Wasco\",\n", + " \"Imperial County Transportation Commission\"\n", " ],\n", - " \"reduced_fares_live_jan_2026\": [\n", - " \"Monterey-Salinas Transit\",\n", - " \"Santa Barbara Metropolitan Transit District\",\n", - " \"City of Rancho Cordova\",\n", - " \"Nevada County\",\n", - " \"Ventura County Transportation Commission\",\n", - " \"San Luis Obispo Regional Transit Authority\",\n", - " ],\n", - " \"reduced_fares_q1_2026\": [\n", - " \"El Dorado Transit Authority\",\n", - " \"Redding Area Bus Authority\",\n", + "\n", + " \"contactless_payments_next_six_months_including_la\": [\n", + " \"City of Roseville\",\n", + " \"City of Moorpark\",\n", + " #\"City of Ojai\",\n", + " \"City of Thousand Oaks\",\n", " \"San Luis Obispo Regional Transit Authority\",\n", + " \"City of Morro Bay\",\n", + " \"Gold Coast Transit District\",\n", " \"City of San Luis Obispo\",\n", - " \"City of Roseville\",\n", + " \"Glenn County\",\n", + " \"Southern California Regional Rail Authority\",\n", " \"Santa Cruz Metropolitan Transit District\",\n", - " ],\n", - "\n", - " \"tap_to_pay\": [\n", + " \"Yolo County Transportation District\",\n", + " \"Trinity County\",\n", + " \"Sacramento County\",\n", + " \"City of Simi Valley\",\n", + " \"Yuba-Sutter Transit Authority\",\n", + " #\"City of Wasco\",\n", + " \"Imperial County Transportation Commission\",\n", + " #\"Angels Flight Railway\",\n", " \"Antelope Valley Transit Authority\",\n", " \"City of Baldwin Park\",\n", + " \"City of Redondo Beach\", \n", " \"City of Burbank\",\n", - " \"City of Carson\",\n", + " \"City of Carson\", \n", + " #\"Compton Renaissance Transit System\",\n", " \"City of Culver City\",\n", + " \"City of Duarte\",\n", " \"City of Gardena\",\n", " \"City of Glendale\", \n", " \"City of Glendora\",\n", " \"City of Huntington Park\",\n", + " \"Los Angeles 
County\",\n", " \"City of Los Angeles\",\n", + " \"City of Lawndale\",\n", + " \"Los Angeles World Airports\",\n", + " \"Long Beach Transit\",\n", + " \"Los Angeles County Metropolitan Transportation Authority\",\n", " \"City of Montebello\",\n", " \"City of Monterey Park\",\n", " \"City of Norwalk\",\n", + " \"Palos Verdes Peninsula Transit Authority\",\n", " \"City of Pasadena\",\n", - " \"City of Redondo Beach\",\n", " \"City of Santa Clarita\",\n", " \"City of Santa Monica\",\n", - " \"City of Torrance\",\n", - " \"Long Beach Transit\",\n", - " \"Los Angeles County\",\n", - " \"Los Angeles County Metropolitan Transportation Authority\",\n", - " \"Palos Verdes Peninsula Transit Authority\",\n", - " \"Los Angeles World Airports\",\n", - " \"City of Duarte\",\n", - " \"City of Lawndale\"\n", - " # Angels Flight Railway,\n", - " # Compton Renaissance Transit System\n", + " \"City of Torrance\", \n", + " ],\n", "\n", - " ], \n", + "\n", + " # \"contactless_payments_june_2026\": [\n", + " # # \"Ventura County Transportation Commission Valley Express\",\n", + " # \"City of Camarillo\",\n", + " # \"City of Moorpark\",\n", + " # \"City of Morro Bay\",\n", + " # \"City of Roseville\",\n", + " # \"City of San Luis Obispo\",\n", + " # \"City of Simi Valley\",\n", + " # \"City of Thousand Oaks\",\n", + " # \"Glenn County\",\n", + " # \"Gold Coast Transit District\",\n", + " # \"Redding Area Bus Authority\",\n", + " # \"San Luis Obispo Regional Transit Authority\",\n", + " # \"Santa Cruz Metropolitan Transit District\",\n", + " # \"Southern California Regional Rail Authority\",\n", + " # ],\n", + " \n", + " \"reduced_fares_live_march_2026\": [\n", + " \"Monterey-Salinas Transit\",\n", + " \"Santa Barbara Metropolitan Transit District\",\n", + " \"City of Rancho Cordova\",\n", + " \"Nevada County\",\n", + " \"Ventura County Transportation Commission\",\n", + " \"San Luis Obispo Regional Transit Authority\",\n", + " \"El Dorado County Transit Authority\",\n", + " \"Redding 
Area Bus Authority\"\n", + " ],\n", + " \n", + " # \"reduced_fares_q1_2026\": [\n", + " # \"El Dorado Transit Authority\",\n", + " # \"Redding Area Bus Authority\",\n", + " # \"San Luis Obispo Regional Transit Authority\",\n", + " # \"City of San Luis Obispo\",\n", + " # \"City of Roseville\",\n", + " # \"Santa Cruz Metropolitan Transit District\",\n", + " # ],\n", + "\n", + " \"reduced_fares_next_6_months\": [\n", + " \"City of San Luis Obispo\",\n", + " \"Santa Cruz Metropolitan Transit District\", \n", + " \"Santa Barbara County Association of Governments\",\n", + " \"City of Camarillo\",\n", + " \"City of Roseville\",\n", + " \"City of Simi Valley\",\n", + " \"City of Thousand Oaks\",\n", + " \"Gold Coast Transit District\",\n", + " #\"Valley Express\"\n", + " \n", + "\n", + " ],\n", + "\n", + " # \"tap_to_pay\": [\n", + " # \"Antelope Valley Transit Authority\",\n", + " # \"City of Baldwin Park\",\n", + " # \"City of Burbank\",\n", + " # \"City of Carson\",\n", + " # \"City of Culver City\",\n", + " # \"City of Gardena\",\n", + " # \"City of Glendale\", \n", + " # \"City of Glendora\",\n", + " # \"City of Huntington Park\",\n", + " # \"City of Los Angeles\",\n", + " # \"City of Montebello\",\n", + " # \"City of Monterey Park\",\n", + " # \"City of Norwalk\",\n", + " # \"City of Pasadena\",\n", + " # \"City of Redondo Beach\",\n", + " # \"City of Santa Clarita\",\n", + " # \"City of Santa Monica\",\n", + " # \"City of Torrance\",\n", + " # \"Long Beach Transit\",\n", + " # \"Los Angeles County\",\n", + " # \"Los Angeles County Metropolitan Transportation Authority\",\n", + " # \"Palos Verdes Peninsula Transit Authority\",\n", + " # \"Los Angeles World Airports\",\n", + " # \"City of Duarte\",\n", + " # \"City of Lawndale\"\n", + " # # Angels Flight Railway,\n", + " # # Compton Renaissance Transit System\n", + "\n", + " # ], \n", "}" ] }, @@ -356,9 +445,73 @@ "GCS__PUBLIC_FILE_PATH = f\"{PUBLIC_GCS}transit_provider_dashboard/\"" ] }, + { + "cell_type":
"markdown", + "id": "7de133b3-760c-4684-be97-0eed632982c8", + "metadata": {}, + "source": [ + "### Calculating: Access to any public transit by groups." + ] + }, { "cell_type": "code", "execution_count": 14, + "id": "37f76353-93cd-417a-8e2d-27d5e1e4c08e", + "metadata": {}, + "outputs": [], + "source": [ + "# # Ensure CRS in meters\n", + "# statewide_dissolved = orgs_stop_buffered.dissolve().reset_index(drop=True).to_crs(epsg=3310)\n", + "# ca_boundary = unary_union(tracts_ca_acs.geometry)\n", + "\n", + "# statewide_clipped = statewide_dissolved.geometry.intersection(ca_boundary)\n", + "# statewide_dissolved['geometry'] = statewide_clipped\n", + "# tracts_ca_acs = tracts_ca_acs.to_crs(epsg=3310)\n", + "# tracts_ca_acs[\"tract_area\"] = tracts_ca_acs.geometry.area\n", + "\n", + "# intersection = gpd.overlay(statewide_dissolved, tracts_ca_acs, how=\"intersection\")\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "6e87e966-95e1-4669-b449-bbfa0ccb46df", + "metadata": {}, + "outputs": [], + "source": [ + "# # Ensure area ratio adjustment\n", + "# intersection['area_ratio'] = intersection.geometry.area / intersection['ALAND']\n", + "# # Adjust total_pop by area ratio\n", + "# intersection['total_pop_adj'] = intersection['total_pop'] * intersection['area_ratio']\n", + "# # Sum over all intersecting tracts\n", + "# population_with_access = intersection['total_pop_adj'].sum()\n", + "\n", + "# # List all columns to adjust\n", + "# cols_to_adjust = [\n", + "# 'total_pop','poverty_pop','non_us_citizen','workers_with_no_car',\n", + "# 'households_with_no_cars','disabled_pop','public_asst_pop','inc_extremelylow',\n", + "# 'inc_verylow','inc_low','male_seniors','female_seniors','male_youth',\n", + "# 'female_youth','veteran_pop'\n", + "# ]\n", + "\n", + "# # Multiply each column by the area ratio\n", + "# for col in cols_to_adjust:\n", + "# intersection[f'{col}_adj'] = intersection[col] * intersection['area_ratio']\n", + "\n", + "# # Sum across all
intersected tracts to get statewide totals\n", + "# statewide_access_totals = intersection[[f'{col}_adj' for col in cols_to_adjust]].sum().to_frame().T\n", + "\n", + "# # Convert all columns to integers\n", + "# statewide_access_totals = statewide_access_totals.astype(int)\n", + "\n", + "# # DataFrame with all adjusted population counts\n", + "# statewide_access_totals" + ] + }, + { + "cell_type": "code", + "execution_count": 16, "id": "84d65035-e264-477b-b17b-45a0cc7b939d", "metadata": {}, "outputs": [ @@ -366,7 +519,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1893/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Uploaded hubspot_payments_with_added_agencies to private and public GCS (Parquet, GeoJSON, CSV)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" ] }, @@ -374,14 +542,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Uploaded hubspot_payments to private and public GCS (Parquet, GeoJSON, CSV)\n" + "Uploaded contactless_payments_next_six_months to private and public GCS (Parquet, GeoJSON, CSV)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1893/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" ] }, @@ -389,14 +557,14 @@ "name": "stdout", 
"output_type": "stream", "text": [ - "Uploaded contactless_payments_june_2026 to private and public GCS (Parquet, GeoJSON, CSV)\n" + "Uploaded contactless_payments_next_six_months_including_la to private and public GCS (Parquet, GeoJSON, CSV)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1893/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" ] }, @@ -404,14 +572,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Uploaded reduced_fares_live_jan_2026 to private and public GCS (Parquet, GeoJSON, CSV)\n" + "Uploaded reduced_fares_live_march_2026 to private and public GCS (Parquet, GeoJSON, CSV)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1893/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" ] }, @@ -426,7 +594,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_1893/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", + "/tmp/ipykernel_5200/4206040860.py:52: UserWarning: Geometry column does not contain geometry.\n", " final_gdf_copy[\"geometry\"] = final_gdf_copy.geometry.apply(lambda x: x.wkt if x else None)\n" ] }, @@ -434,7 +602,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Uploaded tap_to_pay to private and public GCS (Parquet, GeoJSON, CSV)\n" + "Uploaded reduced_fares_next_6_months to private and public GCS (Parquet, GeoJSON, CSV)\n" ] } ], @@ -472,7 +640,7 @@ "\n", " final_gdf = final_gdf.to_crs(epsg=4326)\n", " \n", - " # Paths for your private output folder\n", + " # 
Paths for private output folder\n", " parquet_path = f\"{output_folder}/{group_name}.parquet\"\n", " geojson_path = f\"{output_folder}/{group_name}.geojson\"\n", " csv_path = f\"{output_folder}/{group_name}.csv\"\n", @@ -506,19 +674,19 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "3b1ab703-9d3b-4319-9963-aae5b180e982", "metadata": {}, "outputs": [], "source": [ "# Cross check one group \n", - "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/tap_to_pay.parquet\", \"rb\") as f:\n", - " tap_to_pay = gpd.read_parquet(f)" + "with fs.open(f\"{GCS_FILE_PATH}/transit_provider_dashboard/reduced_fares_q1_2026.parquet\", \"rb\") as f:\n", + " reduced_fares_q1_2026 = gpd.read_parquet(f)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "059615e4-1c0b-413f-9ee4-523bc585c86d", "metadata": {}, "outputs": [ @@ -555,12 +723,12 @@ } ], "source": [ - "tap_to_pay.info()" + "reduced_fares_q1_2026.info()" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "25f56f69-15b6-4ce9-b9a2-993693c9e32c", "metadata": {}, "outputs": [ @@ -608,24 +776,24 @@ " \n", " \n", " 0\n", - " 9.209175e+06\n", - " 1.280361e+06\n", - " 1.441314e+06\n", - " 186698.405012\n", - " 290226.968669\n", - " 1.003196e+06\n", - " 3.175872e+06\n", - " 2.253825e+06\n", - " 1.632346e+06\n", - " 904944.447095\n", - " 579194.818784\n", - " 743509.907874\n", - " 594392.952438\n", - " 580087.993019\n", - " 201337.250575\n", - " 333206885.0\n", - " 3979.0\n", - " MULTIPOLYGON (((-118.82018 34.00950, -118.8210...\n", + " 573732.059169\n", + " 75173.308582\n", + " 44972.468718\n", + " 8092.965732\n", + " 17564.294869\n", + " 75438.859266\n", + " 29542.014814\n", + " 146514.519549\n", + " 96130.173745\n", + " 61245.903455\n", + " 44254.49558\n", + " 54794.616692\n", + " 52439.932137\n", + " 51402.454971\n", + " 24408.428357\n", + " 5466620.0\n", + " 177.0\n", + " MULTIPOLYGON (((-122.14502 36.97543, 
-122.1458...\n", " \n", " \n", "\n", @@ -633,68 +801,291 @@ ], "text/plain": [ " total_pop_adj poverty_pop_adj non_us_citizen_adj \\\n", - "0 9.209175e+06 1.280361e+06 1.441314e+06 \n", + "0 573732.059169 75173.308582 44972.468718 \n", "\n", " workers_with_no_car_adj households_with_no_cars_adj disabled_pop_adj \\\n", - "0 186698.405012 290226.968669 1.003196e+06 \n", + "0 8092.965732 17564.294869 75438.859266 \n", "\n", - " public_asst_pop_adj inc_extremelylow_adj inc_verylow_adj inc_low_adj \\\n", - "0 3.175872e+06 2.253825e+06 1.632346e+06 904944.447095 \n", + " public_asst_pop_adj inc_extremelylow_adj inc_verylow_adj inc_low_adj \\\n", + "0 29542.014814 146514.519549 96130.173745 61245.903455 \n", "\n", " male_seniors_adj female_seniors_adj male_youth_adj female_youth_adj \\\n", - "0 579194.818784 743509.907874 594392.952438 580087.993019 \n", + "0 44254.49558 54794.616692 52439.932137 51402.454971 \n", "\n", " veteran_pop_adj unlinked_passenger_trips_upt agency_voms \\\n", - "0 201337.250575 333206885.0 3979.0 \n", + "0 24408.428357 5466620.0 177.0 \n", "\n", " geometry \n", - "0 MULTIPOLYGON (((-118.82018 34.00950, -118.8210... " + "0 MULTIPOLYGON (((-122.14502 36.97543, -122.1458... " ] }, - "execution_count": 17, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tap_to_pay.head(1)" + "reduced_fares_q1_2026.head(1)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "2298bf47-f37f-497d-bf43-73a995cf7f36", + "execution_count": 21, + "id": "6806c602-462a-453d-9c5a-d7f86f6719d6", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], "text/plain": [ - "\n", - "Name: WGS 84\n", - "Axis Info [ellipsoidal]:\n", - "- Lat[north]: Geodetic latitude (degree)\n", - "- Lon[east]: Geodetic longitude (degree)\n", - "Area of Use:\n", - "- name: World.\n", - "- bounds: (-180.0, -90.0, 180.0, 90.0)\n", - "Datum: World Geodetic System 1984 ensemble\n", - "- Ellipsoid: WGS 84\n", - "- Prime Meridian: Greenwich" + "" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tap_to_pay.crs" + "reduced_fares_q1_2026.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "6806c602-462a-453d-9c5a-d7f86f6719d6", + "id": "d17c1c33-0636-4aca-a01f-63892fb150b9", "metadata": {}, "outputs": [], "source": []