diff --git a/CHANGELOG.md b/CHANGELOG.md index 2a51720..d51bd8d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +* Made minutes available for formatting file names, @JoachimKoenigslieb +- use `.expand_dims` instead of `.assign_coords` to make sure we have both time dimension and time coordinates when loading from files, @JoachimKoenigslieb +- Added a helper `make_pvlib_clearsky_dataset` which creates clearsky data by calling `pvlib`. Also added a new config variable `clearsky_source` which defaults to `file` and switches behavior when set to `pvlib`, @JoachimKoenigslieb +- `check_solar_elevation` now does not assume location is in Copenhagen by default, @JoachimKoenigslieb + ### Added diff --git a/sunflow/data_io.py b/sunflow/data_io.py index 70973d3..7a26962 100644 --- a/sunflow/data_io.py +++ b/sunflow/data_io.py @@ -133,6 +133,7 @@ def generate_input_filename( {month}: Two-digit month (e.g. 03) {day}: Two-digit day (e.g. 09) {hour}: Two-digit hour (e.g. 12) + {minute}: Two-digit minute (e.g. 30) Path separators in the result are supported, so a format like ``{year}/{month}/{day}/{dataset_name}_{timestamp}.nc`` resolves to a @@ -154,6 +155,7 @@ def generate_input_filename( month=time_step.strftime("%m"), day=time_step.strftime("%d"), hour=time_step.strftime("%H"), + minute=time_step.strftime("%M"), ) return filename @@ -226,7 +228,15 @@ def load_data_from_files( try: ds = xr.open_dataset(filepath) - collected.append(ds.assign_coords(time=[time_step.replace(tzinfo=None)])) + time_step_naive = time_step.replace(tzinfo=None) + + # Ensure time is in both dimension and in coords. + if "time" in ds.dims: + ds = ds.assign_coords(time=[time_step_naive]) + else: + ds = ds.expand_dims(time=[time_step_naive]) + + collected.append(ds) logger.info(f"Loaded {data_type} from {filepath}") except Exception as e: logger.error(f"Failed to load {data_type} {filepath}: {e}") diff --git a/sunflow/forecast.py b/sunflow/forecast.py index 3abe325..fe92f65 100644 --- a/sunflow/forecast.py +++ b/sunflow/forecast.py @@ -2,11 +2,51 @@ from datetime import datetime import numpy as np +import pandas as pd +import pvlib import xarray as xr from Models.ProbabilisticAdvection import ProbabilisticAdvection from .geospatial import get_coordinates +PVLIB_CLEARSKY_VARIABLE_NAME = "pvlib_clearsky_ghi" + + +def make_pvlib_clearsky_dataset( + times: list[datetime], + latitudes: np.ndarray, + longitudes: np.ndarray, +) -> xr.Dataset: + lon_grid, lat_grid = np.meshgrid(longitudes, latitudes) + lon_grid = np.where(lon_grid > 180, lon_grid - 360, lon_grid) + flat_latitudes = lat_grid.ravel() + flat_longitudes = lon_grid.ravel() + + fields = [] + for time in times: + repeated_times = pd.DatetimeIndex([time] * len(flat_latitudes)) + solar_position = pvlib.solarposition.get_solarposition( + repeated_times, + flat_latitudes, + flat_longitudes, + ) + clearsky = pvlib.clearsky.simplified_solis(solar_position["apparent_elevation"]) + fields.append(clearsky["ghi"].to_numpy().reshape(lat_grid.shape)) + + return xr.Dataset( + { + PVLIB_CLEARSKY_VARIABLE_NAME: ( + ["time", "latitude", "longitude"], + np.stack(fields), + ) + }, + coords={ + "time": [time.replace(tzinfo=None) for time in times], + "latitude": latitudes, + "longitude": longitudes, + }, + ) + def preprocess_data( data: xr.Dataset, diff --git a/sunflow/geospatial.py b/sunflow/geospatial.py index 80712ff..5c1dc29 100644 --- a/sunflow/geospatial.py +++ b/sunflow/geospatial.py @@ -83,6 +83,9 @@ def get_coordinates(ds: xr.Dataset) -> tuple[np.ndarray, np.ndarray]: elif "lat" in ds.coords and "lon" in ds.coords: latitudes = ds.lat.values longitudes = ds.lon.values + elif "latitude" in ds.coords and "longitude" in ds.coords: + latitudes = ds.latitude.values + longitudes = ds.longitude.values else: raise RuntimeError("Could not find coordinates in dataset.") @@ -97,23 +100,26 @@ def get_coordinates(ds: xr.Dataset) -> tuple[np.ndarray, np.ndarray]: def check_solar_elevation( time: datetime, - lat: float = 55.6761, - lon: float = 12.5683, + lat: float, + lon: float, ) -> float: """Compute the solar elevation angle at a given time and location. - Uses pvlib to calculate the solar position. Defaults to Copenhagen, - Denmark (55.68°N, 12.57°E). + Uses pvlib to calculate the solar position. Longitudes in 0..360 convention + are converted to the -180..180 convention expected by pvlib. Args: time: Datetime (timezone-aware) for which to compute the elevation. - lat: Latitude in decimal degrees. Default 55.6761 (Copenhagen). - lon: Longitude in decimal degrees. Default 12.5683 (Copenhagen). + lat: Latitude in decimal degrees. + lon: Longitude in decimal degrees. Returns: Solar elevation angle in degrees above the horizon. Negative values indicate the sun is below the horizon. """ + if lon > 180: + lon -= 360 + location = pvlib.location.Location(lat, lon) solar_elevation = location.get_solarposition(time)["elevation"].values[0] return solar_elevation diff --git a/sunflow/main.py b/sunflow/main.py index 6034bab..abf183e 100644 --- a/sunflow/main.py +++ b/sunflow/main.py @@ -22,8 +22,14 @@ save_forecast, ) from .downloaders import download_past_data -from .forecast import multiply_clearsky, preprocess_data, simple_advection_forecast -from .geospatial import check_solar_elevation, get_bbox +from .forecast import ( + PVLIB_CLEARSKY_VARIABLE_NAME, + make_pvlib_clearsky_dataset, + multiply_clearsky, + preprocess_data, + simple_advection_forecast, +) +from .geospatial import check_solar_elevation, get_bbox, get_coordinates from .time_handler import generate_time_steps, round_time from .validation import ( MissingClearskyDataError, @@ -193,6 +199,10 @@ def run_nowcast( """ time_step_str = time_step.strftime("%Y-%m-%dT%H:%M:%SZ") logger.info(f"--- Running nowcast for {time_step_str} ---") + nc_variable_names = config["nc_variable_names"].copy() + clearsky_source = config.get("clearsky_source", "file") + if clearsky_source == "pvlib": + nc_variable_names["sds_cs"] = PVLIB_CLEARSKY_VARIABLE_NAME # Fetch current data (with retry loop in operational mode) fetch_current_data_with_retry( @@ -209,8 +219,13 @@ def run_nowcast( # Check solar elevation try: - solar_elevation = check_solar_elevation(time_step) - logger.info(f"Solar elevation: {solar_elevation:.2f} degrees") + lon_min, lat_min, lon_max, lat_max = map(float, bbox.split(",")) + solar_elevation = max( + check_solar_elevation(time_step, lat=lat, lon=lon) + for lat in (lat_min, lat_max) + for lon in (lon_min, lon_max) + ) + logger.info(f"Maximum corner solar elevation: {solar_elevation:.2f} degrees") if solar_elevation < 1: reason = "sun too low" logger.warning(f"{reason.capitalize()}. Skipping.\n") @@ -261,10 +276,20 @@ def run_nowcast( if n_loaded == 0: raise RuntimeError("No past data loaded. Cannot proceed.") + if clearsky_source == "pvlib": + latitudes, longitudes = get_coordinates(data) + data = data.merge( + make_pvlib_clearsky_dataset( + past_time_steps, + latitudes, + longitudes, + ) + ) + # Preprocess logger.info("Preprocessing data...") ratio_data, latitudes, longitudes = preprocess_data( - data, past_time_steps, config["nc_variable_names"] + data, past_time_steps, nc_variable_names ) # Validate data shape @@ -294,19 +319,27 @@ def run_nowcast( clearsky_t0_time = time_step - timedelta(days=1) all_clearsky_time_steps = [clearsky_t0_time] + previous_day_time_steps - # Fetch clearsky data with fallback to earlier days if a file is missing - logger.info("Fetching clearsky data...") - clearsky_data = fetch_clearsky_with_fallback( - all_clearsky_time_steps, - run_mode, - nowcast_config.max_clearsky_fallback_days, - config, - bbox, - dataset_name, - bbox_choice, - nowcast_config, - s3_config, - ) + if clearsky_source == "pvlib": + logger.info("Generating pvlib clearsky data...") + clearsky_data = make_pvlib_clearsky_dataset( + all_clearsky_time_steps, + latitudes, + longitudes, + ) + else: + # Fetch clearsky data with fallback to earlier days if a file is missing + logger.info("Fetching clearsky data...") + clearsky_data = fetch_clearsky_with_fallback( + all_clearsky_time_steps, + run_mode, + nowcast_config.max_clearsky_fallback_days, + config, + bbox, + dataset_name, + bbox_choice, + nowcast_config, + s3_config, + ) # Validate clearsky data try: @@ -319,7 +352,7 @@ def run_nowcast( clearsky_data, previous_day_time_steps, expected_spatial_shape, - config["nc_variable_names"], + nc_variable_names, ) # Multiply by clearsky to get actual solar irradiance @@ -328,12 +361,12 @@ def run_nowcast( ratio_forecast, clearsky_data, previous_day_time_steps, - config["nc_variable_names"], + nc_variable_names, ) # Prepend timestep 0: current observation (ratio_data[-1]) × clearsky at t=0 sds_cs_t0 = clearsky_data.sel(time=clearsky_t0_time.replace(tzinfo=None))[ - config["nc_variable_names"]["sds_cs"] + nc_variable_names["sds_cs"] ].values solar_t0 = ratio_data[-1] * sds_cs_t0 solar_forecast = np.concatenate([solar_t0[np.newaxis, :, :], solar_forecast], axis=0)