Skip to content

Commit 1d977ed

Browse files
committed
merge: merge all development from dev to main
2 parents b62ad1f + 791bb8e commit 1d977ed

14 files changed

Lines changed: 770 additions & 29 deletions

File tree

bot/bot.py

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,23 @@
11
import asyncio
22
import contextlib
3+
import types
34
from sys import exception
45

56
import aiohttp
67
from discord.errors import Forbidden
8+
from discord.ext import commands
79
from pydis_core import BotBase
10+
from pydis_core.utils import scheduling
11+
from pydis_core.utils._extensions import walk_extensions
812
from pydis_core.utils.error_handling import handle_forbidden_from_block
913
from sentry_sdk import new_scope, start_transaction
1014

1115
from bot import constants, exts
1216
from bot.log import get_logger
17+
from bot.utils.startup_reporting import StartupFailureReporter
1318

1419
log = get_logger("bot")
1520

16-
1721
class StartupError(Exception):
1822
"""Exception class for startup errors."""
1923

@@ -26,9 +30,13 @@ class Bot(BotBase):
2630
"""A subclass of `pydis_core.BotBase` that implements bot-specific functions."""
2731

2832
def __init__(self, *args, **kwargs):
    """Initialise the base bot, then set up the bookkeeping used for startup failure reporting."""
    super().__init__(*args, **kwargs)

    # Exceptions raised while loading, keyed by extension name; filled in as loads fail
    # so they can all be reported together once every attempt has finished.
    self.extension_load_failures: dict[str, BaseException] = {}

    # One scheduled load task per extension name.
    self._extension_load_tasks: dict[str, asyncio.Task] = {}

    # Helper that is asked to notify about the collected failures.
    self._startup_failure_reporter = StartupFailureReporter()
39+
3240
async def load_extension(self, name: str, *args, **kwargs) -> None:
3341
"""Extend D.py's load_extension function to also record sentry performance stats."""
3442
with start_transaction(op="cog-load", name=name):
@@ -77,3 +85,53 @@ async def on_error(self, event: str, *args, **kwargs) -> None:
7785
scope.set_extra("kwargs", kwargs)
7886

7987
log.exception(f"Unhandled exception in {event}.")
88+
89+
async def add_cog(self, cog: commands.Cog) -> None:
    """
    Register a cog with the bot, recording any failure before re-raising it.

    Overrides `BotBase.add_cog`. If adding the cog (or logging its success) raises, the exception
    is stored in `self.extension_load_failures`, logged with its traceback, and then propagated
    to the caller unchanged.
    """
    module_name = cog.__module__

    try:
        await super().add_cog(cog)
        log.info(f"Cog successfully loaded: {cog.qualified_name}")
    except BaseException as error:
        # Fall back to a placeholder key when the cog's module name is empty.
        if module_name:
            failure_key = module_name
        else:
            failure_key = f"(unknown)::{cog.qualified_name}"
        self.extension_load_failures[failure_key] = error

        log.exception(f"Failed during add_cog (extension={module_name}, cog={cog.qualified_name})")

        # Recording is for reporting only; the caller still sees the original error.
        raise
111+
112+
async def _load_extensions(self, module: types.ModuleType) -> None:
    """
    Load every extension found under `module` and report the ones that failed.

    Waits until the guild is available, schedules one load task per extension, waits for all
    of them to finish, and finally hands any recorded failures to the startup failure reporter.
    """
    await self.wait_until_guild_available()

    self.all_extensions = walk_extensions(module)

    async def _load_one(extension: str) -> None:
        # Load a single extension; on failure, remember the error under the extension's name
        # and re-raise so the task itself also ends with that exception.
        try:
            await self.load_extension(extension)
            log.info(f"Extension successfully loaded: {extension}")
        except BaseException as error:
            self.extension_load_failures[extension] = error
            log.exception(f"Failed to load extension: {extension}")
            raise

    for extension_name in self.all_extensions:
        self._extension_load_tasks[extension_name] = scheduling.create_task(_load_one(extension_name))

    # return_exceptions=True keeps one failing task from aborting the wait for the others,
    # so every failure can be reported in a single message.
    pending_loads = self._extension_load_tasks.values()
    await asyncio.gather(*pending_loads, return_exceptions=True)

    # Nothing failed: no notification needed.
    if not self.extension_load_failures:
        return

    await self._startup_failure_reporter.notify(self, self.extension_load_failures)

bot/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,9 @@ class _URLs(_BaseURLs):
462462
connect_max_retries: int = 3
463463
connect_cooldown: int = 5
464464

465+
# Back-off in cog_load
466+
connect_initial_backoff: int = 1
467+
465468
site_logs_view: str = "https://pythondiscord.com/staff/bot/logs"
466469

467470

bot/exts/filtering/filtering.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import datetime
23
import io
34
import json
@@ -24,7 +25,7 @@
2425
import bot.exts.filtering._ui.filter as filters_ui
2526
from bot import constants
2627
from bot.bot import Bot
27-
from bot.constants import BaseURLs, Channels, Guild, MODERATION_ROLES, Roles
28+
from bot.constants import BaseURLs, Channels, Guild, MODERATION_ROLES, Roles, URLs
2829
from bot.exts.backend.branding._repository import HEADERS, PARAMS
2930
from bot.exts.filtering._filter_context import Event, FilterContext
3031
from bot.exts.filtering._filter_lists import FilterList, ListType, ListTypeConverter, filter_list_types
@@ -55,6 +56,7 @@
5556
from bot.utils.channel import is_mod_channel
5657
from bot.utils.lock import lock_arg
5758
from bot.utils.message_cache import MessageCache
59+
from bot.utils.retry import is_retryable_api_error
5860

5961
log = get_logger(__name__)
6062

@@ -108,7 +110,31 @@ async def cog_load(self) -> None:
108110
await self.bot.wait_until_guild_available()
109111

110112
log.trace("Loading filtering information from the database.")
111-
raw_filter_lists = await self.bot.api_client.get("bot/filter/filter_lists")
113+
for attempt in range(1, URLs.connect_max_retries + 1):
114+
try:
115+
raw_filter_lists = await self.bot.api_client.get("bot/filter/filter_lists")
116+
break
117+
except Exception as error:
118+
is_retryable = is_retryable_api_error(error)
119+
is_last_attempt = attempt == URLs.connect_max_retries
120+
121+
if not is_retryable:
122+
raise
123+
124+
if is_last_attempt:
125+
log.exception("Failed to load filtering data after %d attempts.", URLs.connect_max_retries)
126+
raise
127+
128+
backoff_seconds = URLs.connect_initial_backoff * (2 ** (attempt - 1))
129+
log.warning(
130+
"Failed to load filtering data (attempt %d/%d). Retrying in %d second(s): %s",
131+
attempt,
132+
URLs.connect_max_retries,
133+
backoff_seconds,
134+
error
135+
)
136+
await asyncio.sleep(backoff_seconds)
137+
112138
example_list = None
113139
for raw_filter_list in raw_filter_lists:
114140
loaded_list = self._load_raw_filter_list(raw_filter_list)

bot/exts/info/python_news.py

Lines changed: 42 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import re
23
import typing as t
34
from datetime import UTC, datetime, timedelta
@@ -12,7 +13,9 @@
1213

1314
from bot import constants
1415
from bot.bot import Bot
16+
from bot.constants import URLs
1517
from bot.log import get_logger
18+
from bot.utils.retry import is_retryable_api_error
1619
from bot.utils.webhooks import send_webhook
1720

1821
PEPS_RSS_URL = "https://peps.python.org/peps.rss"
@@ -46,19 +49,45 @@ def __init__(self, bot: Bot):
4649

4750
async def cog_load(self) -> None:
    """
    Load all existing seen items from db and create any missing mailing lists.

    The whole fetch-and-sync sequence is attempted up to `URLs.connect_max_retries` times.
    Errors that `is_retryable_api_error` rejects are re-raised immediately; retryable errors
    are retried with exponential backoff and re-raised once the last attempt has failed.
    """
    max_attempts = URLs.connect_max_retries

    for attempt in range(1, max_attempts + 1):
        try:
            with sentry_sdk.start_span(description="Fetch mailing lists from site"):
                response = await self.bot.api_client.get("bot/mailing-lists")

            # Start from an empty mapping on every attempt so a failed earlier attempt
            # cannot leave partial state behind.
            self.seen_items = {}
            for known_list in response:
                self.seen_items[known_list["name"]] = set(known_list["seen_items"])

            with sentry_sdk.start_span(description="Update site with new mailing lists"):
                for list_name in ("pep", *constants.PythonNews.mail_lists):
                    if list_name in self.seen_items:
                        continue
                    await self.bot.api_client.post("bot/mailing-lists", json={"name": list_name})
                    self.seen_items[list_name] = set()

            self.fetch_new_media.start()
            return

        except Exception as error:
            if not is_retryable_api_error(error):
                raise

            if attempt == max_attempts:
                log.exception(
                    "Failed to load PythonNews mailing lists after %d attempt(s).",
                    max_attempts,
                )
                raise

            # Delay doubles with each failed attempt: initial, 2x, 4x, ...
            backoff_seconds = URLs.connect_initial_backoff * (2 ** (attempt - 1))
            log.warning(
                "Failed to load PythonNews mailing lists (attempt %d/%d). Retrying in %d second(s). Error: %s",
                attempt,
                max_attempts,
                backoff_seconds,
                error,
            )
            await asyncio.sleep(backoff_seconds)
6291

6392
async def cog_unload(self) -> None:
6493
"""Stop news posting tasks on cog unload."""

bot/exts/moderation/infraction/superstarify.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import json
23
import random
34
import textwrap
@@ -10,13 +11,15 @@
1011

1112
from bot import constants
1213
from bot.bot import Bot
14+
from bot.constants import URLs
1315
from bot.converters import Duration, DurationOrExpiry
1416
from bot.decorators import ensure_future_timestamp
1517
from bot.exts.moderation.infraction import _utils
1618
from bot.exts.moderation.infraction._scheduler import InfractionScheduler
1719
from bot.log import get_logger
1820
from bot.utils import time
1921
from bot.utils.messages import format_user
22+
from bot.utils.retry import is_retryable_api_error
2023

2124
log = get_logger(__name__)
2225
NICKNAME_POLICY_URL = "https://pythondiscord.com/pages/rules/#nickname-policy"
@@ -43,9 +46,7 @@ async def on_member_update(self, before: Member, after: Member) -> None:
4346
f"{after.display_name}. Checking if the user is in superstar-prison..."
4447
)
4548

46-
active_superstarifies = await self.bot.api_client.get(
47-
"bot/infractions",
48-
params={
49+
active_superstarifies = await self._fetch_with_retries(params={
4950
"active": "true",
5051
"type": "superstar",
5152
"user__id": str(before.id)
@@ -84,9 +85,7 @@ async def on_member_update(self, before: Member, after: Member) -> None:
8485
@Cog.listener()
8586
async def on_member_join(self, member: Member) -> None:
8687
"""Reapply active superstar infractions for returning members."""
87-
active_superstarifies = await self.bot.api_client.get(
88-
"bot/infractions",
89-
params={
88+
active_superstarifies = await self._fetch_with_retries(params={
9089
"active": "true",
9190
"type": "superstar",
9291
"user__id": member.id
@@ -238,6 +237,22 @@ async def cog_check(self, ctx: Context) -> bool:
238237
"""Only allow moderators to invoke the commands in this cog."""
239238
return await has_any_role(*constants.MODERATION_ROLES).predicate(ctx)
240239

240+
async def _fetch_with_retries(
    self,
    retries: int = URLs.connect_max_retries,
    params: dict[str, str | int] | None = None,
) -> list[dict]:
    """
    Fetch infractions from the API with retries and exponential backoff.

    Args:
        retries: Maximum number of attempts; must be at least 1.
        params: Query parameters forwarded to the `bot/infractions` endpoint.

    Returns:
        Whatever the API client returns for the `bot/infractions` request.

    Raises:
        ValueError: If `retries` is less than 1.
        Exception: The last error from the API client, either because it was not retryable
            (per `is_retryable_api_error`) or because all attempts were used up.
    """
    if retries < 1:
        raise ValueError("retries must be at least 1")

    for attempt in range(1, retries + 1):
        try:
            return await self.bot.api_client.get("bot/infractions", params=params)
        except Exception as error:
            if attempt == retries or not is_retryable_api_error(error):
                raise

            # Delay doubles with each failed attempt: initial, 2x, 4x, ...
            backoff_seconds = URLs.connect_initial_backoff * (2 ** (attempt - 1))
            log.warning(
                "Failed to fetch infractions (attempt %d/%d). Retrying in %d second(s): %s",
                attempt,
                retries,
                backoff_seconds,
                error,
            )
            await asyncio.sleep(backoff_seconds)

    # The loop runs at least once and every iteration either returns or raises (the final
    # attempt always re-raises), so control can never fall through to this point.
    raise AssertionError("unreachable: retry loop exited without returning or raising")
255+
241256

242257
async def setup(bot: Bot) -> None:
243258
"""Load the Superstarify cog."""

bot/exts/utils/reminders.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import random
23
import textwrap
34
import typing as t
@@ -23,6 +24,7 @@
2324
POSITIVE_REPLIES,
2425
Roles,
2526
STAFF_AND_COMMUNITY_ROLES,
27+
URLs,
2628
)
2729
from bot.converters import Duration, UnambiguousUser
2830
from bot.errors import LockedResourceError
@@ -224,13 +226,25 @@ async def cog_unload(self) -> None:
224226
async def cog_load(self) -> None:
225227
"""Get all current reminders from the API and reschedule them."""
226228
await self.bot.wait_until_guild_available()
227-
response = await self.bot.api_client.get(
228-
"bot/reminders",
229-
params={"active": "true"}
230-
)
231-
229+
# retry fetching reminders with exponential backoff
230+
for attempt in range(1, URLs.connect_max_retries + 1):
231+
try:
232+
# response either throws, or is a list of reminders (possibly empty)
233+
response = await self.bot.api_client.get(
234+
"bot/reminders",
235+
params={"active": "true"}
236+
)
237+
break
238+
except Exception as e:
239+
if not self._check_error_is_retriable(e):
240+
log.error(f"Failed to load reminders due to non-retryable error: {e}")
241+
raise
242+
log.warning(f"Attempt {attempt} - Failed to fetch reminders from the API: {e}")
243+
if attempt == URLs.connect_max_retries:
244+
log.error("Max retry attempts reached. Failed to load reminders.")
245+
raise
246+
await asyncio.sleep(URLs.connect_initial_backoff * (2 ** (attempt - 1))) # Exponential backoff
232247
now = datetime.now(UTC)
233-
234248
for reminder in response:
235249
is_valid, *_ = self.ensure_valid_reminder(reminder)
236250
if not is_valid:
@@ -244,6 +258,13 @@ async def cog_load(self) -> None:
244258
else:
245259
self.schedule_reminder(reminder)
246260

261+
def _check_error_is_retriable(self, error: Exception) -> bool:
    """
    Return whether fetching reminders failed due to a temporary error, so retrying could help.

    A `ResponseCodeError` is retriable when its status is 408, 429, or 500 and above.
    Any other exception is retriable only if it is a `TimeoutError` or an `OSError`.
    """
    # NOTE: this is the same rule as `bot.utils.retry.is_retryable_api_error`; keep them in sync.
    if isinstance(error, ResponseCodeError):
        return error.status in (408, 429) or error.status >= 500

    return isinstance(error, (TimeoutError, OSError))
267+
247268
def ensure_valid_reminder(self, reminder: dict) -> tuple[bool, discord.TextChannel]:
248269
"""Ensure reminder channel can be fetched otherwise delete the reminder."""
249270
channel = self.bot.get_channel(reminder["channel_id"])

bot/utils/retry.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from pydis_core.site_api import ResponseCodeError
2+
3+
4+
def is_retryable_api_error(error: Exception) -> bool:
    """
    Return whether an API error is temporary and worth retrying.

    For a `ResponseCodeError` the decision is based on its status: 408, 429, or anything from
    500 upwards counts as temporary. Every other exception is retryable only when it is a
    `TimeoutError` or an `OSError`.
    """
    if not isinstance(error, ResponseCodeError):
        # No response status to inspect; only timeout/OS-level errors count as temporary.
        return isinstance(error, (TimeoutError, OSError))

    status = error.status
    return status in (408, 429) or status >= 500

0 commit comments

Comments
 (0)