diff --git a/.envs/.local/.django b/.envs/.local/.django
index 6e3912153..552cc3eea 100755
--- a/.envs/.local/.django
+++ b/.envs/.local/.django
@@ -19,4 +19,16 @@ CELERY_FLOWER_PASSWORD=QgScyefPrYhHgO6onW61u0nazc5xdBuP4sM7jMRrBBFuA2RjsFhZLp7xb
 
 FETCH_DATA_TIMEOUT=2
 DJANGO_PROFILING_ENABLED=True
-RUN_ASYNC=False
\ No newline at end of file
+RUN_ASYNC=False
+
+# OpenSearch logging
+# ------------------------------------------------------------------------------
+# Set ``USE_OPENSEARCH_LOGGING=yes`` to ship logs to the local ``opensearch``
+# service defined in ``local.yml``. The OpenSearch Dashboards UI is exposed
+# at http://localhost:5609. Index name is composed automatically as
+# ``core-logs-dev-YYYY.MM.DD``. Leave empty / unset to keep console-only
+# logging (default behavior).
+USE_OPENSEARCH_LOGGING=yes
+# OPENSEARCH_LOGGING_HOSTS=http://opensearch:9200
+# OPENSEARCH_LOGGING_ENVIRONMENT=dev
+# OPENSEARCH_LOGGING_LEVEL=INFO
diff --git a/config/logging_handlers.py b/config/logging_handlers.py
new file mode 100644
index 000000000..4f6d2ef99
--- /dev/null
+++ b/config/logging_handlers.py
@@ -0,0 +1,326 @@
+"""
+Custom logging handlers for the SciELO Content Manager.
+
+This module provides an OpenSearch logging handler that ships log records
+to an OpenSearch cluster. It is intentionally tolerant of failures: any
+error while contacting OpenSearch is suppressed (and reported to
+``sys.stderr`` via ``logging.Handler.handleError``) so that logging never
+disrupts the application.
+
+Activation is controlled by the project's ``LOGGING`` configuration; when
+OpenSearch is not enabled, loggers fall back to the console handler.
+
+Usage from application code
+---------------------------
+
+The handler is wired in ``config/settings/*.py`` and attached to the root
+and ``django`` loggers automatically when enabled, so application code
+just uses the standard :mod:`logging` API. Both plain messages and
+structured fields are supported:
+
+.. code-block:: python
+
+    import logging
+
+    logger = logging.getLogger(__name__)
+
+    # Plain message — indexed with the default fields (@timestamp, level,
+    # logger, message, module, func_name, line_no, host, service,
+    # environment, ...).
+    logger.info("user logged in")
+
+    # Message with positional arguments (Python's standard interpolation).
+    logger.warning("slow query took %.2fs on table=%s", 1.23, "article")
+
+    # Structured/contextual fields via ``extra={...}``. Any keys passed in
+    # ``extra`` that are not standard ``LogRecord`` attributes are added
+    # as top-level fields on the OpenSearch document, which makes them
+    # filterable in dashboards (e.g. by ``user_id`` or ``request_id``).
+    logger.info(
+        "imported article",
+        extra={
+            "user_id": user.id,
+            "article_pid": article.pid,
+            "request_id": request_id,
+            "duration_ms": duration_ms,
+        },
+    )
+
+    # Exceptions are captured automatically with a full traceback when
+    # ``exc_info=True`` (or when using ``logger.exception(...)`` inside
+    # an ``except`` block).
+    try:
+        do_something()
+    except Exception:
+        logger.exception(
+            "import failed",
+            extra={"article_pid": pid, "stage": "xml_parse"},
+        )
+
+The ``service`` and ``environment`` fields (configured globally via
+``OPENSEARCH_LOGGING_ENVIRONMENT`` in settings) are merged into every
+document, so each environment writes to a clearly identified, separate
+index (e.g. ``core-logs-prod-YYYY.MM.DD`` vs ``core-logs-dev-YYYY.MM.DD``)
+and also carries the ``environment`` field inside the document itself.
+"""
+from __future__ import annotations
+
+import atexit
+import logging
+import os
+import queue
+import socket
+import threading
+from datetime import datetime, timezone
+from typing import Any, Iterable, Optional
+
+
+class OpenSearchLogHandler(logging.Handler):
+    """A non-blocking logging handler that ships records to OpenSearch.
+
+    Records are placed on an in-memory queue and shipped by a background
+    daemon thread using ``opensearch-py``. The handler never raises:
+    connection or indexing errors are routed through
+    :meth:`logging.Handler.handleError` so the application is never
+    impacted by logging failures.
+
+    Parameters
+    ----------
+    hosts:
+        Iterable of hosts (strings such as ``"https://os.example.org:9200"``
+        or dicts accepted by :class:`opensearchpy.OpenSearch`). A single
+        string or dict is treated as one host. When empty or ``None`` the
+        handler is a no-op.
+    index:
+        Index name (or index name prefix when ``index_date_format`` is
+        set) used to store the log documents.
+    index_date_format:
+        Optional ``strftime`` format string. When set, the daily/monthly
+        index name is built as
+        ``f"{index}-{datetime.now(timezone.utc):format}"``.
+    http_auth:
+        Optional ``(user, password)`` tuple used for basic auth.
+    use_ssl:
+        Whether to use HTTPS (default ``True``).
+    verify_certs:
+        Whether to verify TLS certificates (default ``True``).
+    extra_fields:
+        Optional mapping merged into every log document (useful for
+        environment / service tags).
+    queue_size:
+        Maximum number of buffered records. While the queue is full, new
+        records are dropped to protect application memory.
+    level:
+        Standard ``logging`` level threshold.
+    """
+
+    def __init__(
+        self,
+        hosts: Optional[Iterable[Any]] = None,
+        index: str = "scielo-core-logs",
+        index_date_format: Optional[str] = "%Y.%m.%d",
+        http_auth: Optional[tuple] = None,
+        use_ssl: bool = True,
+        verify_certs: bool = True,
+        extra_fields: Optional[dict] = None,
+        queue_size: int = 10_000,
+        level: int = logging.NOTSET,
+    ) -> None:
+        super().__init__(level=level)
+        if not hosts:
+            self._hosts = []
+        elif isinstance(hosts, (str, dict)):
+            # ``list("http://...")`` would split the URL into characters.
+            self._hosts = [hosts]
+        else:
+            self._hosts = list(hosts)
+        self._index = index
+        self._index_date_format = index_date_format
+        self._http_auth = tuple(http_auth) if http_auth else None
+        self._use_ssl = use_ssl
+        self._verify_certs = verify_certs
+        self._extra_fields = dict(extra_fields or {})
+        self._hostname = socket.gethostname()
+        self._queue_size = queue_size
+        self._pid = os.getpid()
+
+        self._queue: "queue.Queue[Optional[logging.LogRecord]]" = queue.Queue(
+            maxsize=queue_size
+        )
+        self._client = None
+        self._client_lock = threading.Lock()
+        self._worker: Optional[threading.Thread] = None
+        self._stop_event = threading.Event()
+
+        if self._hosts:
+            self._start_worker()
+            atexit.register(self.close)
+
+    # ------------------------------------------------------------------
+    # Worker / client lifecycle
+    # ------------------------------------------------------------------
+    def _start_worker(self) -> None:
+        """Start the daemon thread that drains the queue."""
+        self._worker = threading.Thread(
+            target=self._run,
+            name="OpenSearchLogHandler",
+            daemon=True,
+        )
+        self._worker.start()
+
+    def _get_client(self):
+        """Return the OpenSearch client, creating it on first use."""
+        if self._client is not None:
+            return self._client
+        with self._client_lock:
+            if self._client is None:
+                # Imported lazily so that environments without opensearch-py
+                # installed can still import this module (the handler will
+                # simply be inactive when no hosts are configured).
+                from opensearchpy import OpenSearch  # type: ignore
+
+                self._client = OpenSearch(
+                    hosts=self._hosts,
+                    http_auth=self._http_auth,
+                    use_ssl=self._use_ssl,
+                    verify_certs=self._verify_certs,
+                    ssl_show_warn=self._verify_certs,
+                )
+        return self._client
+
+    def _index_name(self) -> str:
+        """Return the target index, with the UTC date suffix when enabled."""
+        if not self._index_date_format:
+            return self._index
+        suffix = datetime.now(timezone.utc).strftime(self._index_date_format)
+        return f"{self._index}-{suffix}"
+
+    # ------------------------------------------------------------------
+    # logging.Handler API
+    # ------------------------------------------------------------------
+    def _ensure_runtime(self) -> None:
+        """Reinitialize runtime objects when used after a process fork."""
+        current_pid = os.getpid()
+        worker_alive = self._worker is not None and self._worker.is_alive()
+
+        if current_pid == self._pid and worker_alive:
+            return
+
+        self._pid = current_pid
+        self._client = None
+        # A lock inherited across fork() may have been held by a thread that
+        # does not exist in the child, so it is replaced rather than reused.
+        self._client_lock = threading.Lock()
+        self._stop_event = threading.Event()
+        self._queue = queue.Queue(maxsize=self._queue_size)
+        self._worker = None
+        if self._hosts:
+            self._start_worker()
+
+    def emit(self, record: logging.LogRecord) -> None:  # noqa: D401
+        """Queue ``record`` for the worker thread without blocking the caller."""
+        if not self._hosts:
+            return
+        try:
+            self._ensure_runtime()
+            self._queue.put_nowait(record)
+        except queue.Full:
+            # Drop the record rather than block the application thread.
+            pass
+        except Exception:
+            self.handleError(record)
+
+    def _run(self) -> None:
+        """Ship queued records until told to stop.
+
+        The loop ends at the ``None`` sentinel or, once :meth:`close` has set
+        the stop event, as soon as the queue is found empty. Records that
+        were already queued when ``close()`` ran are shipped before exiting.
+        """
+        while True:
+            try:
+                record = self._queue.get(timeout=1.0)
+            except queue.Empty:
+                if self._stop_event.is_set():
+                    break
+                continue
+            if record is None:  # sentinel from close()
+                self._queue.task_done()
+                break
+            try:
+                self._ship(record)
+            except Exception:
+                self.handleError(record)
+            finally:
+                self._queue.task_done()
+
+    def _ship(self, record: logging.LogRecord) -> None:
+        """Index one record as a document in the current index."""
+        client = self._get_client()
+        document = self._build_document(record)
+        client.index(index=self._index_name(), body=document)
+
+    # The standard attributes set by ``logging.LogRecord.__init__``. Any
+    # attribute on the record that is *not* in this set has been supplied
+    # by the caller via ``extra={...}`` and is therefore promoted to a
+    # top-level field on the OpenSearch document.
+    _RESERVED_RECORD_ATTRS = frozenset(
+        {
+            "name", "msg", "args", "levelname", "levelno", "pathname",
+            "filename", "module", "exc_info", "exc_text", "stack_info",
+            "lineno", "funcName", "created", "msecs", "relativeCreated",
+            "thread", "threadName", "processName", "process", "message",
+            "asctime", "taskName",
+        }
+    )
+
+    def _build_document(self, record: logging.LogRecord) -> dict:
+        """Convert ``record`` into the dict indexed in OpenSearch."""
+        try:
+            message = record.getMessage()
+        except Exception:
+            message = record.msg if isinstance(record.msg, str) else repr(record.msg)
+        document: dict = {
+            "@timestamp": datetime.fromtimestamp(
+                record.created, tz=timezone.utc
+            ).isoformat(),
+            "level": record.levelname,
+            "logger": record.name,
+            "message": message,
+            "module": record.module,
+            "func_name": record.funcName,
+            "line_no": record.lineno,
+            "process": record.process,
+            "thread": record.thread,
+            "host": self._hostname,
+        }
+        if record.exc_info:
+            # Store only the traceback. ``self.format`` on a record with an
+            # empty message would prepend a stray newline (plus whatever
+            # prefix a configured formatter adds) to the stored text.
+            formatter = self.formatter or logging.Formatter()
+            document["exception"] = formatter.formatException(record.exc_info)
+        # Promote any caller-supplied ``extra={...}`` keys to top-level
+        # fields so they become filterable/searchable in OpenSearch.
+        for key, value in record.__dict__.items():
+            if key in self._RESERVED_RECORD_ATTRS or key.startswith("_"):
+                continue
+            document.setdefault(key, value)
+        if self._extra_fields:
+            for key, value in self._extra_fields.items():
+                document.setdefault(key, value)
+        return document
+
+    def close(self) -> None:
+        """Signal the worker to finish and wait up to two seconds for it."""
+        if self._stop_event.is_set():
+            return
+        self._stop_event.set()
+        try:
+            # Sentinel to wake the worker if it's blocked on the queue.
+            self._queue.put_nowait(None)
+        except queue.Full:
+            pass
+        if self._worker is not None and self._worker.is_alive():
+            self._worker.join(timeout=2.0)
+        super().close()
diff --git a/config/settings/base.py b/config/settings/base.py
index 78c59ea37..b42087fc2 100644
--- a/config/settings/base.py
+++ b/config/settings/base.py
@@ -318,6 +318,62 @@
 # more details on how to customize your logging configuration.
 logs_path = ROOT_DIR / "logs"
 logs_path.mkdir(parents=True, exist_ok=True)
+
+# OpenSearch logging
+# ------------------------------------------------------------------------------
+# When ``USE_OPENSEARCH_LOGGING=True`` and at least one host is configured,
+# log records are shipped asynchronously to the OpenSearch cluster by
+# :class:`config.logging_handlers.OpenSearchLogHandler`. Otherwise the
+# application logs only to the console (which is collected by the platform's
+# stdout/stderr pipeline). The application never emails errors.
+#
+# The ``OPENSEARCH_LOGGING_ENVIRONMENT`` setting (e.g. ``"prod"``, ``"hml"``,
+# ``"dev"``) is used both to compose the index name (e.g.
+# ``core-logs-prod-2026.04.28``) and as a top-level field on every document,
+# so logs from different environments are kept in separate indexes *and*
+# remain semantically tagged inside the document. ``production.py`` and
+# ``local.py`` override this default with the appropriate value for each
+# environment.
+USE_OPENSEARCH_LOGGING = env.bool("USE_OPENSEARCH_LOGGING", default=False)
+OPENSEARCH_LOGGING_HOSTS = env.list("OPENSEARCH_LOGGING_HOSTS", default=[])
+OPENSEARCH_LOGGING_ENVIRONMENT = env.str(
+    "OPENSEARCH_LOGGING_ENVIRONMENT", default="dev"
+)
+# Base index name (without environment / date suffixes).
+OPENSEARCH_LOGGING_INDEX_BASE = env.str(
+    "OPENSEARCH_LOGGING_INDEX_BASE", default="core-logs"
+)
+# Final index prefix is composed from the base + environment, e.g.
+# ``core-logs-prod`` or ``core-logs-dev``. When ``OPENSEARCH_LOGGING_INDEX``
+# is set explicitly via env, it overrides the composed value (escape hatch).
+OPENSEARCH_LOGGING_INDEX = env.str(
+    "OPENSEARCH_LOGGING_INDEX",
+    default=f"{OPENSEARCH_LOGGING_INDEX_BASE}-{OPENSEARCH_LOGGING_ENVIRONMENT}",
+)
+OPENSEARCH_LOGGING_INDEX_DATE_FORMAT = env.str(
+    "OPENSEARCH_LOGGING_INDEX_DATE_FORMAT", default="%Y.%m.%d"
+)
+OPENSEARCH_LOGGING_USER = env.str("OPENSEARCH_LOGGING_USER", default="")
+OPENSEARCH_LOGGING_PASSWORD = env.str("OPENSEARCH_LOGGING_PASSWORD", default="")
+OPENSEARCH_LOGGING_USE_SSL = env.bool("OPENSEARCH_LOGGING_USE_SSL", default=True)
+OPENSEARCH_LOGGING_VERIFY_CERTS = env.bool(
+    "OPENSEARCH_LOGGING_VERIFY_CERTS", default=True
+)
+OPENSEARCH_LOGGING_LEVEL = env.str("OPENSEARCH_LOGGING_LEVEL", default="INFO")
+
+_opensearch_handler_enabled = bool(USE_OPENSEARCH_LOGGING and OPENSEARCH_LOGGING_HOSTS)
+_opensearch_http_auth = (
+    [OPENSEARCH_LOGGING_USER, OPENSEARCH_LOGGING_PASSWORD]
+    if OPENSEARCH_LOGGING_USER
+    else None
+)
+# Default handler list used by the root and "django" loggers. The console
+# handler is always present so logs remain available locally; the
+# ``opensearch`` handler is appended only when enabled.
+_default_log_handlers = ["console"] + (
+    ["opensearch"] if _opensearch_handler_enabled else []
+)
+
 LOGGING = {
     "version": 1,
     "disable_existing_loggers": False,
@@ -346,6 +402,36 @@
             "formatter": "simple",
             "encoding": "utf-8",
         },
+        # Override Django's default AdminEmailHandler (registered in
+        # django.utils.log.DEFAULT_LOGGING) so that ERROR-level log records
+        # from the "django" logger (e.g. django.request 500s) are not emailed
+        # to ADMINS. The application should not report errors via email.
+        "mail_admins": {
+            "level": "ERROR",
+            "class": "logging.NullHandler",
+        },
+        # OpenSearch handler. When ``USE_OPENSEARCH_LOGGING`` is False or no
+        # hosts are configured the handler is initialised with an empty host
+        # list and becomes a no-op (and is not attached to any logger).
+        "opensearch": {
+            "level": OPENSEARCH_LOGGING_LEVEL,
+            "class": "config.logging_handlers.OpenSearchLogHandler",
+            "hosts": OPENSEARCH_LOGGING_HOSTS if _opensearch_handler_enabled else [],
+            "index": OPENSEARCH_LOGGING_INDEX,
+            "index_date_format": OPENSEARCH_LOGGING_INDEX_DATE_FORMAT,
+            "http_auth": _opensearch_http_auth,
+            "use_ssl": OPENSEARCH_LOGGING_USE_SSL,
+            "verify_certs": OPENSEARCH_LOGGING_VERIFY_CERTS,
+            # Global fields merged into every document. Per-call fields can
+            # be added from application code via ``logger.info(msg,
+            # extra={"user_id": ..., "request_id": ...})`` — they are
+            # promoted to top-level fields on the OpenSearch document by
+            # :class:`config.logging_handlers.OpenSearchLogHandler`.
+            "extra_fields": {
+                "service": "scielo-core",
+                "environment": OPENSEARCH_LOGGING_ENVIRONMENT,
+            },
+        },
     },
     "loggers": {
         "profiling": { # <-- Logger usado pelo decorador
@@ -353,8 +439,45 @@
             "level": "DEBUG",
             "propagate": False,
         },
+        # Celery runtime/task lifecycle logs. ``celery.app.trace`` is where
+        # task execution failures are reported (e.g. "Task ... raised ...").
+        "celery": {
+            "handlers": _default_log_handlers,
+            "level": "INFO",
+            "propagate": False,
+        },
+        "celery.app.trace": {
+            "handlers": _default_log_handlers,
+            "level": "ERROR",
+            "propagate": False,
+        },
+        "celery.worker": {
+            "handlers": _default_log_handlers,
+            "level": "INFO",
+            "propagate": False,
+        },
+        # Override Django's default "django" logger (defined in
+        # django.utils.log.DEFAULT_LOGGING) so that the AdminEmailHandler
+        # attached to it is removed. Without this explicit override the
+        # handler is preserved (disable_existing_loggers=False) and any
+        # ERROR logged by django.request would be emailed to ADMINS.
+        "django": {
+            "handlers": _default_log_handlers,
+            "level": "INFO",
+            "propagate": False,
+        },
+        "opensearch": {
+            "handlers": ["console"],  # or [] for complete silence
+            "level": "INFO",
+            "propagate": False,
+        },
+        "opensearchpy": {
+            "handlers": ["console"],
+            "level": "WARNING",
+            "propagate": False,
+        },
     },
-    "root": {"level": "INFO", "handlers": ["console"]},
+    "root": {"level": "INFO", "handlers": _default_log_handlers},
 }
 PROMETHEUS_LATENCY_BUCKETS = (.1, .2, .5, .6, .8, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.5, 9.0, 12.0, 15.0, 20.0, 30.0, float("inf"))
 PROMETHEUS_EXPORT_MIGRATIONS = env.bool("PROMETHEUS_EXPORT_MIGRATIONS", True)
@@ -399,6 +522,9 @@
 # https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-task_send_sent_event
 CELERY_SEND_TASK_SENT_EVENT = True
 CELERYD_SEND_EVENTS = True
+# Keep Django logging handlers (console/opensearch) intact in Celery workers.
+# Celery's default is to hijack the root logger, which can drop custom handlers.
+CELERY_WORKER_HIJACK_ROOT_LOGGER = False
 CE_BUCKETS=1,2.5,5,10,30,60,300,600,900,1800
 
 # Tempo em segundos para cancelar uma tarefa se ela não começar.
diff --git a/config/settings/local.py b/config/settings/local.py
index d79600167..77f7fdbde 100755
--- a/config/settings/local.py
+++ b/config/settings/local.py
@@ -67,5 +67,62 @@
 
 # http://docs.celeryproject.org/en/latest/userguide/configuration.html#task-eager-propagates
 CELERY_TASK_EAGER_PROPAGATES = True
+
+# OpenSearch logging (development)
+# ------------------------------------------------------------------------------
+# Re-evaluate OpenSearch environment / index defaults for the development
+# environment. ``OPENSEARCH_LOGGING_ENVIRONMENT`` defaults to ``"dev"`` here
+# (overridable via env var), and the index name is recomposed so dev logs
+# land in a clearly-separated index such as ``core-logs-dev-2026.04.28``.
+#
+# To exercise the OpenSearch sink locally, set ``USE_OPENSEARCH_LOGGING=yes``
+# in ``.envs/.local/.django`` — the ``opensearch`` service in ``local.yml``
+# will be used as the default host (http://opensearch:9200, no SSL). Leaving
+# the flag unset keeps the existing behavior (console-only logging).
+OPENSEARCH_LOGGING_ENVIRONMENT = env.str(
+    "OPENSEARCH_LOGGING_ENVIRONMENT", default="dev"
+)
+OPENSEARCH_LOGGING_INDEX = env.str(  # noqa: F405
+    "OPENSEARCH_LOGGING_INDEX",
+    default=f"{OPENSEARCH_LOGGING_INDEX_BASE}-{OPENSEARCH_LOGGING_ENVIRONMENT}",  # noqa: F405
+)
+# Reasonable defaults for the local docker-compose ``opensearch`` service:
+# unauthenticated, plain HTTP, single node.
+if not OPENSEARCH_LOGGING_HOSTS:  # noqa: F405
+    OPENSEARCH_LOGGING_HOSTS = env.list(
+        "OPENSEARCH_LOGGING_HOSTS", default=["http://opensearch:9200"]
+    )
+OPENSEARCH_LOGGING_USE_SSL = env.bool("OPENSEARCH_LOGGING_USE_SSL", default=False)
+OPENSEARCH_LOGGING_VERIFY_CERTS = env.bool(
+    "OPENSEARCH_LOGGING_VERIFY_CERTS", default=False
+)
+
+# Patch the LOGGING dict inherited from base.py so it picks up the
+# development-specific OpenSearch settings without rebuilding the whole
+# dict. Whether the ``opensearch`` handler is actually attached to the
+# loggers is still controlled by ``USE_OPENSEARCH_LOGGING`` (computed in
+# base.py): unset/false ⇒ console only; true + a host ⇒ console + opensearch.
+_opensearch_handler_enabled = bool(USE_OPENSEARCH_LOGGING and OPENSEARCH_LOGGING_HOSTS)  # noqa: F405
+LOGGING["handlers"]["opensearch"].update(  # noqa: F405
+    {
+        "hosts": OPENSEARCH_LOGGING_HOSTS if _opensearch_handler_enabled else [],
+        "index": OPENSEARCH_LOGGING_INDEX,
+        "use_ssl": OPENSEARCH_LOGGING_USE_SSL,
+        "verify_certs": OPENSEARCH_LOGGING_VERIFY_CERTS,
+    }
+)
+LOGGING["handlers"]["opensearch"]["extra_fields"]["environment"] = (  # noqa: F405
+    OPENSEARCH_LOGGING_ENVIRONMENT
+)
+_default_log_handlers = ["console"] + (
+    ["opensearch"] if _opensearch_handler_enabled else []
+)
+LOGGING["root"]["handlers"] = _default_log_handlers  # noqa: F405
+# base.py builds the celery loggers from its own (possibly console-only)
+# handler list, so re-point every logger that shares the default handlers,
+# not only "django".
+for _logger_name in ("django", "celery", "celery.app.trace", "celery.worker"):
+    LOGGING["loggers"][_logger_name]["handlers"] = _default_log_handlers  # noqa: F405
+
 # Your stuff...
 # ------------------------------------------------------------------------------
diff --git a/config/settings/production.py b/config/settings/production.py
index ff7569d10..ab0e48754 100755
--- a/config/settings/production.py
+++ b/config/settings/production.py
@@ -162,12 +162,46 @@
 # See https://docs.djangoproject.com/en/dev/topics/logging for
 # more details on how to customize your logging configuration.
 
+# Re-evaluate OpenSearch environment / index defaults for production. The
+# environment is "prod" by default here (overridable via env var), and the
+# index name is recomposed from the base + environment so that production
+# logs land in a clearly-separated index such as
+# ``core-logs-prod-2026.04.28``.
+OPENSEARCH_LOGGING_ENVIRONMENT = env.str(
+    "OPENSEARCH_LOGGING_ENVIRONMENT", default="prod"
+)
+OPENSEARCH_LOGGING_INDEX = env.str(
+    "OPENSEARCH_LOGGING_INDEX",
+    default=f"{OPENSEARCH_LOGGING_INDEX_BASE}-{OPENSEARCH_LOGGING_ENVIRONMENT}",  # noqa: F405
+)
+# Patch the LOGGING dict inherited from base.py so the non-Sentry
+# production path also uses the production-specific index/environment.
+LOGGING["handlers"]["opensearch"]["index"] = OPENSEARCH_LOGGING_INDEX  # noqa: F405
+LOGGING["handlers"]["opensearch"]["extra_fields"]["environment"] = (  # noqa: F405
+    OPENSEARCH_LOGGING_ENVIRONMENT
+)
+
 if env.bool("USE_SENTRY", default=False):
     import sentry_sdk
     from sentry_sdk.integrations.celery import CeleryIntegration
     from sentry_sdk.integrations.django import DjangoIntegration
     from sentry_sdk.integrations.logging import LoggingIntegration
     from sentry_sdk.integrations.redis import RedisIntegration
+
+    # Recompute private helpers locally — names starting with ``_`` are not
+    # exported by ``from .base import *``.
+    _opensearch_handler_enabled = bool(
+        USE_OPENSEARCH_LOGGING and OPENSEARCH_LOGGING_HOSTS  # noqa: F405
+    )
+    _opensearch_http_auth = (
+        [OPENSEARCH_LOGGING_USER, OPENSEARCH_LOGGING_PASSWORD]  # noqa: F405
+        if OPENSEARCH_LOGGING_USER  # noqa: F405
+        else None
+    )
+    _prod_default_handlers = ["console"] + (
+        ["opensearch"] if _opensearch_handler_enabled else []
+    )
+
     LOGGING = {
         "version": 1,
         "disable_existing_loggers": True,
@@ -196,24 +230,74 @@
                 "formatter": "simple",
                 "encoding": "utf-8",
             },
+            # Override Django's default AdminEmailHandler (registered in
+            # django.utils.log.DEFAULT_LOGGING) so that ERROR-level log
+            # records from the "django" logger (e.g. django.request 500s)
+            # are not emailed to ADMINS. The application should not report
+            # errors via email; errors are reported via Sentry instead.
+            "mail_admins": {
+                "level": "ERROR",
+                "class": "logging.NullHandler",
+            },
+            # OpenSearch handler. Inactive (no-op) unless
+            # ``USE_OPENSEARCH_LOGGING=True`` and at least one host is set.
+            "opensearch": {
+                "level": OPENSEARCH_LOGGING_LEVEL,  # noqa: F405
+                "class": "config.logging_handlers.OpenSearchLogHandler",
+                "hosts": (
+                    OPENSEARCH_LOGGING_HOSTS  # noqa: F405
+                    if _opensearch_handler_enabled  # noqa: F405
+                    else []
+                ),
+                "index": OPENSEARCH_LOGGING_INDEX,  # noqa: F405
+                "index_date_format": OPENSEARCH_LOGGING_INDEX_DATE_FORMAT,  # noqa: F405
+                "http_auth": _opensearch_http_auth,  # noqa: F405
+                "use_ssl": OPENSEARCH_LOGGING_USE_SSL,  # noqa: F405
+                "verify_certs": OPENSEARCH_LOGGING_VERIFY_CERTS,  # noqa: F405
+                "extra_fields": {
+                    "service": "scielo-core",
+                    "environment": OPENSEARCH_LOGGING_ENVIRONMENT,  # noqa: F405
+                },
+            },
         },
-        "root": {"level": "INFO", "handlers": ["console"]},
+        "root": {"level": "INFO", "handlers": _prod_default_handlers},
         "loggers": {
             "django.db.backends": {
                 "level": "ERROR",
-                "handlers": ["console"],
+                "handlers": _prod_default_handlers,
+                "propagate": False,
+            },
+            # Celery runtime/task lifecycle logs. ``celery.app.trace`` emits
+            # execution failures from task bodies.
+            "celery": {
+                "level": "INFO",
+                "handlers": _prod_default_handlers,
+                "propagate": False,
+            },
+            "celery.app.trace": {
+                "level": "ERROR",
+                "handlers": _prod_default_handlers,
+                "propagate": False,
+            },
+            "celery.worker": {
+                "level": "INFO",
+                "handlers": _prod_default_handlers,
                 "propagate": False,
             },
             # Errors logged by the SDK itself
-            "sentry_sdk": {"level": "ERROR", "handlers": ["console"], "propagate": False},
+            "sentry_sdk": {
+                "level": "ERROR",
+                "handlers": _prod_default_handlers,
+                "propagate": False,
+            },
             "django.security.DisallowedHost": {
                 "level": "ERROR",
-                "handlers": ["console"],
+                "handlers": _prod_default_handlers,
                 "propagate": False,
             },
             # Celery Signals
             "config.celery_signals": {
-                "handlers": ["console"],
+                "handlers": _prod_default_handlers,
                 "level": "DEBUG", # ESSENCIAL: para ver a mensagem de depuração
                 "propagate": False, # Não envie para o logger pai (root), para ter controle total
             },
diff --git a/local.yml b/local.yml
index e5bbfdaf2..378444ea9 100755
--- a/local.yml
+++ b/local.yml
@@ -83,6 +83,45 @@ services:
     ports:
       - "6399:6379"
 
+  # OpenSearch — optional sink for the application's structured logs.
+  # Enable it in ``.envs/.local/.django`` with ``USE_OPENSEARCH_LOGGING=yes``
+  # so that the ``OpenSearchLogHandler`` handler sends its logs here.
+  # Configured as a single node with the security layer disabled,
+  # for development use only (do NOT use in production).
+  opensearch:
+    image: opensearchproject/opensearch:2.15.0
+    container_name: scielo_core_local_opensearch
+    environment:
+      - cluster.name=scielo-core-local
+      - node.name=opensearch
+      - discovery.type=single-node
+      - bootstrap.memory_lock=true
+      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
+      - "DISABLE_INSTALL_DEMO_CONFIG=true"
+      - "DISABLE_SECURITY_PLUGIN=true"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536
+        hard: 65536
+    volumes:
+      - opensearch_data:/usr/share/opensearch/data
+    ports:
+      - "9209:9200"  # host port 9209 -> container port 9200
+
+  opensearch-dashboards:
+    image: opensearchproject/opensearch-dashboards:2.15.0
+    container_name: scielo_core_local_opensearch_dashboards
+    depends_on:
+      - opensearch
+    environment:
+      - 'OPENSEARCH_HOSTS=["http://opensearch:9200"]'
+      - "DISABLE_SECURITY_DASHBOARDS_PLUGIN=true"
+    ports:
+      - "5609:5601"  # Dashboards UI, reachable at http://localhost:5609
+
   celeryworker:
     <<: *django
 
@@ -115,4 +154,5 @@ services:
     command: /start-flower
 
 volumes:
-  tmp_csv_data:
\ No newline at end of file
+  tmp_csv_data:
+  opensearch_data:  # named volume mounted at /usr/share/opensearch/data
\ No newline at end of file
diff --git a/requirements/base.txt b/requirements/base.txt
index 9620db47a..afc93ff4e 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -84,6 +84,9 @@ wagtail-autocomplete==0.12.0
 git+https://github.com/django-haystack/django-haystack.git # Django Haystack
 pysolr>=3.11.0 # https://pypi.org/project/pysolr/
 
+# OpenSearch (used as an optional logging sink; see config.logging_handlers)
+opensearch-py==3.2.0 # https://github.com/opensearch-project/opensearch-py
+
 # packtools
 # ------------------------------------------------------------------------------
 tornado>=6.5.2 # not directly required, pinned by Snyk to avoid a vulnerability