JP-3773: Store logs from calibration pipeline in datamodel by braingram · Pull Request #9211 · spacetelescope/jwst · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

JP-3773: Store logs from calibration pipeline in datamodel #9211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Feb 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/9211.stpipe.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Store step/pipeline log messages in DataModels.
50 changes: 50 additions & 0 deletions jwst/stpipe/_cal_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"""Cal log handling code."""

import getpass
import logging
import re
import socket
import time

# Pattern used to detect address-like text in a log message. It is an
# alternation of three forms:
#   1. a dotted-quad IPv4 address that ends the string,
#   2. a whole-string dotted name whose labels start with a hex letter and
#      otherwise contain only hex letters, digits and hyphens,
#   3. an IPv6 address (optionally with an embedded IPv4 tail) that starts
#      the string.
# The literal must stay on a single physical line: a non-triple-quoted raw
# string cannot contain a line break.
_IP_REGEX = re.compile(
    r"((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$|^(([a-fA-F]|[a-fA-F][a-fA-F0-9\-]*[a-fA-F0-9])\.)*([A-Fa-f]|[A-Fa-f][A-Fa-f0-9\-]*[A-Fa-f0-9])$|^(?:(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-fA-F]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,1}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,2}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,3}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:[0-9a-fA-F]{1,4})):)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,4}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,5}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,6}(?:(?:[0-9a-fA-F]{1,4})))?::)))))"
)
# Host name and login name of the running process, looked up once when this
# module is imported. _scrub blanks any message that contains either value.
_HOSTNAME = socket.gethostname()
_USER = getpass.getuser()


def _scrub(msg):
    """
    Blank out a message that carries sensitive information.

    A message counts as sensitive when it contains the current user
    name, the current host name, or text matched by ``_IP_REGEX``.

    Parameters
    ----------
    msg : str
        The message to inspect.

    Returns
    -------
    scrubbed : str
        ``msg`` unchanged when nothing sensitive was found, otherwise
        an empty string (the whole message is dropped, not redacted).
    """
    # Cheap substring checks run first; the regex is only tried when
    # neither of them fires.
    names_identity = _USER in msg or _HOSTNAME in msg
    if names_identity or _IP_REGEX.search(msg):
        return ""
    return msg


class _ScrubbingFormatter(logging.Formatter):
    """Log formatter whose output is passed through ``_scrub``."""

    def format(self, record):
        """
        Format ``record`` and scrub the resulting text.

        Parameters
        ----------
        record : logging.LogRecord
            The record to format.

        Returns
        -------
        str
            The formatted line, or an empty string when ``_scrub``
            rejects it.
        """
        # Scrubbing is applied to the fully formatted line, so the
        # logger name and level are checked along with the message.
        rendered = super().format(record)
        return _scrub(rendered)


# Shared formatter instance for stored log records.
# The datefmt below has no sub-second directive, so milliseconds are appended
# through the "%(msecs)03d" field in the format string instead.
_LOG_FORMATTER = _ScrubbingFormatter(
"%(asctime)s.%(msecs)03d - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%dT%H:%M:%S",
)
# Render timestamps with time.gmtime (UTC) rather than the default local time.
_LOG_FORMATTER.converter = time.gmtime
23 changes: 20 additions & 3 deletions jwst/stpipe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@

from stdatamodels.jwst.datamodels import JwstDataModel
from stdatamodels.jwst import datamodels
from stpipe import crds_client
from stpipe import Step
from stpipe import Pipeline
from stpipe import crds_client, Step, Pipeline

from jwst import __version_commit__, __version__
from ._cal_logs import _LOG_FORMATTER
from ..lib.suffix import remove_suffix


Expand All @@ -25,6 +24,8 @@
output_ext = string(default='.fits') # Output file type
""" # noqa: E501

_log_records_formatter = _LOG_FORMATTER

@classmethod
def _datamodels_open(cls, init, **kwargs):
return datamodels.open(init, **kwargs)
Expand Down Expand Up @@ -105,6 +106,11 @@
if self.parent is None:
log.info(f"Results used CRDS context: {result.meta.ref_file.crds.context_used}")

if self.class_alias:
if not hasattr(result, "cal_logs"):
result.cal_logs = {}
setattr(result.cal_logs, self.class_alias, self._log_records)

def remove_suffix(self, name):
"""
Remove the suffix if a known suffix is already in name.
Expand Down Expand Up @@ -184,3 +190,14 @@
"Results used CRDS context: "
f"{crds_client.get_context_used(result.crds_observatory)}"
)

if self.class_alias:
if not hasattr(result, "cal_logs"):
result.cal_logs = {}

Check warning on line 196 in jwst/stpipe/core.py

View check run for this annotation

Codecov / codecov/patch

jwst/stpipe/core.py#L196

Added line #L196 was not covered by tests

# remove the step logs as they're captured by the pipeline log
for _, step in self.step_defs.items():
if hasattr(result.cal_logs, step.class_alias):
delattr(result.cal_logs, step.class_alias)

setattr(result.cal_logs, self.class_alias, self._log_records)
26 changes: 26 additions & 0 deletions jwst/stpipe/tests/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,29 @@ class MakeListPipeline(Pipeline):

def process(self, *args): # noqa: D102
return self.make_list.run(*args)


class CalLogsStep(Step):
    """Minimal step used to exercise cal_logs handling."""

    class_alias = "cal_logs_step"

    def process(self, msg):
        """Log ``msg`` at INFO level and return a new ImageModel."""
        from stdatamodels.jwst.datamodels import ImageModel

        self.log.info(msg)
        model = ImageModel()
        return model


class CalLogsPipeline(Pipeline):
    """Minimal one-step pipeline used to exercise cal_logs handling."""

    class_alias = "cal_logs_pipeline"

    step_defs = dict(a_step=CalLogsStep)

    def process(self, msg):
        """Log ``msg`` from the pipeline, then run ``a_step`` on it."""
        self.log.info(msg)
        result = self.a_step.run(msg)
        return result
45 changes: 45 additions & 0 deletions jwst/stpipe/tests/test_cal_logs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import pytest

from jwst.stpipe.tests.steps import CalLogsStep, CalLogsPipeline
import jwst.stpipe._cal_logs
from jwst.stpipe._cal_logs import _scrub


# Stand-in values that the autouse fixture below patches over
# jwst.stpipe._cal_logs._HOSTNAME and _USER.
_FAKE_HOSTNAME = "my_hostname"
_FAKE_USER = "my_user"


@pytest.fixture(autouse=True)
def dont_want_no_scrubs(monkeypatch):
    """Patch in a fake hostname and user so _scrub behaves consistently."""
    replacements = (
        ("_HOSTNAME", _FAKE_HOSTNAME),
        ("_USER", _FAKE_USER),
    )
    for attr_name, fake_value in replacements:
        monkeypatch.setattr(jwst.stpipe._cal_logs, attr_name, fake_value)
    yield


def test_cal_logs_step():
    """A step's log message is stored under its class alias."""
    model = CalLogsStep().run("foo")
    records = model.cal_logs.cal_logs_step
    assert any("foo" in record for record in records)


def test_cal_logs_pipeline():
    """A pipeline result keeps the pipeline log and no per-step entry."""
    model = CalLogsPipeline().run("foo")
    logs = model.cal_logs
    assert not hasattr(logs, "cal_logs_step")
    assert any("foo" in record for record in logs.cal_logs_pipeline)


@pytest.mark.parametrize(
    "msg, is_empty",
    [
        ("2025-02-21T19:16:07.219", False),  # our timestamp
        (_FAKE_HOSTNAME, True),
        (_FAKE_USER, True),
        (f" something from {_FAKE_USER}", True),
        ("123.42.26.1", True),
        ("123.42.26", False),
        ("2001:db8::ff00:42:8329", True),
        ("2001:db8:4006:812::200e", True),
    ],
)
def test_scrub(msg, is_empty):
    """_scrub returns "" for sensitive input and the input itself otherwise."""
    expected = msg
    if is_empty:
        expected = ""
    assert _scrub(msg) == expected
Loading
0