Remove restriction on online_analysis_interval and checkpoint_interval by ianmkenney · Pull Request #779 · choderalab/openmmtools · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Remove restriction on online_analysis_interval and checkpoint_interval #779

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations 8000
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docs/releasehistory.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
Release History
***************

0.24.3
======

Enhancements
------------

- Remove the requirement that the ``online_analysis_interval`` be a multiple of the ``checkpoint_interval``
- Issue a logger warning rather than raise a ``ValueError``
- Note that the real-time analysis output file may contain redundant information when restoring from a checkpoint causes a specific iteration index to be calculated again

0.24.2 - Numpy 2 support and FIRE minimization improvements
===========================================================

Expand Down
6 changes: 1 addition & 5 deletions openmmtools/multistate/multistatesampler.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,13 +590,9 @@ def create(self, thermodynamic_states: list, sampler_states, storage,
raise RuntimeError('Storage file {} already exists; cowardly '
'refusing to overwrite.'.format(self._reporter.filepath))

# Make sure online analysis interval is a multiple of the reporter's checkpoint interval
# this avoids having redundant iteration information in the real time yaml files
# only check if self.online_analysis_interval is set
if self.online_analysis_interval:
if self.online_analysis_interval % self._reporter.checkpoint_interval != 0:
raise ValueError(f"Online analysis interval: {self.online_analysis_interval}, must be a "
f"multiple of the checkpoint interval: {self._reporter.checkpoint_interval}")
logger.warning("An online_analysis_interval that is not a multiple of the checkpoint_interval can lead to redundant information in the real time yaml file after recovering from checkpoints.")

# Make sure sampler_states is an iterable of SamplerStates.
if isinstance(sampler_states, states.SamplerState):
Expand Down
73 changes: 73 additions & 0 deletions openmmtools/tests/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -2227,6 +2227,79 @@ def test_real_time_analysis_yaml(self):
len(yaml_contents) == expected_yaml_entries
), "Expected yaml entries do not match the actual number entries in the file."

@pytest.mark.parametrize(
    "n_iterations,online_interval,checkpoint_interval,iterations_first_run",
    [
        (15, 3, 5, 11),
        (15, 3, 5, 3),
        (10, 2, 2, 3),
        (10, 2, 2, 4),
        (10, 2, 2, 2),
    ],
)
def test_real_time_analysis_yaml_restore(
    self, n_iterations, online_interval, checkpoint_interval, iterations_first_run
):
    """Check the real-time analysis yaml entry count across a restore.

    A sampler is run for only ``iterations_first_run`` of its
    ``n_iterations``, discarded, rebuilt from storage and then run to
    completion. The number of entries in the real-time analysis yaml file
    is asserted after the partial run and again after the resumed run.

    Parameters
    ----------
    n_iterations : int
        Total number of iterations the sampler is configured for.
    online_interval : int
        Value passed as ``online_analysis_interval`` to the sampler.
    checkpoint_interval : int
        Value passed as ``checkpoint_interval`` to the reporter.
    iterations_first_run : int
        Number of iterations executed before the sampler is discarded.
    """

    def count_yaml_entries(path):
        # Parse the whole yaml file and report how many entries it holds.
        with open(path) as yaml_file:
            return len(yaml.safe_load(yaml_file))

    # Entries and checkpoints produced by the partial first run.
    entries_after_first_run = iterations_first_run // online_interval
    checkpoints_after_first_run = iterations_first_run // checkpoint_interval

    # Entries written at or before the last checkpoint iteration; anything
    # beyond that is counted a second time in the final total (presumably
    # because the restored sampler repeats those iterations -- see the
    # warning about redundant yaml information after checkpoint recovery).
    last_checkpoint_iteration = checkpoint_interval * checkpoints_after_first_run
    entries_up_to_checkpoint = last_checkpoint_iteration // online_interval
    redundant_entries = entries_after_first_run - entries_up_to_checkpoint
    entries_at_end = n_iterations // online_interval + redundant_entries

    # Work on copies so the shared class-level test system stays untouched.
    thermodynamic_states, sampler_states, unsampled_states = copy.deepcopy(
        self.alanine_test
    )

    with self.temporary_storage_path() as storage_path:
        integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond)
        move = mmtools.mcmc.IntegratorMove(integrator, n_steps=1)

        # First sampler: deliberately stopped before finishing all iterations.
        sampler = self.SAMPLER(
            mcmc_moves=move,
            number_of_iterations=n_iterations,
            online_analysis_interval=online_interval,
        )
        reporter = self.REPORTER(
            storage_path, checkpoint_interval=checkpoint_interval
        )
        self.call_sampler_create(
            sampler,
            reporter,
            thermodynamic_states,
            sampler_states,
            unsampled_states,
        )
        sampler.run(n_iterations=iterations_first_run)

        # The yaml file sits next to the analysis storage file and reuses
        # its name, minus the extension, as a prefix.
        analysis_path = sampler._reporter._storage_analysis_file_path
        storage_dir, reporter_filename = os.path.split(analysis_path)
        yaml_prefix = os.path.splitext(reporter_filename)[0]
        output_filepath = os.path.join(
            storage_dir, f"{yaml_prefix}_real_time_analysis.yaml"
        )

        # Entry count after the partial run.
        assert (
            count_yaml_entries(output_filepath) == entries_after_first_run
        ), "Expected yaml entries do not match the actual number entries in the file."

        # Drop the original sampler before building the restored one.
        del sampler

        # Restore from storage and run the remaining iterations.
        sampler = self.SAMPLER.from_storage(reporter)
        sampler.run()

        # Entry count after the resumed run, including any redundant entries.
        assert (
            count_yaml_entries(output_filepath) == entries_at_end
        ), "Expected yaml entries do not match the actual number entries in the file."

def test_real_time_analysis_can_be_none():
"""Test if real time analysis can be done"""
Expand Down
0