From 7b00534deea29bc1318864c3c9caa715b4eb957f Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Tue, 23 May 2023 13:27:41 -0700 Subject: [PATCH 1/6] fix file naming bug --- CHANGELOG.md | 4 ++++ merlin/study/study.py | 8 +++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 15c9e0ffa..b3897a6e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ All notable changes to Merlin will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [unreleased] +### Fixed +- A bug where the .orig, .partial, and .expanded file names were using the study name rather than the original file name + ## [1.10.1] ### Fixed - A bug where assigning a worker all steps also assigned steps to the default worker diff --git a/merlin/study/study.py b/merlin/study/study.py index 3a51c926e..6c7a81c2e 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -100,6 +100,8 @@ def __init__( # pylint: disable=R0913 self.restart_dir = restart_dir + base_name = os.path.basename(filepath) + base_name = base_name.replace(".yaml", "") self.special_vars = { "SPECROOT": self.original_spec.specroot, "MERLIN_TIMESTAMP": self.timestamp, @@ -116,15 +118,15 @@ def __init__( # pylint: disable=R0913 "MERLIN_SAMPLE_NAMES": " ".join(self.get_sample_labels(from_spec=self.original_spec)), "MERLIN_SPEC_ORIGINAL_TEMPLATE": os.path.join( self.info, - self.original_spec.description["name"].replace(" ", "_") + ".orig.yaml", + base_name + ".orig.yaml", ), "MERLIN_SPEC_EXECUTED_RUN": os.path.join( self.info, - self.original_spec.description["name"].replace(" ", "_") + ".partial.yaml", + base_name + ".partial.yaml", ), "MERLIN_SPEC_ARCHIVED_COPY": os.path.join( self.info, - self.original_spec.description["name"].replace(" ", "_") + ".expanded.yaml", + base_name + ".expanded.yaml", ), } From d5c2da26078db37f446fc14a24cb51da6ee8620b Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 24 May 2023 09:14:44 -0700 Subject: [PATCH 2/6] fix filename bug with variable as study name --- merlin/study/study.py | 58 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 30 deletions(-) diff --git a/merlin/study/study.py b/merlin/study/study.py index 6c7a81c2e..abadb80b4 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -83,6 +83,7 @@ def __init__( # pylint: disable=R0913 pgen_file=None, pargs=None, ): + self.filepath = filepath self.original_spec = MerlinSpec.load_specification(filepath) self.override_vars = override_vars error_override_vars(self.override_vars, self.original_spec.path) @@ -100,8 +101,6 @@ def __init__( # pylint: disable=R0913 self.restart_dir = restart_dir - base_name = os.path.basename(filepath) - base_name = base_name.replace(".yaml", "") self.special_vars = { "SPECROOT": self.original_spec.specroot, "MERLIN_TIMESTAMP": self.timestamp, @@ -116,19 +115,8 @@ def __init__( # pylint: disable=R0913 # below will be substituted for sample values on execution "MERLIN_SAMPLE_VECTOR": " ".join([f"$({k})" for k in self.get_sample_labels(from_spec=self.original_spec)]), "MERLIN_SAMPLE_NAMES": " ".join(self.get_sample_labels(from_spec=self.original_spec)), - "MERLIN_SPEC_ORIGINAL_TEMPLATE": os.path.join( - self.info, - base_name + ".orig.yaml", - ), - "MERLIN_SPEC_EXECUTED_RUN": os.path.join( - self.info, - base_name + ".partial.yaml", - ), - "MERLIN_SPEC_ARCHIVED_COPY": os.path.join( - self.info, - base_name + ".expanded.yaml", - ), } + self._set_special_file_vars() self.pgen_file = pgen_file self.pargs = pargs @@ -136,12 +124,28 @@ def __init__( # pylint: disable=R0913 self.dag = None self.load_dag() - def write_original_spec(self, filename): + def _set_special_file_vars(self): + """Setter for the orig, partial, and expanded file paths of a study.""" + base_name = os.path.basename(self.filepath) + base_name = base_name.replace(".yaml", "") + self.special_vars["MERLIN_SPEC_ORIGINAL_TEMPLATE"] = os.path.join( + self.info, + base_name + ".orig.yaml", + ) + self.special_vars["MERLIN_SPEC_EXECUTED_RUN"] = os.path.join( + self.info, + base_name + ".partial.yaml", + ) + self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"] = os.path.join( + self.info, + base_name + ".expanded.yaml", + ) + + def write_original_spec(self): """ - Copy the original spec into merlin_info/ as '.orig.yaml'. + Copy the original spec into merlin_info/ as '.orig.yaml'. """ - spec_name = os.path.join(self.info, filename + ".orig.yaml") - shutil.copyfile(self.original_spec.path, spec_name) + shutil.copyfile(self.original_spec.path, self.special_vars["MERLIN_SPEC_ORIGINAL_TEMPLATE"]) def label_clash_error(self): """ @@ -370,10 +374,6 @@ def expanded_spec(self): return self.get_expanded_spec() result = self.get_expanded_spec() - expanded_name = result.description["name"].replace(" ", "_") + ".expanded.yaml" - - # Set expanded filepath - expanded_filepath = os.path.join(self.info, expanded_name) # expand provenance spec filename if contains_token(self.original_spec.name) or contains_shell_ref(self.original_spec.name): @@ -396,8 +396,8 @@ def expanded_spec(self): self.workspace = expanded_workspace self.info = os.path.join(self.workspace, "merlin_info") self.special_vars["MERLIN_INFO"] = self.info + self._set_special_file_vars() - expanded_filepath = os.path.join(self.info, expanded_name) new_spec_text = expand_by_line(result.dump(), MerlinStudy.get_user_vars(result)) result = MerlinSpec.load_spec_from_string(new_spec_text) result = expand_env_vars(result) @@ -415,23 +415,21 @@ def expanded_spec(self): ) # write expanded spec for provenance - with open(expanded_filepath, "w") as f: # pylint: disable=C0103 + with open(self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"], "w") as f: # pylint: disable=C0103 f.write(result.dump()) # write original spec for provenance - result = MerlinSpec.load_spec_from_string(result.dump()) - result.path = expanded_filepath - name = result.description["name"].replace(" ", "_") - self.write_original_spec(name) + self.write_original_spec() # write partially-expanded spec for provenance partial_spec = deepcopy(self.original_spec) + result = MerlinSpec.load_spec_from_string(result.dump()) + result.path = self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"] if "variables" in result.environment: partial_spec.environment["variables"] = result.environment["variables"] if "labels" in result.environment: partial_spec.environment["labels"] = result.environment["labels"] - partial_spec_path = os.path.join(self.info, name + ".partial.yaml") - with open(partial_spec_path, "w") as f: # pylint: disable=C0103 + with open(self.special_vars["MERLIN_SPEC_EXECUTED_RUN"], "w") as f: # pylint: disable=C0103 f.write(partial_spec.dump()) LOG.info(f"Study workspace is '{self.workspace}'.") From 8265a3c1cfd183a2da3065bcb85c773738de30a2 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 24 May 2023 09:15:25 -0700 Subject: [PATCH 3/6] add tests for the file name special vars changes --- tests/integration/conditions.py | 12 ++++--- tests/integration/test_definitions.py | 24 ++++++++----- tests/unit/study/test_study.py | 50 +++++++++++++++++++++++++-- 3 files changed, 72 insertions(+), 14 deletions(-) diff --git a/tests/integration/conditions.py b/tests/integration/conditions.py index db21e5429..4da8c36a1 100644 --- a/tests/integration/conditions.py +++ b/tests/integration/conditions.py @@ -249,14 +249,16 @@ class ProvenanceYAMLFileHasRegex(HasRegex): MUST contain a given regular expression. """ - def __init__(self, regex, name, output_path, provenance_type, negate=False): # pylint: disable=R0913 + def __init__(self, regex, spec_file_name, study_name, output_path, provenance_type, negate=False): # pylint: disable=R0913 """ :param `regex`: a string regex pattern - :param `name`: the name of a study + :param `spec_file_name`: the name of the spec file + :param `study_name`: the name of a study :param `output_path`: the $(OUTPUT_PATH) of a study """ super().__init__(regex, negate=negate) - self.name = name + self.spec_file_name = spec_file_name + self.study_name = study_name self.output_path = output_path provenance_types = ["orig", "partial", "expanded"] if provenance_type not in provenance_types: @@ -277,7 +279,9 @@ def glob_string(self): """ Returns a regex string for the glob library to recursively find files with. """ - return f"{self.output_path}/{self.name}" f"_[0-9]*-[0-9]*/merlin_info/{self.name}.{self.prov_type}.yaml" + return ( + f"{self.output_path}/{self.study_name}" f"_[0-9]*-[0-9]*/merlin_info/{self.spec_file_name}.{self.prov_type}.yaml" + ) def is_within(self): # pylint: disable=W0221 """ diff --git a/tests/integration/test_definitions.py b/tests/integration/test_definitions.py index 093644f9f..cf7c008de 100644 --- a/tests/integration/test_definitions.py +++ b/tests/integration/test_definitions.py @@ -435,31 +435,36 @@ def define_tests(): # pylint: disable=R0914,R0915 HasReturnCode(), ProvenanceYAMLFileHasRegex( regex=r"HELLO: \$\(SCRIPTS\)/hello_world.py", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="orig", ), ProvenanceYAMLFileHasRegex( regex=r"name: \$\(NAME\)", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="partial", ), ProvenanceYAMLFileHasRegex( regex="studies/feature_demo_", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="partial", ), ProvenanceYAMLFileHasRegex( regex="name: feature_demo", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="expanded", ), ProvenanceYAMLFileHasRegex( regex=r"\$\(NAME\)", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="expanded", negate=True, @@ -510,13 +515,15 @@ def define_tests(): # pylint: disable=R0914,R0915 "conditions": [ ProvenanceYAMLFileHasRegex( regex=r"\[0.3333333", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="expanded", ), ProvenanceYAMLFileHasRegex( regex=r"\[0.5", - name="feature_demo", + spec_file_name="feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="expanded", negate=True, @@ -715,7 +722,8 @@ def define_tests(): # pylint: disable=R0914,R0915 HasReturnCode(), ProvenanceYAMLFileHasRegex( regex="cli_test_demo_workers:", - name="feature_demo", + spec_file_name="remote_feature_demo", + study_name="feature_demo", output_path=OUTPUT_DIR, provenance_type="expanded", ), diff --git a/tests/unit/study/test_study.py b/tests/unit/study/test_study.py index a00995d55..cb15805cd 100644 --- a/tests/unit/study/test_study.py +++ b/tests/unit/study/test_study.py @@ -41,6 +41,15 @@ nodes: 1 task_queue: hello_queue + - name: test_special_vars + description: test the special vars + run: + cmd: | + echo $(MERLIN_SPEC_ORIGINAL_TEMPLATE) + echo $(MERLIN_SPEC_EXECUTED_RUN) + echo $(MERLIN_SPEC_ARCHIVED_COPY) + task_queue: special_var_queue + global.parameters: X2: values : [0.5] @@ -234,11 +243,16 @@ class TestMerlinStudy(unittest.TestCase): @staticmethod def file_contains_string(f, string): - return string in open(f, "r").read() + result = False + with open(f, "r") as infile: + if string in infile.read(): + result = True + return result def setUp(self): self.tmpdir = tempfile.mkdtemp() - self.merlin_spec_filepath = os.path.join(self.tmpdir, "basic_ensemble.yaml") + self.base_name = "basic_ensemble" + self.merlin_spec_filepath = os.path.join(self.tmpdir, f"{self.base_name}.yaml") with open(self.merlin_spec_filepath, "w+") as _file: _file.write(MERLIN_SPEC) @@ -263,6 +277,34 @@ def test_expanded_spec(self): assert TestMerlinStudy.file_contains_string(self.study.expanded_spec.path, "$PATH") assert not TestMerlinStudy.file_contains_string(self.study.expanded_spec.path, "PATH_VAR: $PATH") + # Special vars are in the second step of MERLIN_SPEC so grab that step here + original_special_var_step = self.study.original_spec.study[1]["run"]["cmd"] + expanded_special_var_step = self.study.expanded_spec.study[1]["run"]["cmd"] + + # Make sure the special filepath variables aren't expanded in the original spec + assert "$(MERLIN_SPEC_ORIGINAL_TEMPLATE)" in original_special_var_step + assert "$(MERLIN_SPEC_EXECUTED_RUN)" in original_special_var_step + assert "$(MERLIN_SPEC_ARCHIVED_COPY)" in original_special_var_step + + # Make sure the special filepath variables aren't left in their variable form in the expanded spec + assert "$(MERLIN_SPEC_ORIGINAL_TEMPLATE)" not in expanded_special_var_step + assert "$(MERLIN_SPEC_EXECUTED_RUN)" not in expanded_special_var_step + assert "$(MERLIN_SPEC_ARCHIVED_COPY)" not in expanded_special_var_step + + # Make sure the special filepath variables we're expanded appropriately in the expanded spec + assert ( + f"{self.base_name}.orig.yaml" in expanded_special_var_step + and "unit_test1.orig.yaml" not in expanded_special_var_step + ) + assert ( + f"{self.base_name}.partial.yaml" in expanded_special_var_step + and "unit_test1.partial.yaml" not in expanded_special_var_step + ) + assert ( + f"{self.base_name}.expanded.yaml" in expanded_special_var_step + and "unit_test1.expanded.yaml" not in expanded_special_var_step + ) + def test_column_label_conflict(self): """ If there is a common key between Maestro's global.parameters and @@ -291,3 +333,7 @@ def test_no_env(self): assert isinstance(study_no_env, MerlinStudy), bad_type_err except Exception as e: assert False, f"Encountered unexpected exception, {e}, for viable MerlinSpec without optional 'env' section." + + +if __name__ == "__main__": + unittest.main() From 64bf0d0cc710e5e66b61b8e842ab2c8eb25b64d0 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 24 May 2023 09:15:41 -0700 Subject: [PATCH 4/6] modify changelog --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3897a6e1..603086aaa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - A bug where the .orig, .partial, and .expanded file names were using the study name rather than the original file name +### Added +- Tests for ensuring `$(MERLIN_SPEC_ORIGINAL_TEMPLATE)`, `$(MERLIN_SPEC_ARCHIVED_COPY)`, and `$(MERLIN_SPEC_EXECUTED_RUN)` are stored correctly + +### Changed +- The ProvenanceYAMLFileHasRegex condition for integration tests now saves the study name and spec file name as attributes instead of just the study name + - This lead to minor changes in 3 tests ("local override feature demo", "local pgen feature demo", and "remote feature demo") with what we pass to this specific condition + ## [1.10.1] ### Fixed - A bug where assigning a worker all steps also assigned steps to the default worker From 8f8d9bf8defd1a9d6b8341f03db50bc9b28b6bed Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 24 May 2023 10:51:34 -0700 Subject: [PATCH 5/6] implement Luc's suggestions --- merlin/study/study.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/merlin/study/study.py b/merlin/study/study.py index abadb80b4..441ede612 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -36,6 +36,7 @@ import time from contextlib import suppress from copy import deepcopy +from pathlib import Path from cached_property import cached_property from maestrowf.datastructures.core import Study @@ -126,7 +127,7 @@ def __init__( # pylint: disable=R0913 def _set_special_file_vars(self): """Setter for the orig, partial, and expanded file paths of a study.""" - base_name = os.path.basename(self.filepath) + base_name = Path(self.filepath).stem base_name = base_name.replace(".yaml", "") self.special_vars["MERLIN_SPEC_ORIGINAL_TEMPLATE"] = os.path.join( self.info, @@ -414,17 +415,16 @@ def expanded_spec(self): os.path.join(self.info, os.path.basename(self.samples_file)), ) - # write expanded spec for provenance + # write expanded spec for provenance and set the path (necessary for testing) with open(self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"], "w") as f: # pylint: disable=C0103 f.write(result.dump()) + result.path = self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"] # write original spec for provenance self.write_original_spec() # write partially-expanded spec for provenance partial_spec = deepcopy(self.original_spec) - result = MerlinSpec.load_spec_from_string(result.dump()) - result.path = self.special_vars["MERLIN_SPEC_ARCHIVED_COPY"] if "variables" in result.environment: partial_spec.environment["variables"] = result.environment["variables"] if "labels" in result.environment: From 33e3abfd6614b9834fa99153a3638fbbb562db14 Mon Sep 17 00:00:00 2001 From: Brian Gunnarson Date: Wed, 24 May 2023 11:15:53 -0700 Subject: [PATCH 6/6] remove replace line --- merlin/study/study.py | 1 - 1 file changed, 1 deletion(-) diff --git a/merlin/study/study.py b/merlin/study/study.py index 441ede612..5446c84a1 100644 --- a/merlin/study/study.py +++ b/merlin/study/study.py @@ -128,7 +128,6 @@ def __init__( # pylint: disable=R0913 def _set_special_file_vars(self): """Setter for the orig, partial, and expanded file paths of a study.""" base_name = Path(self.filepath).stem - base_name = base_name.replace(".yaml", "") self.special_vars["MERLIN_SPEC_ORIGINAL_TEMPLATE"] = os.path.join( self.info, base_name + ".orig.yaml",