[jsonschemagen] Add lifecycle methods to jsonschemagen by sneakers-the-rat · Pull Request #2369 · linkml/linkml · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

[jsonschemagen] Add lifecycle methods to jsonschemagen #2369

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions linkml/generators/common/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
"""

import dataclasses
from abc import abstractmethod
from typing import Any, TypeVar

try:
Expand Down Expand Up @@ -57,11 +56,11 @@ class BuildResult(BaseModel):

model_config = ConfigDict(arbitrary_types_allowed=True)

def merge(self, other: T) -> T:
    """
    Build results should have some means of merging results of a like kind

    This base implementation is a placeholder: result types that can be
    combined are expected to override it.

    Deliberately *not* decorated with ``@abstractmethod``: result classes
    that never need merging (and so never override this) must still be
    constructible. NOTE(review): the base model's metaclass is assumed to
    enforce abstract methods at instantiation time - confirm.

    :param other: another result of the same kind to combine with this one
    :return: the merged result
    :raises NotImplementedError: always, unless overridden by a subclass
    """
    raise NotImplementedError("This build result doesn't know how to merge!")


class SchemaResult(BuildResult):
Expand Down
20 changes: 18 additions & 2 deletions linkml/generators/common/lifecycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@
TypeDefinition,
)

from linkml.generators.common.build import ClassResult, RangeResult, SchemaResult, SlotResult, TypeResult
from linkml.generators.common.build import ClassResult, EnumResult, RangeResult, SchemaResult, SlotResult, TypeResult
from linkml.generators.common.template import TemplateModel

# Type variables for the values passed through the lifecycle hooks.
# Each one is bound to the matching build-result base class so a generator can
# substitute its own result subclass and keep precise types on the hooks.
TSchema = TypeVar("TSchema", bound=SchemaResult)
TClass = TypeVar("TClass", bound=ClassResult)
TSlot = TypeVar("TSlot", bound=SlotResult)
TRange = TypeVar("TRange", bound=RangeResult)
TType = TypeVar("TType", bound=TypeResult)
# Bound to the *result* type like its siblings (not to the EnumDefinition source type).
TEnum = TypeVar("TEnum", bound=EnumResult)
TTemplate = TypeVar("TTemplate", bound=TemplateModel)


Expand Down Expand Up @@ -93,6 +93,22 @@
def after_generate_slots(self, slot: Iterable[TSlot], sv: SchemaView) -> Iterable[TSlot]:
return slot

def before_generate_class_slot(self, slot: SlotDefinition, cls: ClassDefinition, sv: SchemaView) -> SlotDefinition:
    """
    Hook for a single slot definition, in the context of one class, before it is generated.

    The default implementation returns ``slot`` unchanged; override it to
    inspect or replace the definition.

    :param slot: the slot definition about to be generated
    :param cls: the class the slot is being generated for
    :param sv: the schema view in use
    :return: the slot definition to generate from
    """
    return slot

def after_generate_class_slot(self, slot: TSlot, cls: ClassDefinition, sv: SchemaView) -> TSlot:
    """
    Hook for a single built slot result, in the context of one class, after it is generated.

    The default implementation returns ``slot`` unchanged; override it to
    inspect or replace the result.

    :param slot: the built slot result
    :param cls: the class the slot was generated for
    :param sv: the schema view in use
    :return: the slot result to use
    """
    return slot

def before_generate_class_slots(
    self, slot: Iterable[SlotDefinition], cls: ClassDefinition, sv: SchemaView
) -> Iterable[SlotDefinition]:
    """
    Hook for the full collection of a class's slot definitions before any of them is generated.

    The default implementation returns ``slot`` unchanged; override it to
    add, drop or reorder slot definitions.

    :param slot: the slot definitions for ``cls`` (note: an iterable, despite the singular name)
    :param cls: the class whose slots are about to be generated
    :param sv: the schema view in use
    :return: the slot definitions to generate from
    """
    return slot

def after_generate_class_slots(
    self, slot: Iterable[TSlot], cls: ClassDefinition, sv: SchemaView
) -> Iterable[TSlot]:
    """
    Hook for the full collection of a class's built slot results after they are generated.

    The default implementation returns ``slot`` unchanged; override it to
    add, drop or reorder slot results.

    :param slot: the built slot results for ``cls`` (note: an iterable, despite the singular name)
    :param cls: the class whose slots were generated
    :param sv: the schema view in use
    :return: the slot results to use
    """
    # NOTE(review): no caller of this hook is visible in this view - confirm which generators invoke it.
    return slot

Check warning on line 110 in linkml/generators/common/lifecycle.py

View check run for this annotation

Codecov / codecov/patch

linkml/generators/common/lifecycle.py#L110

Added line #L110 was not covered by tests

def before_generate_type(self, typ: TypeDefinition, sv: SchemaView) -> TypeDefinition:
return typ

Expand Down
76 changes: 71 additions & 5 deletions linkml/generators/jsonschemagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
from linkml_runtime.utils.formatutils import be, camelcase, underscore

from linkml._version import __version__
from linkml.generators.common import build
from linkml.generators.common.lifecycle import LifecycleMixin
from linkml.generators.common.type_designators import get_type_designator_value
from linkml.utils.generator import Generator, shared_arguments

Expand Down Expand Up @@ -177,8 +179,32 @@ def array_of(cls, subschema: "JsonSchema", required: bool = True) -> "JsonSchema
return JsonSchema(schema)


class SchemaResult(build.SchemaResult):
    """Top-level result of building a json schema"""

    # The complete generated document: ``generate`` wraps ``top_level_schema`` in
    # this field for ``after_generate_schema`` and reads it back from the hook's return value.
    schema_: JsonSchema


class EnumResult(build.EnumResult):
    """A single built enum"""

    # The subschema built for one enum: ``handle_enum`` wraps it in this field for
    # ``after_generate_enum`` and registers whatever the hook returns as the definition.
    schema_: JsonSchema


class ClassResult(build.ClassResult):
    """A single built class"""

    # The subschema built for one class: ``handle_class`` wraps it in this field for
    # ``after_generate_class`` and registers whatever the hook returns as the definition.
    schema_: JsonSchema


class SlotResult(build.SlotResult):
    """A slot within the context of a class"""

    # The property subschema built for one slot: ``handle_class_slot`` wraps it in this
    # field for ``after_generate_class_slot`` and adds whatever the hook returns as the property.
    schema_: JsonSchema


@dataclass
class JsonSchemaGenerator(Generator):
class JsonSchemaGenerator(Generator, LifecycleMixin):
"""
Generates JSONSchema documents from a LinkML SchemaDefinition

Expand All @@ -187,6 +213,21 @@ class JsonSchemaGenerator(Generator):
- Composition not yet implemented
- Enumerations treated as strings
- Foreign key references are treated as semantics-free strings

This generator implements the following :class:`.LifecycleMixin` methods:

* :meth:`.LifecycleMixin.before_generate_schema`
* :meth:`.LifecycleMixin.after_generate_schema`
* :meth:`.LifecycleMixin.before_generate_classes`
* :meth:`.LifecycleMixin.before_generate_enums`
* :meth:`.LifecycleMixin.before_generate_class_slots`
* :meth:`.LifecycleMixin.before_generate_class`
* :meth:`.LifecycleMixin.after_generate_class`
* :meth:`.LifecycleMixin.before_generate_class_slot`
* :meth:`.LifecycleMixin.after_generate_class_slot`
* :meth:`.LifecycleMixin.before_generate_enum`
* :meth:`.LifecycleMixin.after_generate_enum`

"""

# ClassVars
Expand Down Expand Up @@ -233,7 +274,7 @@ def __post_init__(self):
if self.schemaview.get_class(self.top_class) is None:
logger.warning(f"No class in schema named {self.top_class}")

def start_schema(self, inline: bool = False) -> JsonSchema:
def start_schema(self, inline: bool = False):
self.inline = inline

self.top_level_schema = JsonSchema(
Expand All @@ -249,6 +290,8 @@ def start_schema(self, inline: bool = False) -> JsonSchema:
)

def handle_class(self, cls: ClassDefinition) -> None:
cls = self.before_generate_class(cls, self.schemaview)

if cls.mixin or cls.abstract:
return

Expand All @@ -268,7 +311,10 @@ def handle_class(self, cls: ClassDefinition) -> None:
if self.title_from == "title" and cls.title:
class_subschema["title"] = cls.title

for slot_definition in self.schemaview.class_induced_slots(cls.name):
class_slots = self.before_generate_class_slots(
self.schemaview.class_induced_slots(cls.name), cls, self.schemaview
)
for slot_definition in class_slots:
self.handle_class_slot(subschema=class_subschema, cls=cls, slot=slot_definition)

rule_subschemas = []
Expand Down Expand Up @@ -318,6 +364,10 @@ def handle_class(self, cls: ClassDefinition) -> None:
class_subschema["allOf"] = []
class_subschema["allOf"].extend(rule_subschemas)

class_subschema = self.after_generate_class(
ClassResult.model_construct(schema_=class_subschema, source=cls), self.schemaview
).schema_

self.top_level_schema.add_def(cls.name, class_subschema)

if (self.top_class is not None and camelcase(self.top_class) == camelcase(cls.name)) or (
Expand Down Expand Up @@ -375,6 +425,7 @@ def get_subschema_for_anonymous_class(
def handle_enum(self, enum: EnumDefinition) -> None:
# TODO: this only works with explicitly permitted values. It will need to be extended to
# support other pv_formula
enum = self.before_generate_enum(enum, self.schemaview)

def extract_permissible_text(pv):
if isinstance(pv, str):
Expand All @@ -398,6 +449,10 @@ def extract_permissible_text(pv):

if permissible_values_texts:
enum_schema["enum"] = permissible_values_texts

enum_schema = self.after_generate_enum(
EnumResult.model_construct(schema_=enum_schema, source=enum), self.schemaview
).schema_
self.top_level_schema.add_def(enum.name, enum_schema)

def get_type_info_for_slot_subschema(
Expand Down Expand Up @@ -596,6 +651,7 @@ def get_subschema_for_slot(
return prop

def handle_class_slot(self, subschema: JsonSchema, cls: ClassDefinition, slot: SlotDefinition) -> None:
slot = self.before_generate_class_slot(slot, cls, self.schemaview)
class_id_slot = self.schemaview.get_identifier_slot(cls.name, use_key=True)
value_required = (
slot.required or slot == class_id_slot or slot.value_presence == PresenceEnum(PresenceEnum.PRESENT)
Expand All @@ -604,6 +660,9 @@ def handle_class_slot(self, subschema: JsonSchema, cls: ClassDefinition, slot: S

aliased_slot_name = self.aliased_slot_name(slot)
prop = self.get_subschema_for_slot(slot, include_null=self.include_null)
prop = self.after_generate_class_slot(
SlotResult.model_construct(schema_=prop, source=slot), cls, self.schemaview
).schema_
subschema.add_property(
aliased_slot_name, prop, value_required=value_required, value_disallowed=value_disallowed
)
Expand All @@ -613,13 +672,20 @@ def handle_class_slot(self, subschema: JsonSchema, cls: ClassDefinition, slot: S
prop["enum"] = [type_value]

def generate(self) -> JsonSchema:
    """
    Build the JSON Schema document for the loaded schema, running lifecycle hooks.

    Order of operations:

    1. ``before_generate_schema`` may replace ``self.schema``
    2. ``start_schema`` creates the empty top-level document
    3. every enum returned by ``before_generate_enums`` is handled
    4. every class returned by ``before_generate_classes`` is handled
    5. ``after_generate_schema`` may replace the finished document

    :return: the finished top-level schema, which is also stored on ``self.top_level_schema``
    """
    self.schema = self.before_generate_schema(self.schema, self.schemaview)
    self.start_schema()

    # Iterate over what the hooks return, not over the schema view directly,
    # so subclasses can filter, extend or reorder what gets generated.
    all_enums = self.before_generate_enums(self.schemaview.all_enums().values(), self.schemaview)
    for enum_definition in all_enums:
        self.handle_enum(enum_definition)

    all_classes = self.before_generate_classes(self.schemaview.all_classes().values(), self.schemaview)
    for class_definition in all_classes:
        self.handle_class(class_definition)

    # model_construct is used to wrap the document for the hook; the hook's
    # (possibly replaced) schema_ becomes the final result.
    self.top_level_schema = self.after_generate_schema(
        SchemaResult.model_construct(schema_=self.top_level_schema, source=self.schema), self.schemaview
    ).schema_
    return self.top_level_schema

def serialize(self, **kwargs) -> str:
Expand Down
76 changes: 74 additions & 2 deletions tests/test_generators/test_jsonschemagen.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
import json
import logging
from pathlib import Path
from typing import Dict, List, Optional, Union
from typing import Dict, Iterable, List, Optional, Union

import jsonschema
import pytest
import yaml
from linkml_runtime import SchemaView
from linkml_runtime.dumpers import json_dumper
from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.linkml_model import ClassDefinition, SchemaDefinition, SlotDefinition
from linkml_runtime.loaders import yaml_loader

from linkml.generators.jsonschemagen import JsonSchemaGenerator
Expand Down Expand Up @@ -329,6 +330,77 @@ def test_slot_not_required_nullability(input_path, not_closed):
assert {"type": "null"} in prop["anyOf"], f"{key} does not allow null"


def test_lifecycle_classes(kitchen_sink_path):
    """Class-level lifecycle hooks overridden in a subclass change the generated schema"""

    class TestJsonSchemaGen(JsonSchemaGenerator):
        def before_generate_classes(self, cls: Iterable[ClassDefinition], sv: SchemaView) -> Iterable[ClassDefinition]:
            # Drop the first class; the output must then not contain it.
            remaining = list(cls)
            dropped = remaining.pop(0)
            assert dropped.name == "activity"
            return remaining

        def before_generate_class(self, cls: ClassDefinition, sv: SchemaView) -> ClassDefinition:
            # Overwrite every description so the change is visible in the output.
            cls.description = "TEST MODIFYING CLASSES"
            return cls

        def after_generate_class(self, cls, sv: SchemaView):
            # Flip additionalProperties on the built subschema.
            cls.schema_["additionalProperties"] = True
            return cls

    generator = TestJsonSchemaGen(kitchen_sink_path, mergeimports=True, top_class="Dataset", not_closed=False)
    schema = json.loads(generator.serialize())
    definitions = schema["$defs"]

    assert "Activity" not in definitions
    for definition in definitions.values():
        # Enum definitions do not pass through the class hooks.
        if "enum" not in definition:
            assert definition["additionalProperties"]
            assert definition["description"] == "TEST MODIFYING CLASSES"


def test_lifecycle_slots(kitchen_sink_path):
    """Slot-level lifecycle hooks overridden in a subclass change the generated schema"""

    class TestJsonSchemaGen(JsonSchemaGenerator):
        def before_generate_class_slots(
            self, slot: Iterable[SlotDefinition], cls, sv: SchemaView
        ) -> Iterable[SlotDefinition]:
            # Append an extra slot whose default is the number of slots the class had.
            slots = list(slot)
            original_count = len(slots)
            slots.append(SlotDefinition(name="number_of_slots", range="integer", ifabsent=f"integer({original_count})"))
            return slots

        def before_generate_class_slot(self, slot: SlotDefinition, cls, sv: SchemaView) -> SlotDefinition:
            slot.description = "TEST MODIFYING SLOTS"
            return slot

        def after_generate_class_slot(self, slot, cls, sv: SchemaView):
            # Add a marker entry to every property's "type", whatever shape it has.
            prop = slot.schema_
            if "type" not in prop:
                prop["type"] = ["faketype"]
                return slot

            declared = prop["type"]
            if isinstance(declared, list):
                declared.append("faketype")
            else:
                prop["type"] = [declared, "faketype"]
            return slot

    generator = TestJsonSchemaGen(kitchen_sink_path, mergeimports=True, top_class="Dataset", not_closed=False)
    schema = json.loads(generator.serialize())

    for definition in schema["$defs"].values():
        # Enum definitions have no properties and do not pass through the slot hooks.
        if "enum" in definition:
            continue
        properties = definition["properties"]
        assert "number_of_slots" in properties
        for prop in properties.values():
            assert prop["description"] == "TEST MODIFYING SLOTS"
            assert "faketype" in prop["type"]


# **********************************************************
#
# Utility functions
Expand Down
Loading
0