-
Notifications
You must be signed in to change notification settings - Fork 0
Normalization #11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Comments
Implementation:

from pprint import pprint
from pyld import jsonld
from jsondiff import diff
# Example input: three persons and one organization whose employment
# relation and display name are each reported with a different property.
graph = {
    "@context": {
        "schema": "http://schema.org/",
        "demo": "https://oo-ld.github.io/demo/",
        # Three different properties are used for the name of a node.
        "name": "schema:name",
        "full_name": "demo:full_name",
        "label": "demo:label",
        # Three different properties are used for the employment relation;
        # all of them hold IRIs ("@type": "@id").
        "works_for": {"@id": "schema:worksFor", "@type": "@id"},
        "is_employed_by": {"@id": "demo:is_employed_by", "@type": "@id"},
        # NOTE(review): schema.org spells this property `employee`; the
        # spelling `employes` is kept because the whole example relies on it.
        "employes": {"@id": "schema:employes", "@type": "@id"},
        "type": "@type",
        "id": "@id",
    },
    "@graph": [
        # person -> organization via schema:worksFor, named via schema:name
        {
            "id": "demo:person1",
            "type": "schema:Person",
            "name": "Person1",
            "works_for": "demo:organizationA",
        },
        # person -> organization via demo:is_employed_by, named via demo:full_name
        {
            "id": "demo:person2",
            "type": "schema:Person",
            "full_name": "Person2",
            "is_employed_by": "demo:organizationA",
        },
        # person3 carries no relation itself; organizationA points to it
        {
            "id": "demo:person3",
            "type": "schema:Person",
            "name": "Person3",
        },
        # organization -> person via schema:employes, named via demo:label
        {
            "id": "demo:organizationA",
            "type": "schema:Organization",
            "label": "organizationA",
            "employes": "demo:person3",
        },
    ],
}
#graph["@graph"] = sorted(graph["@graph"], key=lambda x: x['@id'])
# Target context of the normalization.
#
# A key that ends with one or more "*" is an alias: it names a source
# property that should be normalized to the property of the key without the
# asterisks (e.g. "name*" -> "name"). An alias declared with "@reverse"
# names a source property that points in the opposite direction.
context = {
    "schema": "http://schema.org/",
    "demo": "https://oo-ld.github.io/demo/",
    "skos": "http://www.w3.org/2004/02/skos/core#",
    "name": "schema:name",
    "name*": "demo:full_name",
    # Alternative "label" definition with a scoped context, not in use:
    # "label": {"@id": "skos:prefLabel", "@container": "@set", "@language": "en", "@context": {"text": "@value", "lang": "@language"}},
    "text": "@value",
    "lang": "@language",
    "label": {"@id": "skos:prefLabel", "@container": "@set"},
    "label*": {"@id": "demo:label", "@container": "@set", "@language": "en"},
    "employes": {"@id": "schema:employes", "@type": "@id"},
    "employes*": {"@reverse": "schema:worksFor", "@type": "@id"},
    "employes**": {"@reverse": "demo:is_employed_by", "@type": "@id"},
    "type": "@type",
    "id": "@id",
}

# Two helper contexts are generated from the alias entries. Both use the same
# generated term name "_<prefix>_<local name>" of the alias property:
#   temp1[term] maps the term to the alias (source) property, keeping only
#               "@id" (taken from "@id" or "@reverse") and "@type";
#   temp2[term] is a copy of the alias definition whose "@id"/"@reverse" IRI
#               is replaced by the IRI of the base term.
temp1 = {}
temp2 = {}
for key, value in context.items():
    if not key.endswith("*"):
        continue  # only alias entries generate helper terms

    if isinstance(value, dict):
        temp1_value = {}
        if "@id" in value:
            temp1_value["@id"] = value["@id"]
        if "@reverse" in value:
            # The source property is always addressed in forward direction.
            temp1_value["@id"] = value["@reverse"]
        if "@type" in value:
            temp1_value["@type"] = value["@type"]
        # Shallow copy so that `context` itself is never modified.
        temp2_value = {**value}
    else:
        # Plain string definition: the value is the property IRI.
        temp1_value = {"@id": value}
        temp2_value = {"@id": value}

    # Look up the base term the alias belongs to ("employes**" -> "employes").
    org_key = key.replace("*", "")
    org_value = context[org_key]
    if isinstance(org_value, dict) and "@id" not in org_value:
        # A base term without "@id" cannot be used as target; the alias
        # definition is left unchanged in temp2.
        print("Error")
    else:
        target_iri = org_value["@id"] if isinstance(org_value, dict) else org_value
        for keyword in ("@id", "@reverse"):
            if keyword in temp2_value:
                temp2_value[keyword] = target_iri

    generated_term = "_" + temp1_value["@id"].replace(":", "_")
    temp1[generated_term] = temp1_value
    temp2[generated_term] = temp2_value
# Show both generated helper contexts.
pprint(temp1)
pprint(temp2)

# Self-check: each diff is printed and is expected to be empty when the
# generated context equals the hand-written expectation.
expected_temp1 = {
    "_demo_full_name": {"@id": "demo:full_name"},
    # "@container" and "@language" of the alias are not carried over.
    "_demo_label": {"@id": "demo:label"},
    "_schema_worksFor": {"@id": "schema:worksFor", "@type": "@id"},
    "_demo_is_employed_by": {"@id": "demo:is_employed_by", "@type": "@id"},
}
print(diff(temp1, expected_temp1))

expected_temp2 = {
    "_demo_full_name": {"@id": "schema:name"},
    "_demo_label": {"@id": "skos:prefLabel", "@container": "@set", "@language": "en"},
    "_schema_worksFor": {"@reverse": "schema:employes", "@type": "@id"},
    "_demo_is_employed_by": {"@reverse": "schema:employes", "@type": "@id"},
}
print(diff(temp2, expected_temp2))
# Step 1: compact against the target context extended by temp1, so that the
# alias (source) properties can be written with the generated "_..." terms.
# NOTE(review): `context` still contains the "*" alias terms at this point;
# confirm that compaction selects the generated "_..." terms over them.
renamed = jsonld.compact(graph, {**context, **temp1})

# Step 2: swap the context for the one extended by temp2. The generated
# terms stay in the data but are now defined via the base term's property.
renamed["@context"] = {**context, **temp2}

# Step 3: flatten the re-interpreted document.
flattened = jsonld.flatten(renamed)

# Step 4: compact against the plain target context and show the result.
graph = jsonld.compact(flattened, context)
pprint(graph)
print(diff(graph, {
'@context': {'demo': 'https://oo-ld.github.io/demo/',
'employes': {'@id': 'schema:employes', '@type': '@id'},
'employes*': {'@reverse': 'schema:worksFor', '@type': '@id'},
'employes**': {'@reverse': 'demo:is_employed_by', '@type': '@id'},
'id': '@id',
'label': {'@container': '@set', '@id': 'skos:prefLabel'},
'label*': {'@container': '@set',
'@id': 'demo:label',
'@language': 'en'},
'lang': '@language',
'name': 'schema:name',
'name*': 'demo:full_name',
'schema': 'http://schema.org/',
'skos': 'http://www.w3.org/2004/02/skos/core#',
'text': '@value',
'type': '@type'},
'@graph': [{'employes': ['demo:person1', 'demo:person2', 'demo:person3'],
'id': 'demo:organizationA',
'label': [{'lang': 'en', 'text': 'organizationA'}],
'type': 'schema:Organization'},
{'id': 'demo:person1', 'name': 'Person1', 'type': 'schema:Person'},
{'id': 'demo:person2', 'name': 'Person2', 'type': 'schema:Person'},
{'id': 'demo:person3', 'name': 'Person3', 'type': 'schema:Person'}]
}))

To nest the persons within the organization, framing can be applied:

frame = {
"type": "schema:Organization",
"custom_type": {"@default": "demo:Org"},
"employes": {
"type": "schema:Person"
}
}
frame["@context"] = {**context,
"custom_type": "demo:customType"
}
graph = jsonld.frame(graph, frame, options={"requireAll": True})

resulting in:

{
"employes": [
{
"id": "demo:person1",
"name": "Person1",
"type": "schema:Person"
},
{
"id": "demo:person2",
"name": "Person2",
"type": "schema:Person"
},
{
"id": "demo:person3",
"name": "Person3",
"type": "schema:Person"
}
],
"id": "demo:organizationA",
"label": [
{
"lang": "en",
"text": "organizationA"
}
],
"type": "schema:Organization"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Alias notation: see #12
ToDo: Class / Instance alias notation
Normalization can be realized by chaining JSON-LD algorithms:
Mapping-Context could also be constructed from https://github.com/mapping-commons/sssom
As an example, a dataset could consist of persons and organisations that report their relations in a non-interoperable way:
"skos:label": "test"
"skos_label": "skos:prefLabel"
"skos_label": {"@id": "skos:prefLabel", "@language": "en", "@container": "@set"}
"skos:label": [{"@value": "test", "@language": "en"}]
"rdfs:label": "test"
"rdfs_label": "rdfs:prefLabel"
"rdfs_label": "skos:label"
"skos:label": "test"
"@id": "ex:P", "schema:worksFor": "ex:C"
"schema_worksFor": "schema:worksFor"
"schema_worksFor": {"@reverse": "schema:employes"}
"@id": "ex:C", "schema:employes": "ex:P"
Example input:
Example output:
flattened
playground
Notebook: https://oo-ld.github.io/jupyterlite/lab/index.html?path=data_normalization.ipynb
The text was updated successfully, but these errors were encountered: