Documents

About

Documents: These include datasets, reports or other documents

See also

For OIH the focus is on generic documents, which can cover reports, data and other resources. In cases where the resources being described are of type Dataset, you may wish to review the patterns developed for GeoScience Datasets by the ESIP Science on Schema community.

Creative works (documents)

Documents will include maps, reports, guidance and other creative works. For this reason, OIH will focus on a generic example of schema.org/CreativeWork and then provide examples for more specific types of creative work.

{
  "@context": {
    "@vocab": "https://schema.org/"
  },
  "@type": "CreativeWork",
  "@id": "https://example.org/id/XYZ",
  "name": "Name or title of the document",
  "description": "Description of the creative work to aid in searching",
  "url": "https://www.sample-data-repository.org/creativework/report.pdf",
  "contributor": {
    "@type": "Organization",
    "@id": "http://www.foo.org/orgID",
    "legalName": "Some Institute"
  },
  "author": {
    "@id": "https://www.sample-data-repository.org/person/51317",
    "@type": "Person",
    "name": "Dr Uta Passow",
    "givenName": "Uta",
    "familyName": "Passow",
    "url": "https://www.sample-data-repository.org/person/51317"
  },
  "identifier": {
    "@id": "https://doi.org/10.5066/F7VX0DMQ",
    "@type": "PropertyValue",
    "propertyID": "https://registry.identifiers.org/registry/doi",
    "value": "doi:10.5066/F7VX0DMQ",
    "url": "https://doi.org/10.5066/F7VX0DMQ"
  },
  "keywords": {
    "@type": "DefinedTerm",
    "inDefinedTermSet": {
      "@type": "DefinedTermSet",
      "name": "Name of the set",
      "description": "Description of the set",
      "url": "url for the set"
    },
    "termCode": "A code that identifies this DefinedTerm within a DefinedTermSet"
  },
  "provider": {
    "@id": "https://www.repositoryB.org",
    "@type": "Organization",
    "legalName": "Sample Data Repository Office",
    "name": "SDRO",
    "sameAs": "http://www.re3data.org/repository/r3dxxxxxxxxx",
    "url": "https://www.sample-data-repository.org"
  },
  "license": "http://spdx.org/licenses/CC0-1.0",
  "publisher": {
    "@id": "https://www.publishingrus.org",
    "@type": "Organization",
    "legalName": "Some Institute"
  }
}
import json
from pyld import jsonld
import os, sys

# Notebook cell: load the example CreativeWork document, compact it against
# the schema.org vocabulary and draw it as a graph.
#
# os.path.abspath('') resolves to the current working directory, so
# `currentdir` is actually the parent of the working directory and
# `parentdir` is two levels above it. That directory is put first on the
# import path before importing `lib` (presumably where the project-local
# `lib` package lives -- verify against the repository layout).
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils
# Read the example JSON-LD document (path is relative to the working directory).
with open("./graphs/creativework.json") as dgraph:
    doc = json.load(dgraph)

# Context used for compaction: schema.org is the default vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

# Compact the document against the context, then hand the result to the
# project's graph-drawing helper.
compacted = jsonld.compact(doc, context)
jbutils.show_graph(compacted)
../../_images/README_1_0.svg

Details: Identifier

For each profile there are a few key elements we need to know about. One key element is what the authoritative reference or canonical identifier is for a resource.

import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os, sys

# Notebook cell: frame the example CreativeWork document so that only its
# `identifier` property is shown, print the framed JSON and draw it.
#
# os.path.abspath('') resolves to the current working directory, so
# `parentdir` ends up two levels above it; that directory is put first on
# the import path before importing `lib` (presumably where the
# project-local `lib` package lives -- verify against the repository layout).
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the example JSON-LD document (path is relative to the working directory).
with open("./graphs/creativework.json") as dgraph:
    doc = json.load(dgraph)

# Frame: match nodes of type CreativeWork and name `identifier` as the only
# property of interest.
# NOTE(review): "@explicit" and "@requireAll" are given as the string "true"
# rather than a boolean; the JSON-LD Framing spec defines these flags as
# booleans -- confirm the string form is intended.
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": "true",
  "@requireAll": "true",
  "@type":     "CreativeWork",
  "identifier": ""
}

# Context used for compaction: schema.org is the default vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

# Compact first, then apply the frame to the compacted document.
compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
# Pretty-print the framed result.
jd = json.dumps(framed, indent=4)
print(jd)

# Draw the framed document with the project's graph helper.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "identifier": {
        "@id": "https://doi.org/10.5066/F7VX0DMQ",
        "@type": "PropertyValue",
        "propertyID": "https://registry.identifiers.org/registry/doi",
        "url": "https://doi.org/10.5066/F7VX0DMQ",
        "value": "doi:10.5066/F7VX0DMQ"
    }
}
../../_images/README_3_1.svg

Frame on publisher and provider

Our JSON-LD documents are graphs that can be subset using framing. In this case we can look closer at the provider and publisher properties, which both point to a type Organization.

import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os, sys

# Notebook cell: frame the example CreativeWork document so that only its
# `provider` and `publisher` properties are shown, print the framed JSON
# and draw it.
#
# os.path.abspath('') resolves to the current working directory, so
# `parentdir` ends up two levels above it; that directory is put first on
# the import path before importing `lib` (presumably where the
# project-local `lib` package lives -- verify against the repository layout).
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the example JSON-LD document (path is relative to the working directory).
with open("./graphs/creativework.json") as dgraph:
    doc = json.load(dgraph)

# Frame: match nodes of type CreativeWork and name `provider` and
# `publisher` as the properties of interest ({} places no constraint on
# their values).
# NOTE(review): "@explicit" is given as the string "true" rather than a
# boolean; the JSON-LD Framing spec defines this flag as a boolean --
# confirm the string form is intended.
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": "true",
  "@type":     "CreativeWork",
  "provider": {},
  "publisher": {}
}

# Context used for compaction: schema.org is the default vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

# Compact first, then apply the frame to the compacted document.
compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
# Pretty-print the framed result.
jd = json.dumps(framed, indent=4)
print(jd)

# Draw the framed document with the project's graph helper.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "provider": {
        "@id": "https://www.repositoryB.org",
        "@type": "Organization",
        "legalName": "Sample Data Repository Office",
        "name": "SDRO",
        "sameAs": "http://www.re3data.org/repository/r3dxxxxxxxxx",
        "url": "https://www.sample-data-repository.org"
    },
    "publisher": {
        "@id": "https://www.publishingrus.org",
        "@type": "Organization",
        "legalName": "Some Institute"
    }
}
../../_images/README_5_1.svg

Frame on author type Person

Our JSON-LD documents are graphs that can use framing to subset. In this case we can look closer at the author property which points to a type Person.

import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os, sys

# Notebook cell: frame the example CreativeWork document so that only its
# `author` property is shown, print the framed JSON and draw it.
#
# os.path.abspath('') resolves to the current working directory, so
# `parentdir` ends up two levels above it; that directory is put first on
# the import path before importing `lib` (presumably where the
# project-local `lib` package lives -- verify against the repository layout).
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the example JSON-LD document (path is relative to the working directory).
with open("./graphs/creativework.json") as dgraph:
    doc = json.load(dgraph)

# Frame: match nodes of type CreativeWork and name `author` as the only
# property of interest.
# NOTE(review): "@explicit" is given as the string "true" rather than a
# boolean; the JSON-LD Framing spec defines this flag as a boolean --
# confirm the string form is intended.
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": "true",
  "@type":     "CreativeWork",
  "author": ""
}

# Context used for compaction: schema.org is the default vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

# Compact first, then apply the frame to the compacted document.
compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
# Pretty-print the framed result.
jd = json.dumps(framed, indent=4)
print(jd)

# Draw the framed document with the project's graph helper.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "author": {
        "@id": "https://www.sample-data-repository.org/person/51317",
        "@type": "Person",
        "familyName": "Passow",
        "givenName": "Uta",
        "name": "Dr Uta Passow",
        "url": "https://www.sample-data-repository.org/person/51317"
    }
}
../../_images/README_7_1.svg

References