Documents#

About#

Documents: These include datasets, reports or other documents

See also

For OIH the focus is on generic documents, which can cover reports, data and other resources. Where the resources being described are of type Dataset, you may wish to review the patterns developed for GeoScience Datasets by the ESIP Science on Schema community.

Creative works (documents)#

Documents will include maps, reports, guidance and other creative works. Because of this, OIH will start with a generic example of schema.org/CreativeWork and then provide examples of more focused creative work types.

 1{
 2  "@context": {
 3    "@vocab": "https://schema.org/"
 4  },
 5  "@type": "CreativeWork",
 6  "@id": "https://example.org/id/XYZ",
 7  "name": "Name or title of the document",
 8  "description": "Description of the creative work to aid in searching",
 9  "url": "https://www.sample-data-repository.org/creativework/report.pdf",
10  "contributor": {
11    "@type": "Organization",
12    "@id": "http://www.foo.org/orgID",
13    "legalName": "Some Institute"
14  },
15  "author": {
16    "@id": "https://www.sample-data-repository.org/person/51317",
17    "@type": "Person",
18    "name": "Dr Uta Passow",
19    "givenName": "Uta",
20    "familyName": "Passow",
21    "url": "https://www.sample-data-repository.org/person/51317"
22  },
23  "identifier": {
24    "@id": "https://doi.org/10.5066/F7VX0DMQ",
25    "@type": "PropertyValue",
26    "propertyID": "https://registry.identifiers.org/registry/doi",
27    "value": "doi:10.5066/F7VX0DMQ",
28    "url": "https://doi.org/10.5066/F7VX0DMQ"
29  },
30  "keywords": {
31    "@type": "DefinedTerm",
32    "inDefinedTermSet": {
33      "@type": "DefinedTermSet",
34      "name": "Name of the set",
35      "description": "Description of the set",
36      "url": "url for the set"
37    },
38    "termCode": "A code that identifies this DefinedTerm within a DefinedTermSet"
39  },
40  "provider": {
41    "@id": "https://www.repositoryB.org",
42    "@type": "Organization",
43    "legalName": "Sample Data Repository Office",
44    "name": "SDRO",
45    "sameAs": "http://www.re3data.org/repository/r3dxxxxxxxxx",
46    "url": "https://www.sample-data-repository.org"
47  },
48  "license": "http://spdx.org/licenses/CC0-1.0",
49  "publisher": {
50    "@id": "https://www.publishingrus.org",
51    "@type": "Organization",
52    "legalName": "Some Institute"
53  }
54}
Hide code cell source
# Load the CreativeWork example graph, compact it against the schema.org
# vocabulary and hand the result to jbutils.show_graph.
import json
from pyld import jsonld
import os
import sys

# os.path.abspath('') resolves to the current working directory; the two
# dirname() hops put the directory two levels above it on sys.path so that
# `from lib import jbutils` below can be resolved.
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which varies between operating systems.
with open("../../../odis-in/dataGraphs/thematics/docs/graphs/creativework.json", encoding="utf-8") as dgraph:
    doc = json.load(dgraph)

# Compaction context: every term is resolved against the schema.org vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

compacted = jsonld.compact(doc, context)
# NOTE(review): show_graph is a project helper; presumably it renders the
# graph image displayed below this cell - confirm in lib/jbutils.
jbutils.show_graph(compacted)
../../_images/6b144c024f1bda29aaf0fe5eae85ac075457f5a29f4377e98ae796b8f1df82e9.svg

Details: Identifier#

For each profile there are a few key elements we need to know about. One key element is what the authoritative reference or canonical identifier is for a resource.

Hide code cell source
# Frame the CreativeWork example graph down to its `identifier` property,
# print the framed JSON-LD and pass it to jbutils.show_graph.
import json
from pyld import jsonld
import os
import sys
import urllib

# os.path.abspath('') resolves to the current working directory; the two
# dirname() hops put the directory two levels above it on sys.path so that
# `from lib import jbutils` below can be resolved.
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
# This cell calls jbutils.show_graph, so import it here rather than relying on
# an earlier cell having been executed first.
from lib import jbutils

# Read the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which varies between operating systems.
with open("../../../odis-in/dataGraphs/thematics/docs/graphs/creativework.json", encoding="utf-8") as dgraph:
    doc = json.load(dgraph)

# Framing flags are booleans in JSON-LD framing. The string "true" only worked
# because any non-empty string is truthy (so "false" would have been too).
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": True,
  "@requireAll": True,
  "@type":     "CreativeWork",
  "identifier": ""
}

# Compaction context: every term is resolved against the schema.org vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
jd = json.dumps(framed, indent=4)
print(jd)

# NOTE(review): show_graph is a project helper; presumably it renders the
# graph image displayed below this cell - confirm in lib/jbutils.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "identifier": {
        "@id": "https://doi.org/10.5066/F7VX0DMQ",
        "@type": "PropertyValue",
        "propertyID": "https://registry.identifiers.org/registry/doi",
        "url": "https://doi.org/10.5066/F7VX0DMQ",
        "value": "doi:10.5066/F7VX0DMQ"
    }
}
../../_images/147685ac70ea2618b862728bb037b0bb07d2f80f0ffd6001204d2b3de1f65aac.svg

Publisher and provider#

Our JSON-LD documents are graphs that can use framing to subset. In this case we can look closer at the provider and publisher properties, which are both of type Organization.

Hide code cell source
# Frame the CreativeWork example graph down to its `provider` and `publisher`
# properties, print the framed JSON-LD and pass it to jbutils.show_graph.
import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os
import sys

# os.path.abspath('') resolves to the current working directory; the two
# dirname() hops put the directory two levels above it on sys.path so that
# `from lib import jbutils` below can be resolved.
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which varies between operating systems.
with open("../../../odis-in/dataGraphs/thematics/docs/graphs/creativework.json", encoding="utf-8") as dgraph:
    doc = json.load(dgraph)

# Framing flags are booleans in JSON-LD framing. The string "true" only worked
# because any non-empty string is truthy (so "false" would have been too).
# The empty objects for provider/publisher act as match-anything sub-frames.
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": True,
  "@type":     "CreativeWork",
  "provider": {},
  "publisher": {}
}

# Compaction context: every term is resolved against the schema.org vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
jd = json.dumps(framed, indent=4)
print(jd)

# NOTE(review): show_graph is a project helper; presumably it renders the
# graph image displayed below this cell - confirm in lib/jbutils.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "provider": {
        "@id": "https://www.repositoryB.org",
        "@type": "Organization",
        "legalName": "Sample Data Repository Office",
        "name": "SDRO",
        "sameAs": "http://www.re3data.org/repository/r3dxxxxxxxxx",
        "url": "https://www.sample-data-repository.org"
    },
    "publisher": {
        "@id": "https://www.publishingrus.org",
        "@type": "Organization",
        "legalName": "Some Institute"
    }
}
../../_images/fa18120d1e6a00e772b4ac49905de0d48a618ec2ae5cb8623dd95e62e6f8811d.svg

Author type Person#

Our JSON-LD documents are graphs that can use framing to subset. In this case we can look closer at the author property which points to a type Person.

Hide code cell source
# Frame the CreativeWork example graph down to its `author` property, print
# the framed JSON-LD and pass it to jbutils.show_graph.
import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os
import sys

# os.path.abspath('') resolves to the current working directory; the two
# dirname() hops put the directory two levels above it on sys.path so that
# `from lib import jbutils` below can be resolved.
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which varies between operating systems.
with open("../../../odis-in/dataGraphs/thematics/docs/graphs/creativework.json", encoding="utf-8") as dgraph:
    doc = json.load(dgraph)

# Framing flags are booleans in JSON-LD framing. The string "true" only worked
# because any non-empty string is truthy (so "false" would have been too).
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": True,
  "@type":     "CreativeWork",
  "author": ""
}

# Compaction context: every term is resolved against the schema.org vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
jd = json.dumps(framed, indent=4)
print(jd)

# NOTE(review): show_graph is a project helper; presumably it renders the
# graph image displayed below this cell - confirm in lib/jbutils.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "author": {
        "@id": "https://www.sample-data-repository.org/person/51317",
        "@type": "Person",
        "familyName": "Passow",
        "givenName": "Uta",
        "name": "Dr Uta Passow",
        "url": "https://www.sample-data-repository.org/person/51317"
    }
}
../../_images/cb7f8c3e45a04561a385e30a5f1db15903ac6bb86113bf5b5cfba079198ad247.svg

License#

Hide code cell source
# Frame the CreativeWork example graph down to its `license` property, print
# the framed JSON-LD and pass it to jbutils.show_graph.
import json
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_graph
from pyld import jsonld
import graphviz
import os
import sys

# os.path.abspath('') resolves to the current working directory; the two
# dirname() hops put the directory two levels above it on sys.path so that
# `from lib import jbutils` below can be resolved.
currentdir = os.path.dirname(os.path.abspath(''))
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)
from lib import jbutils

# Read the file as UTF-8 explicitly instead of relying on the platform's
# default encoding, which varies between operating systems.
with open("../../../odis-in/dataGraphs/thematics/docs/graphs/creativework.json", encoding="utf-8") as dgraph:
    doc = json.load(dgraph)

# Framing flags are booleans in JSON-LD framing. The string "true" only worked
# because any non-empty string is truthy (so "false" would have been too).
frame = {
  "@context": {"@vocab": "https://schema.org/"},
  "@explicit": True,
  "@type":     "CreativeWork",
  "license": {}
}

# Compaction context: every term is resolved against the schema.org vocabulary.
context = {
    "@vocab": "https://schema.org/",
}

compacted = jsonld.compact(doc, context)

framed = jsonld.frame(compacted, frame)
jd = json.dumps(framed, indent=4)
print(jd)

# NOTE(review): show_graph is a project helper; presumably it renders the
# graph image displayed below this cell - confirm in lib/jbutils.
jbutils.show_graph(framed)
{
    "@context": {
        "@vocab": "https://schema.org/"
    },
    "@id": "https://example.org/id/XYZ",
    "@type": "CreativeWork",
    "license": "http://spdx.org/licenses/CC0-1.0"
}
../../_images/61d6a0e085d06595eb7f46ce11f831dd3a16ab967dce10be106f4c802c2769e5.svg

License as URL#

{
  "@context": "https://schema.org/",
  "license": "https://creativecommons.org/licenses/by/4.0/"
}

License as CreativeWork#

{
  "@context": "https://schema.org/",
  "license": {
    "@type": "CreativeWork",
    "name": "Creative Commons Attribution 4.0",
    "url": "https://creativecommons.org/licenses/by/4.0/"
  }
}

License as SPDX URL#

  • Use a simple URL

  • SPDX creates URLs for many licenses including those that don’t have URLs

  • From a source that harvesters can rely on (e.g. use the URL to look up more information about the license)

{
  "@context": "https://schema.org/",
  "license": "https://spdx.org/licenses/CC-BY-4.0"
}

OR, include both the SPDX and the Creative Commons URLs in an array:

{
  "@context": "https://schema.org/",
  "license": ["https://spdx.org/licenses/CC-BY-4.0", "https://creativecommons.org/licenses/by/4.0/"]
}

References#