Skip to content

Class: DatasetCollection

A collection of related datasets, likely containing multiple files of multiple potential purposes and properties.

URI: data_sheets_schema:DatasetCollection

erDiagram DatasetCollection { CompressionEnum compression uriorcurie conforms_to uriorcurie conforms_to_class uriorcurie conforms_to_schema stringList created_by string created_on string description uriorcurie doi uri download_url string id string issued stringList keywords string language string last_updated_on string license string modified_by string page uriorcurie publisher uriorcurie status string title string version string was_derived_from } Dataset { integer bytes string dialect EncodingEnum encoding FormatEnum format string hash string md5 string media_type string path string sha256 string is_tabular CompressionEnum compression uriorcurie conforms_to uriorcurie conforms_to_class uriorcurie conforms_to_schema stringList created_by string created_on string description uriorcurie doi uri download_url string id string issued stringList keywords string language string last_updated_on string license string modified_by string page uriorcurie publisher uriorcurie status string title string version string was_derived_from } Deidentification { stringList description string id string name } ExtensionMechanism { stringList description string id string name } VersionAccess { stringList description string id string name } RetentionLimits { stringList description string id string name } UpdatePlan { stringList description string id string name } Erratum { stringList description string id string name } Maintainer { stringList description string id string name } ExportControlRegulatoryRestrictions { stringList description string id string name } IPRestrictions { stringList description string id string name } LicenseAndUseTerms { stringList description string id string name } DistributionDate { stringList description string id string name } DistributionFormat { stringList description string id string name } DiscouragedUse { stringList description string id string name } FutureUseImpact { stringList description string id string name } OtherTask { stringList description 
string id string name } UseRepository { stringList description string id string name } ExistingUse { stringList description string id string name } RawData { stringList description string id string name } LabelingStrategy { stringList description string id string name } CleaningStrategy { stringList description string id string name } PreprocessingStrategy { stringList description string id string name } DataProtectionImpact { stringList description string id string name } EthicalReview { stringList description string id string name } CollectionTimeframe { stringList description string id string name } DataCollector { stringList description string id string name } SamplingStrategy { stringList is_sample stringList is_random stringList source_data stringList is_representative stringList representative_verification stringList why_not_representative stringList strategies string id string name string description } CollectionMechanism { stringList description string id string name } InstanceAcquisition { stringList description string was_directly_observed string was_reported_by_subjects string was_inferred_derived string was_validated_verified string id string name } SensitiveElement { stringList description string id string name } Subpopulation { stringList identification stringList distribution string id string name string description } ContentWarning { stringList warnings string id string name string description } Confidentiality { stringList description string id string name } ExternalResource { stringList external_resources stringList future_guarantees stringList archival stringList restrictions string id string name string description } DataAnomaly { stringList description string id string name } Instance { string representation string instance_type string data_type integer counts string label string id string name string description } DataSubset { string is_data_split string is_subpopulation integer bytes string dialect EncodingEnum encoding FormatEnum format 
string hash string md5 string media_type string path string sha256 string is_tabular CompressionEnum compression uriorcurie conforms_to uriorcurie conforms_to_class uriorcurie conforms_to_schema stringList created_by string created_on string description uriorcurie doi uri download_url string id string issued stringList keywords string language string last_updated_on string license string modified_by string page uriorcurie publisher uriorcurie status string title string version string was_derived_from } FundingMechanism { string id string name string description } Creator { string id string name string description } AddressingGap { string response string id string name string description } Task { string response string id string name string description } Purpose { string response string id string name string description } DatasetCollection ||--}o Dataset : "resources" Dataset ||--}o Purpose : "purposes" Dataset ||--}o Task : "tasks" Dataset ||--}o AddressingGap : "addressing_gaps" Dataset ||--}o Creator : "creators" Dataset ||--}o FundingMechanism : "funders" Dataset ||--}o DataSubset : "subsets" Dataset ||--}o Instance : "instances" Dataset ||--}o DataAnomaly : "anomalies" Dataset ||--}o ExternalResource : "external_resources" Dataset ||--}o Confidentiality : "confidential_elements" Dataset ||--}o ContentWarning : "content_warnings" Dataset ||--}o Subpopulation : "subpopulations" Dataset ||--}o SensitiveElement : "sensitive_elements" Dataset ||--}o InstanceAcquisition : "acquisition_methods" Dataset ||--}o CollectionMechanism : "collection_mechanisms" Dataset ||--}o SamplingStrategy : "sampling_strategies" Dataset ||--}o DataCollector : "data_collectors" Dataset ||--}o CollectionTimeframe : "collection_timeframes" Dataset ||--}o EthicalReview : "ethical_reviews" Dataset ||--}o DataProtectionImpact : "data_protection_impacts" Dataset ||--}o PreprocessingStrategy : "preprocessing_strategies" Dataset ||--}o CleaningStrategy : "cleaning_strategies" Dataset ||--}o 
LabelingStrategy : "labeling_strategies" Dataset ||--}o RawData : "raw_sources" Dataset ||--}o ExistingUse : "existing_uses" Dataset ||--}o UseRepository : "use_repository" Dataset ||--}o OtherTask : "other_tasks" Dataset ||--}o FutureUseImpact : "future_use_impacts" Dataset ||--}o DiscouragedUse : "discouraged_uses" Dataset ||--}o DistributionFormat : "distribution_formats" Dataset ||--}o DistributionDate : "distribution_dates" Dataset ||--|o LicenseAndUseTerms : "license_and_use_terms" Dataset ||--|o IPRestrictions : "ip_restrictions" Dataset ||--|o ExportControlRegulatoryRestrictions : "regulatory_restrictions" Dataset ||--}o Maintainer : "maintainers" Dataset ||--}o Erratum : "errata" Dataset ||--|o UpdatePlan : "updates" Dataset ||--|o RetentionLimits : "retention_limit" Dataset ||--|o VersionAccess : "version_access" Dataset ||--|o ExtensionMechanism : "extension_mechanism" Dataset ||--|o Deidentification : "is_deidentified" Deidentification ||--}o Software : "used_software" ExtensionMechanism ||--}o Software : "used_software" VersionAccess ||--}o Software : "used_software" RetentionLimits ||--}o Software : "used_software" UpdatePlan ||--}o Software : "used_software" Erratum ||--}o Software : "used_software" Maintainer ||--}o Software : "used_software" ExportControlRegulatoryRestrictions ||--}o Software : "used_software" IPRestrictions ||--}o Software : "used_software" LicenseAndUseTerms ||--}o Software : "used_software" DistributionDate ||--}o Software : "used_software" DistributionFormat ||--}o Software : "used_software" DiscouragedUse ||--}o Software : "used_software" FutureUseImpact ||--}o Software : "used_software" OtherTask ||--}o Software : "used_software" UseRepository ||--}o Software : "used_software" ExistingUse ||--}o Software : "used_software" RawData ||--}o Software : "used_software" LabelingStrategy ||--}o Software : "used_software" CleaningStrategy ||--}o Software : "used_software" PreprocessingStrategy ||--}o Software : "used_software" 
DataProtectionImpact ||--}o Software : "used_software" EthicalReview ||--}o Software : "used_software" CollectionTimeframe ||--}o Software : "used_software" DataCollector ||--}o Software : "used_software" SamplingStrategy ||--}o Software : "used_software" CollectionMechanism ||--}o Software : "used_software" InstanceAcquisition ||--}o Software : "used_software" SensitiveElement ||--}o Software : "used_software" Subpopulation ||--}o Software : "used_software" ContentWarning ||--}o Software : "used_software" Confidentiality ||--}o Software : "used_software" ExternalResource ||--}o Software : "used_software" DataAnomaly ||--}o Software : "used_software" Instance ||--}o SamplingStrategy : "sampling_strategies" Instance ||--}o MissingInfo : "missing_information" Instance ||--}o Software : "used_software" DataSubset ||--}o Purpose : "purposes" DataSubset ||--}o Task : "tasks" DataSubset ||--}o AddressingGap : "addressing_gaps" DataSubset ||--}o Creator : "creators" DataSubset ||--}o FundingMechanism : "funders" DataSubset ||--}o DataSubset : "subsets" DataSubset ||--}o Instance : "instances" DataSubset ||--}o DataAnomaly : "anomalies" DataSubset ||--}o ExternalResource : "external_resources" DataSubset ||--}o Confidentiality : "confidential_elements" DataSubset ||--}o ContentWarning : "content_warnings" DataSubset ||--}o Subpopulation : "subpopulations" DataSubset ||--}o SensitiveElement : "sensitive_elements" DataSubset ||--}o InstanceAcquisition : "acquisition_methods" DataSubset ||--}o CollectionMechanism : "collection_mechanisms" DataSubset ||--}o SamplingStrategy : "sampling_strategies" DataSubset ||--}o DataCollector : "data_collectors" DataSubset ||--}o CollectionTimeframe : "collection_timeframes" DataSubset ||--}o EthicalReview : "ethical_reviews" DataSubset ||--}o DataProtectionImpact : "data_protection_impacts" DataSubset ||--}o PreprocessingStrategy : "preprocessing_strategies" DataSubset ||--}o CleaningStrategy : "cleaning_strategies" DataSubset ||--}o 
LabelingStrategy : "labeling_strategies" DataSubset ||--}o RawData : "raw_sources" DataSubset ||--}o ExistingUse : "existing_uses" DataSubset ||--}o UseRepository : "use_repository" DataSubset ||--}o OtherTask : "other_tasks" DataSubset ||--}o FutureUseImpact : "future_use_impacts" DataSubset ||--}o DiscouragedUse : "discouraged_uses" DataSubset ||--}o DistributionFormat : "distribution_formats" DataSubset ||--}o DistributionDate : "distribution_dates" DataSubset ||--|o LicenseAndUseTerms : "license_and_use_terms" DataSubset ||--|o IPRestrictions : "ip_restrictions" DataSubset ||--|o ExportControlRegulatoryRestrictions : "regulatory_restrictions" DataSubset ||--}o Maintainer : "maintainers" DataSubset ||--}o Erratum : "errata" DataSubset ||--|o UpdatePlan : "updates" DataSubset ||--|o RetentionLimits : "retention_limit" DataSubset ||--|o VersionAccess : "version_access" DataSubset ||--|o ExtensionMechanism : "extension_mechanism" DataSubset ||--|o Deidentification : "is_deidentified" FundingMechanism ||--|o Grantor : "grantor" FundingMechanism ||--|o Grant : "grant" FundingMechanism ||--}o Software : "used_software" Creator ||--|o Person : "principal_investigator" Creator ||--|o Organization : "affiliation" Creator ||--}o Software : "used_software" AddressingGap ||--}o Software : "used_software" Task ||--}o Software : "used_software" Purpose ||--}o Software : "used_software"

Inheritance

  • Information
      • DatasetCollection

Slots

Name Cardinality and Range Description Inheritance
resources 0..*
Dataset
direct
compression 0..1
CompressionEnum
The compression format of the data Information
conforms_to 0..1
Uriorcurie
The standard to which the data conforms Information
conforms_to_class 0..1
Uriorcurie
The class in the schema to which the data object instantiates Information
conforms_to_schema 0..1
Uriorcurie
The schema to which the data conforms Information
created_by 0..*
String
Agent that created the element Information
created_on 0..1
String
Date and Time at which the element was created Information
description 0..1
String
human readable description of the information Information
doi 0..1
Uriorcurie
The Digital Object Identifier of the data, with the doi prefix Information
download_url 0..1
Uri
URL from which the data can be downloaded Information
id 1..1
String
the unique name of the dataset Information
issued 0..1
String
Information
keywords 0..*
String
Keywords associated with the data Information
language 0..1
String
language in which the information is expressed Information
last_updated_on 0..1
String
Date and Time at which the element was last updated Information
license 0..1
String
license for the data Information
modified_by 0..1
String
agent that modified the element Information
page 0..1
String
Information
publisher 0..1
Uriorcurie
Information
status 0..1
Uriorcurie
Status of the element in terms of its maturity or life cycle Information
title 0..1
String
the official title of the element Information
version 0..1
String
particular version of schema Information
was_derived_from 0..1
String
A derivation is a transformation of an entity into another, an update of an entity resulting in a new one, or the construction of a new entity based on a pre-existing entity Information

Aliases

  • file collection
  • dataset collection
  • data resource collection

Identifier and Mapping Information

Schema Source

  • from schema: https://w3id.org/bridge2ai/data-sheets-schema

Mappings

Mapping Type Mapped Value
self data_sheets_schema:DatasetCollection
native data_sheets_schema:DatasetCollection
exact dcat:Dataset
close dcat:Catalog

LinkML Source

Direct

# Direct (as-authored) LinkML definition of the DatasetCollection class.
# Only the attribute declared on the class itself appears here.
name: DatasetCollection
description: >-
  A collection of related datasets, likely containing multiple files
  of multiple potential purposes and properties.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
# Alternative human-readable names for this class.
aliases:
  - file collection
  - dataset collection
  - data resource collection
# Mappings to external vocabulary terms (DCAT).
exact_mappings:
  - dcat:Dataset
close_mappings:
  - dcat:Catalog
# Parent class of DatasetCollection.
is_a: Information
attributes:
  # Multivalued slot whose values are Dataset objects.
  resources:
    name: resources
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    multivalued: true
    range: Dataset
tree_root: true

Induced

# Induced LinkML definition of the DatasetCollection class.
# Besides the class's own `resources` attribute, this view lists every slot
# whose `domain_of` includes Information (the `is_a` parent), each with
# `owner: DatasetCollection`.
name: DatasetCollection
description: A collection of related datasets, likely containing multiple files of
  multiple potential purposes and properties.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
# Alternative human-readable names for this class.
aliases:
- file collection
- dataset collection
- data resource collection
# Mappings to external vocabulary terms (DCAT).
exact_mappings:
- dcat:Dataset
close_mappings:
- dcat:Catalog
# Parent class of DatasetCollection.
is_a: Information
attributes:
  # The only slot whose domain_of is DatasetCollection itself; multivalued,
  # with Dataset objects as values.
  resources:
    name: resources
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    multivalued: true
    alias: resources
    owner: DatasetCollection
    domain_of:
    - DatasetCollection
    range: Dataset
  # --- Slots below list Information (not DatasetCollection) in domain_of. ---
  compression:
    name: compression
    description: The compression format of the data. This is not the same as the media
      type. Rather, this is the compression format of the data in a more specific
      sense, e.g., zip, gzip, etc.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    alias: compression
    owner: DatasetCollection
    domain_of:
    - Information
    range: CompressionEnum
  conforms_to:
    name: conforms_to
    description: The standard to which the data conforms. This is not the same as
      the media type. Rather, this is the standard to which the data conforms in a
      more specific sense, e.g., frictionless, schema.org, etc.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:conformsTo
    alias: conforms_to
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  # Specialization (is_a) of conforms_to.
  conforms_to_class:
    name: conforms_to_class
    description: The class in the schema to which the data object instantiates.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    is_a: conforms_to
    alias: conforms_to_class
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  # Specialization (is_a) of conforms_to.
  conforms_to_schema:
    name: conforms_to_schema
    description: The schema to which the data conforms. This is not the same as the
      media type. Rather, this is the schema to which the data conforms in a more
      specific sense, and even more specific than the general set of standards it
      conforms to.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - frictionless:schema
    rank: 1000
    is_a: conforms_to
    alias: conforms_to_schema
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  # Multivalued: more than one creating agent may be recorded.
  created_by:
    name: created_by
    description: Agent that created the element
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: pav:createdBy
    multivalued: true
    alias: created_by
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # Note: the range is a plain string, not a date/datetime type.
  created_on:
    name: created_on
    description: Date and Time at which the element was created
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: pav:createdOn
    alias: created_on
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # Declared by many classes, hence the long domain_of list.
  description:
    name: description
    description: human readable description of the information
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:description
    alias: description
    owner: DatasetCollection
    domain_of:
    - NamedThing
    - Information
    - Relationships
    - Splits
    - DataAnomaly
    - Confidentiality
    - Deidentification
    - SensitiveElement
    - InstanceAcquisition
    - CollectionMechanism
    - DataCollector
    - CollectionTimeframe
    - EthicalReview
    - DirectCollection
    - CollectionNotification
    - CollectionConsent
    - ConsentRevocation
    - DataProtectionImpact
    - PreprocessingStrategy
    - CleaningStrategy
    - LabelingStrategy
    - RawData
    - ExistingUse
    - UseRepository
    - OtherTask
    - FutureUseImpact
    - DiscouragedUse
    - ThirdPartySharing
    - DistributionFormat
    - DistributionDate
    - LicenseAndUseTerms
    - IPRestrictions
    - ExportControlRegulatoryRestrictions
    - Maintainer
    - Erratum
    - UpdatePlan
    - RetentionLimits
    - VersionAccess
    - ExtensionMechanism
    range: string
  # The value is expected to carry the `doi:` prefix (see the example).
  doi:
    name: doi
    description: The Digital Object Identifier of the data, with the doi prefix.
    examples:
    - value: doi:10.48550/arXiv.2310.03666
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    alias: doi
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  download_url:
    name: download_url
    description: URL from which the data can be downloaded. This is not the same as
      the landing page, which is a page that describes the dataset. Rather, this URL
      points directly to the data itself.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:url
    close_mappings:
    - frictionless:path
    rank: 1000
    slot_uri: dcat:downloadURL
    alias: download_url
    owner: DatasetCollection
    domain_of:
    - Information
    range: uri
  # The identifier slot, and the only slot in this block marked `required`.
  id:
    name: id
    description: the unique name of the dataset
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:name
    rank: 1000
    slot_uri: dcterms:identifier
    identifier: true
    alias: id
    owner: DatasetCollection
    domain_of:
    - NamedThing
    - Information
    range: string
    required: true
  # No description is provided for this slot; see slot_uri for its meaning.
  issued:
    name: issued
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:issued
    alias: issued
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  keywords:
    name: keywords
    description: Keywords associated with the data. These may be provided by the data
      creator or assigned later in a manual or automated manner.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:keywords
    rank: 1000
    singular_name: keyword
    slot_uri: dcat:keyword
    multivalued: true
    alias: keywords
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  language:
    name: language
    description: language in which the information is expressed
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    alias: language
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # Note: the range is a plain string, not a date/datetime type.
  last_updated_on:
    name: last_updated_on
    description: Date and Time at which the element was last updated
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: pav:lastUpdatedOn
    alias: last_updated_on
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # Also declared by Software (see domain_of).
  license:
    name: license
    description: license for the data
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - frictionless:licenses
    rank: 1000
    slot_uri: dcterms:license
    alias: license
    owner: DatasetCollection
    domain_of:
    - Information
    - Software
    range: string
  # Single-valued, unlike created_by.
  modified_by:
    name: modified_by
    description: agent that modified the element
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: oslc:modifiedBy
    alias: modified_by
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # No description is provided; slot_uri maps it to dcat:landingPage.
  page:
    name: page
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcat:landingPage
    alias: page
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # No description is provided; slot_uri maps it to dcterms:publisher.
  publisher:
    name: publisher
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:publisher
    alias: publisher
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  # Values are CURIEs/URIs such as the bibo:draft example below.
  status:
    name: status
    description: Status of the element in terms of its maturity or life cycle
    examples:
    - value: bibo:draft
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: bibo:status
    alias: status
    owner: DatasetCollection
    domain_of:
    - Information
    range: uriorcurie
  title:
    name: title
    description: the official title of the element
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: dcterms:title
    alias: title
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
  # Also declared by Software (see domain_of).
  version:
    name: version
    description: particular version of schema
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    exact_mappings:
    - schema:version
    - dcterms:hasVersion
    rank: 1000
    slot_uri: pav:version
    alias: version
    owner: DatasetCollection
    domain_of:
    - Information
    - Software
    range: string
  # The description text must not carry an RDF language-tag suffix ("@en").
  was_derived_from:
    name: was_derived_from
    description: A derivation is a transformation of an entity into another, an update
      of an entity resulting in a new one, or the construction of a new entity based
      on a pre-existing entity.
    from_schema: https://w3id.org/bridge2ai/data-sheets-schema
    rank: 1000
    slot_uri: prov:wasDerivedFrom
    alias: was_derived_from
    owner: DatasetCollection
    domain_of:
    - Information
    range: string
tree_root: true