Class: DatasetCollection
A collection of related datasets, likely containing multiple files of multiple potential purposes and properties.
URI: data_sheets_schema:DatasetCollection
erDiagram
DatasetCollection {
CompressionEnum compression
uriorcurie conforms_to
uriorcurie conforms_to_class
uriorcurie conforms_to_schema
stringList created_by
string created_on
string description
uriorcurie doi
uri download_url
string id
string issued
stringList keywords
string language
string last_updated_on
string license
string modified_by
string page
uriorcurie publisher
uriorcurie status
string title
string version
string was_derived_from
}
Dataset {
integer bytes
string dialect
EncodingEnum encoding
FormatEnum format
string hash
string md5
string media_type
string path
string sha256
string is_tabular
CompressionEnum compression
uriorcurie conforms_to
uriorcurie conforms_to_class
uriorcurie conforms_to_schema
stringList created_by
string created_on
string description
uriorcurie doi
uri download_url
string id
string issued
stringList keywords
string language
string last_updated_on
string license
string modified_by
string page
uriorcurie publisher
uriorcurie status
string title
string version
string was_derived_from
}
Deidentification {
stringList description
string id
string name
}
ExtensionMechanism {
stringList description
string id
string name
}
VersionAccess {
stringList description
string id
string name
}
RetentionLimits {
stringList description
string id
string name
}
UpdatePlan {
stringList description
string id
string name
}
Erratum {
stringList description
string id
string name
}
Maintainer {
stringList description
string id
string name
}
ExportControlRegulatoryRestrictions {
stringList description
string id
string name
}
IPRestrictions {
stringList description
string id
string name
}
LicenseAndUseTerms {
stringList description
string id
string name
}
DistributionDate {
stringList description
string id
string name
}
DistributionFormat {
stringList description
string id
string name
}
DiscouragedUse {
stringList description
string id
string name
}
FutureUseImpact {
stringList description
string id
string name
}
OtherTask {
stringList description
string id
string name
}
UseRepository {
stringList description
string id
string name
}
ExistingUse {
stringList description
string id
string name
}
RawData {
stringList description
string id
string name
}
LabelingStrategy {
stringList description
string id
string name
}
CleaningStrategy {
stringList description
string id
string name
}
PreprocessingStrategy {
stringList description
string id
string name
}
DataProtectionImpact {
stringList description
string id
string name
}
EthicalReview {
stringList description
string id
string name
}
CollectionTimeframe {
stringList description
string id
string name
}
DataCollector {
stringList description
string id
string name
}
SamplingStrategy {
stringList is_sample
stringList is_random
stringList source_data
stringList is_representative
stringList representative_verification
stringList why_not_representative
stringList strategies
string id
string name
string description
}
CollectionMechanism {
stringList description
string id
string name
}
InstanceAcquisition {
stringList description
string was_directly_observed
string was_reported_by_subjects
string was_inferred_derived
string was_validated_verified
string id
string name
}
SensitiveElement {
stringList description
string id
string name
}
Subpopulation {
stringList identification
stringList distribution
string id
string name
string description
}
ContentWarning {
stringList warnings
string id
string name
string description
}
Confidentiality {
stringList description
string id
string name
}
ExternalResource {
stringList external_resources
stringList future_guarantees
stringList archival
stringList restrictions
string id
string name
string description
}
DataAnomaly {
stringList description
string id
string name
}
Instance {
string representation
string instance_type
string data_type
integer counts
string label
string id
string name
string description
}
DataSubset {
string is_data_split
string is_subpopulation
integer bytes
string dialect
EncodingEnum encoding
FormatEnum format
string hash
string md5
string media_type
string path
string sha256
string is_tabular
CompressionEnum compression
uriorcurie conforms_to
uriorcurie conforms_to_class
uriorcurie conforms_to_schema
stringList created_by
string created_on
string description
uriorcurie doi
uri download_url
string id
string issued
stringList keywords
string language
string last_updated_on
string license
string modified_by
string page
uriorcurie publisher
uriorcurie status
string title
string version
string was_derived_from
}
FundingMechanism {
string id
string name
string description
}
Creator {
string id
string name
string description
}
AddressingGap {
string response
string id
string name
string description
}
Task {
string response
string id
string name
string description
}
Purpose {
string response
string id
string name
string description
}
DatasetCollection ||--}o Dataset : "resources"
Dataset ||--}o Purpose : "purposes"
Dataset ||--}o Task : "tasks"
Dataset ||--}o AddressingGap : "addressing_gaps"
Dataset ||--}o Creator : "creators"
Dataset ||--}o FundingMechanism : "funders"
Dataset ||--}o DataSubset : "subsets"
Dataset ||--}o Instance : "instances"
Dataset ||--}o DataAnomaly : "anomalies"
Dataset ||--}o ExternalResource : "external_resources"
Dataset ||--}o Confidentiality : "confidential_elements"
Dataset ||--}o ContentWarning : "content_warnings"
Dataset ||--}o Subpopulation : "subpopulations"
Dataset ||--}o SensitiveElement : "sensitive_elements"
Dataset ||--}o InstanceAcquisition : "acquisition_methods"
Dataset ||--}o CollectionMechanism : "collection_mechanisms"
Dataset ||--}o SamplingStrategy : "sampling_strategies"
Dataset ||--}o DataCollector : "data_collectors"
Dataset ||--}o CollectionTimeframe : "collection_timeframes"
Dataset ||--}o EthicalReview : "ethical_reviews"
Dataset ||--}o DataProtectionImpact : "data_protection_impacts"
Dataset ||--}o PreprocessingStrategy : "preprocessing_strategies"
Dataset ||--}o CleaningStrategy : "cleaning_strategies"
Dataset ||--}o LabelingStrategy : "labeling_strategies"
Dataset ||--}o RawData : "raw_sources"
Dataset ||--}o ExistingUse : "existing_uses"
Dataset ||--}o UseRepository : "use_repository"
Dataset ||--}o OtherTask : "other_tasks"
Dataset ||--}o FutureUseImpact : "future_use_impacts"
Dataset ||--}o DiscouragedUse : "discouraged_uses"
Dataset ||--}o DistributionFormat : "distribution_formats"
Dataset ||--}o DistributionDate : "distribution_dates"
Dataset ||--|o LicenseAndUseTerms : "license_and_use_terms"
Dataset ||--|o IPRestrictions : "ip_restrictions"
Dataset ||--|o ExportControlRegulatoryRestrictions : "regulatory_restrictions"
Dataset ||--}o Maintainer : "maintainers"
Dataset ||--}o Erratum : "errata"
Dataset ||--|o UpdatePlan : "updates"
Dataset ||--|o RetentionLimits : "retention_limit"
Dataset ||--|o VersionAccess : "version_access"
Dataset ||--|o ExtensionMechanism : "extension_mechanism"
Dataset ||--|o Deidentification : "is_deidentified"
Deidentification ||--}o Software : "used_software"
ExtensionMechanism ||--}o Software : "used_software"
VersionAccess ||--}o Software : "used_software"
RetentionLimits ||--}o Software : "used_software"
UpdatePlan ||--}o Software : "used_software"
Erratum ||--}o Software : "used_software"
Maintainer ||--}o Software : "used_software"
ExportControlRegulatoryRestrictions ||--}o Software : "used_software"
IPRestrictions ||--}o Software : "used_software"
LicenseAndUseTerms ||--}o Software : "used_software"
DistributionDate ||--}o Software : "used_software"
DistributionFormat ||--}o Software : "used_software"
DiscouragedUse ||--}o Software : "used_software"
FutureUseImpact ||--}o Software : "used_software"
OtherTask ||--}o Software : "used_software"
UseRepository ||--}o Software : "used_software"
ExistingUse ||--}o Software : "used_software"
RawData ||--}o Software : "used_software"
LabelingStrategy ||--}o Software : "used_software"
CleaningStrategy ||--}o Software : "used_software"
PreprocessingStrategy ||--}o Software : "used_software"
DataProtectionImpact ||--}o Software : "used_software"
EthicalReview ||--}o Software : "used_software"
CollectionTimeframe ||--}o Software : "used_software"
DataCollector ||--}o Software : "used_software"
SamplingStrategy ||--}o Software : "used_software"
CollectionMechanism ||--}o Software : "used_software"
InstanceAcquisition ||--}o Software : "used_software"
SensitiveElement ||--}o Software : "used_software"
Subpopulation ||--}o Software : "used_software"
ContentWarning ||--}o Software : "used_software"
Confidentiality ||--}o Software : "used_software"
ExternalResource ||--}o Software : "used_software"
DataAnomaly ||--}o Software : "used_software"
Instance ||--}o SamplingStrategy : "sampling_strategies"
Instance ||--}o MissingInfo : "missing_information"
Instance ||--}o Software : "used_software"
DataSubset ||--}o Purpose : "purposes"
DataSubset ||--}o Task : "tasks"
DataSubset ||--}o AddressingGap : "addressing_gaps"
DataSubset ||--}o Creator : "creators"
DataSubset ||--}o FundingMechanism : "funders"
DataSubset ||--}o DataSubset : "subsets"
DataSubset ||--}o Instance : "instances"
DataSubset ||--}o DataAnomaly : "anomalies"
DataSubset ||--}o ExternalResource : "external_resources"
DataSubset ||--}o Confidentiality : "confidential_elements"
DataSubset ||--}o ContentWarning : "content_warnings"
DataSubset ||--}o Subpopulation : "subpopulations"
DataSubset ||--}o SensitiveElement : "sensitive_elements"
DataSubset ||--}o InstanceAcquisition : "acquisition_methods"
DataSubset ||--}o CollectionMechanism : "collection_mechanisms"
DataSubset ||--}o SamplingStrategy : "sampling_strategies"
DataSubset ||--}o DataCollector : "data_collectors"
DataSubset ||--}o CollectionTimeframe : "collection_timeframes"
DataSubset ||--}o EthicalReview : "ethical_reviews"
DataSubset ||--}o DataProtectionImpact : "data_protection_impacts"
DataSubset ||--}o PreprocessingStrategy : "preprocessing_strategies"
DataSubset ||--}o CleaningStrategy : "cleaning_strategies"
DataSubset ||--}o LabelingStrategy : "labeling_strategies"
DataSubset ||--}o RawData : "raw_sources"
DataSubset ||--}o ExistingUse : "existing_uses"
DataSubset ||--}o UseRepository : "use_repository"
DataSubset ||--}o OtherTask : "other_tasks"
DataSubset ||--}o FutureUseImpact : "future_use_impacts"
DataSubset ||--}o DiscouragedUse : "discouraged_uses"
DataSubset ||--}o DistributionFormat : "distribution_formats"
DataSubset ||--}o DistributionDate : "distribution_dates"
DataSubset ||--|o LicenseAndUseTerms : "license_and_use_terms"
DataSubset ||--|o IPRestrictions : "ip_restrictions"
DataSubset ||--|o ExportControlRegulatoryRestrictions : "regulatory_restrictions"
DataSubset ||--}o Maintainer : "maintainers"
DataSubset ||--}o Erratum : "errata"
DataSubset ||--|o UpdatePlan : "updates"
DataSubset ||--|o RetentionLimits : "retention_limit"
DataSubset ||--|o VersionAccess : "version_access"
DataSubset ||--|o ExtensionMechanism : "extension_mechanism"
DataSubset ||--|o Deidentification : "is_deidentified"
FundingMechanism ||--|o Grantor : "grantor"
FundingMechanism ||--|o Grant : "grant"
FundingMechanism ||--}o Software : "used_software"
Creator ||--|o Person : "principal_investigator"
Creator ||--|o Organization : "affiliation"
Creator ||--}o Software : "used_software"
AddressingGap ||--}o Software : "used_software"
Task ||--}o Software : "used_software"
Purpose ||--}o Software : "used_software"
Inheritance
- Information
- DatasetCollection
Slots
Name | Cardinality and Range | Description | Inheritance |
---|---|---|---|
resources | * Dataset |
direct | |
compression | 0..1 CompressionEnum |
The compression format of the data | Information |
conforms_to | 0..1 Uriorcurie |
The standard to which the data conforms | Information |
conforms_to_class | 0..1 Uriorcurie |
The class in the schema to which the data object instantiates | Information |
conforms_to_schema | 0..1 Uriorcurie |
The schema to which the data conforms | Information |
created_by | * String |
Agent that created the element | Information |
created_on | 0..1 String |
Date and Time at which the element was created | Information |
description | 0..1 String |
human readable description of the information | Information |
doi | 0..1 Uriorcurie |
The Digital Object Identifier of the data, with the doi prefix | Information |
download_url | 0..1 Uri |
URL from which the data can be downloaded | Information |
id | 1 String |
the unique name of the dataset | Information |
issued | 0..1 String |
Information | |
keywords | * String |
Keywords associated with the data | Information |
language | 0..1 String |
language in which the information is expressed | Information |
last_updated_on | 0..1 String |
Date and Time at which the element was last updated | Information |
license | 0..1 String |
license for the data | Information |
modified_by | 0..1 String |
agent that modified the element | Information |
page | 0..1 String |
Information | |
publisher | 0..1 Uriorcurie |
Information | |
status | 0..1 Uriorcurie |
Status of the element in terms of its maturity or life cycle | Information |
title | 0..1 String |
the official title of the element | Information |
version | 0..1 String |
particular version of schema | Information |
was_derived_from | 0..1 String |
A derivation is a transformation of an entity into another, an update of an e... | Information |
Aliases
- file collection
- dataset collection
- data resource collection
Identifier and Mapping Information
Schema Source
- from schema: https://w3id.org/bridge2ai/data-sheets-schema
Mappings
Mapping Type | Mapped Value |
---|---|
self | data_sheets_schema:DatasetCollection |
native | data_sheets_schema:DatasetCollection |
exact | dcat:Dataset |
close | dcat:Catalog |
LinkML Source
Direct
name: DatasetCollection
description: A collection of related datasets, likely containing multiple files of
multiple potential purposes and properties.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- file collection
- dataset collection
- data resource collection
exact_mappings:
- dcat:Dataset
close_mappings:
- dcat:Catalog
is_a: Information
attributes:
resources:
name: resources
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
multivalued: true
domain_of:
- DatasetCollection
range: Dataset
tree_root: true
Induced
name: DatasetCollection
description: A collection of related datasets, likely containing multiple files of
multiple potential purposes and properties.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
aliases:
- file collection
- dataset collection
- data resource collection
exact_mappings:
- dcat:Dataset
close_mappings:
- dcat:Catalog
is_a: Information
attributes:
resources:
name: resources
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
multivalued: true
alias: resources
owner: DatasetCollection
domain_of:
- DatasetCollection
range: Dataset
compression:
name: compression
description: The compression format of the data. This is not the same as the media
type. Rather, this is the compression format of the data in a more specific
sense, e.g., zip, gzip, etc.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: compression
owner: DatasetCollection
domain_of:
- Information
range: CompressionEnum
conforms_to:
name: conforms_to
description: The standard to which the data conforms. This is not the same as
the media type. Rather, this is the standard to which the data conforms in a
more specific sense, e.g., frictionless, schema.org, etc.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:conformsTo
alias: conforms_to
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
conforms_to_class:
name: conforms_to_class
description: The class in the schema to which the data object instantiates.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
is_a: conforms_to
alias: conforms_to_class
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
conforms_to_schema:
name: conforms_to_schema
description: The schema to which the data conforms. This is not the same as the
media type. Rather, this is the schema to which the data conforms in a more
specific sense, and even more specific than the general set of standards it
conforms to.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- frictionless:schema
rank: 1000
is_a: conforms_to
alias: conforms_to_schema
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
created_by:
name: created_by
description: Agent that created the element
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: pav:createdBy
multivalued: true
alias: created_by
owner: DatasetCollection
domain_of:
- Information
range: string
created_on:
name: created_on
description: Date and Time at which the element was created
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: pav:createdOn
alias: created_on
owner: DatasetCollection
domain_of:
- Information
range: string
description:
name: description
description: human readable description of the information
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:description
alias: description
owner: DatasetCollection
domain_of:
- NamedThing
- Information
- Relationships
- Splits
- DataAnomaly
- Confidentiality
- Deidentification
- SensitiveElement
- InstanceAcquisition
- CollectionMechanism
- DataCollector
- CollectionTimeframe
- EthicalReview
- DirectCollection
- CollectionNotification
- CollectionConsent
- ConsentRevocation
- DataProtectionImpact
- PreprocessingStrategy
- CleaningStrategy
- LabelingStrategy
- RawData
- ExistingUse
- UseRepository
- OtherTask
- FutureUseImpact
- DiscouragedUse
- ThirdPartySharing
- DistributionFormat
- DistributionDate
- LicenseAndUseTerms
- IPRestrictions
- ExportControlRegulatoryRestrictions
- Maintainer
- Erratum
- UpdatePlan
- RetentionLimits
- VersionAccess
- ExtensionMechanism
range: string
doi:
name: doi
description: The Digital Object Identifier of the data, with the doi prefix.
examples:
- value: doi:10.48550/arXiv.2310.03666
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: doi
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
download_url:
name: download_url
description: URL from which the data can be downloaded. This is not the same as
the landing page, which is a page that describes the dataset. Rather, this URL
points directly to the data itself.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:url
close_mappings:
- frictionless:path
rank: 1000
slot_uri: dcat:downloadURL
alias: download_url
owner: DatasetCollection
domain_of:
- Information
range: uri
id:
name: id
description: the unique name of the dataset
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:name
rank: 1000
slot_uri: dcterms:identifier
identifier: true
alias: id
owner: DatasetCollection
domain_of:
- NamedThing
- Information
range: string
required: true
issued:
name: issued
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:issued
alias: issued
owner: DatasetCollection
domain_of:
- Information
range: string
keywords:
name: keywords
description: Keywords associated with the data. These may be provided by the data
creator or assigned later in a manual or automated manner.
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:keywords
rank: 1000
singular_name: keyword
slot_uri: dcat:keyword
multivalued: true
alias: keywords
owner: DatasetCollection
domain_of:
- Information
range: string
language:
name: language
description: language in which the information is expressed
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
alias: language
owner: DatasetCollection
domain_of:
- Information
range: string
last_updated_on:
name: last_updated_on
description: Date and Time at which the element was last updated
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: pav:lastUpdatedOn
alias: last_updated_on
owner: DatasetCollection
domain_of:
- Information
range: string
license:
name: license
description: license for the data
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- frictionless:licenses
rank: 1000
slot_uri: dcterms:license
alias: license
owner: DatasetCollection
domain_of:
- Information
- Software
range: string
modified_by:
name: modified_by
description: agent that modified the element
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: oslc:modifiedBy
alias: modified_by
owner: DatasetCollection
domain_of:
- Information
range: string
page:
name: page
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcat:landingPage
alias: page
owner: DatasetCollection
domain_of:
- Information
range: string
publisher:
name: publisher
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:publisher
alias: publisher
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
status:
name: status
description: Status of the element in terms of its maturity or life cycle
examples:
- value: bibo:draft
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: bibo:status
alias: status
owner: DatasetCollection
domain_of:
- Information
range: uriorcurie
title:
name: title
description: the official title of the element
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: dcterms:title
alias: title
owner: DatasetCollection
domain_of:
- Information
range: string
version:
name: version
description: particular version of schema
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
exact_mappings:
- schema:version
- dcterms:hasVersion
rank: 1000
slot_uri: pav:version
alias: version
owner: DatasetCollection
domain_of:
- Information
- Software
range: string
was_derived_from:
name: was_derived_from
description: A derivation is a transformation of an entity into another, an update
of an entity resulting in a new one, or the construction of a new entity based
on a pre-existing entity.@en
from_schema: https://w3id.org/bridge2ai/data-sheets-schema
rank: 1000
slot_uri: prov:wasDerivedFrom
alias: was_derived_from
owner: DatasetCollection
domain_of:
- Information
range: string
tree_root: true