Enhances a relation_schema object with foreign key reference
information.
Arguments
- relation_schemas
a
relation_schemaobject, as returned bysynthesiseorrelation_schema.- references
a list of references, each represented by a list containing four character elements. In order, the elements are a scalar giving the name of the child (referrer) schema, a vector giving the child attribute names, a scalar giving the name of the parent (referee) schema, and a vector giving the parent attribute names. The vectors must be of the same length and contain names for attributes present in their respective schemas, and the parent attributes must form a key.
Value
A database_schema object, containing relation_schemas
with references stored in an attribute of the same name.
References are stored with their attributes in the order they appear in
their respective relation schemas.
Details
Unlike functional_dependency and relation_schema,
database_schema is not designed to be vector-like: it only holds a
single database schema. This adheres to the usual package use case, where a
single data frame is being analysed at a time. However, it inherits from
relation_schema, so is vectorised with respect to its relation
schemas.
As with relation_schema, duplicate relation schemas, after
ordering by attribute, are allowed, and can be removed with
unique.
References, i.e. foreign key references, are allowed to have different
attribute names in the child and parent relations; this can't occur in the
output for autoref and normalise.
Subsetting removes any references that involve removed relation schemas.
Removing duplicates with unique changes references involving
duplicates to involve the kept equivalent schemas instead. Renaming relation
schemas with names<- also changes their names in
the references.
See also
attrs, keys, attrs_order,
and references for extracting parts of the information in a
database_schema; create for creating a
database object that uses the given schema; gv
for converting the schema into Graphviz code; rename_attrs
for renaming the attributes in attrs_order; reduce for
filtering a schema's relations to those connected to a given relation by
foreign key references; subschemas to return the
relation_schema that the given schema contains;
merge_empty_keys for combining relations with an empty key;
merge_schemas for combining relations with matching sets of
keys.
Examples
rs <- relation_schema(
list(
a = list(c("a", "b"), list("a")),
b = list(c("b", "c"), list("b", "c"))
),
attrs_order = c("a", "b", "c", "d")
)
ds <- database_schema(
rs,
list(list("a", "b", "b", "b"))
)
print(ds)
#> database schema with 2 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> references:
#> a.{b} -> b.{b}
attrs(ds)
#> $a
#> [1] "a" "b"
#>
#> $b
#> [1] "b" "c"
#>
keys(ds)
#> $a
#> $a[[1]]
#> [1] "a"
#>
#>
#> $b
#> $b[[1]]
#> [1] "b"
#>
#> $b[[2]]
#> [1] "c"
#>
#>
attrs_order(ds)
#> [1] "a" "b" "c" "d"
names(ds)
#> [1] "a" "b"
references(ds)
#> [[1]]
#> [[1]][[1]]
#> [1] "a"
#>
#> [[1]][[2]]
#> [1] "b"
#>
#> [[1]][[3]]
#> [1] "b"
#>
#> [[1]][[4]]
#> [1] "b"
#>
#>
# relations can't reference themselves
if (FALSE) { # \dontrun{
database_schema(
relation_schema(
list(a = list("a", list("a"))),
c("a", "b")
),
list(list("a", "a", "a", "a"))
)
database_schema(
relation_schema(
list(a = list(c("a", "b"), list("a"))),
c("a", "b")
),
list(list("a", "b", "a", "a"))
)
} # }
# an example with references between differently-named attributes
print(database_schema(
relation_schema(
list(
citation = list(c("citer", "citee"), list(c("citer", "citee"))),
article = list("article", list("article"))
),
c("citer", "citee", "article")
),
list(
list("citation", "citer", "article", "article"),
list("citation", "citee", "article", "article")
)
))
#> database schema with 2 relation schemas
#> 3 attributes: citer, citee, article
#> schema citation: citer, citee
#> key 1: citer, citee
#> schema article: article
#> key 1: article
#> references:
#> citation.{citer} -> article.{article}
#> citation.{citee} -> article.{article}
# vector operations
ds2 <- database_schema(
relation_schema(
list(
e = list(c("a", "e"), list("e"))
),
attrs_order = c("a", "e")
),
list()
)
c(ds, ds2) # attrs_order attributes are merged
#> database schema with 3 relation schemas
#> 5 attributes: a, b, c, d, e
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> schema e: e, a
#> key 1: e
#> references:
#> a.{b} -> b.{b}
unique(c(ds, ds))
#> database schema with 2 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> references:
#> a.{b} -> b.{b}
# subsetting
ds[1]
#> database schema with 1 relation schema
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> no references
stopifnot(identical(ds[[1]], ds[1]))
ds[c(1, 2, 1, 2)] # replicates the foreign key references
#> database schema with 4 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> schema a.1: a, b
#> key 1: a
#> schema b.1: b, c
#> key 1: b
#> key 2: c
#> references:
#> a.{b} -> b.{b}
#> a.{b} -> b.1.{b}
#> a.1.{b} -> b.{b}
#> a.1.{b} -> b.1.{b}
c(ds[c(1, 2)], ds[c(1, 2)]) # doesn't reference between separate copies of ds
#> database schema with 4 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> schema a.1: a, b
#> key 1: a
#> schema b.1: b, c
#> key 1: b
#> key 2: c
#> references:
#> a.{b} -> b.{b}
#> a.1.{b} -> b.1.{b}
unique(ds[c(1, 2, 1, 2)]) # unique() also merges references
#> database schema with 2 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: b, c
#> key 1: b
#> key 2: c
#> references:
#> a.{b} -> b.{b}
# another example of unique() merging references
ds_merge <- database_schema(
relation_schema(
list(
a = list(c("a", "b"), list("a")),
b = list(c("b", "c", "d"), list("b")),
c_d = list(c("c", "d", "e"), list(c("c", "d"))),
a.1 = list(c("a", "b"), list("a")),
b.1 = list(c("b", "c", "d"), list("b"))
),
c("a", "b", "c", "d", "e")
),
list(
list("a", "b", "b", "b"),
list("b.1", c("c", "d"), "c_d", c("c", "d"))
)
)
print(ds_merge)
#> database schema with 5 relation schemas
#> 5 attributes: a, b, c, d, e
#> schema a: a, b
#> key 1: a
#> schema b: b, c, d
#> key 1: b
#> schema c_d: c, d, e
#> key 1: c, d
#> schema a.1: a, b
#> key 1: a
#> schema b.1: b, c, d
#> key 1: b
#> references:
#> a.{b} -> b.{b}
#> b.1.{c, d} -> c_d.{c, d}
unique(ds_merge)
#> database schema with 3 relation schemas
#> 5 attributes: a, b, c, d, e
#> schema a: a, b
#> key 1: a
#> schema b: b, c, d
#> key 1: b
#> schema c_d: c, d, e
#> key 1: c, d
#> references:
#> a.{b} -> b.{b}
#> b.{c, d} -> c_d.{c, d}
# reassignment
# can't change keys included in references
if (FALSE) keys(ds)[[2]] <- list("c") # \dontrun{}
# can't remove attributes included in keys
if (FALSE) attrs(ds)[[2]] <- list("c", "d") # \dontrun{}
# can't remove attributes included in references
if (FALSE) attrs(ds)[[1]] <- c("a", "d") # \dontrun{}
ds3 <- ds
# can change subset of schema, but loses references between altered and
# non-altered subsets
ds3[2] <- database_schema(
relation_schema(
list(d = list(c("d", "c"), list("d"))),
attrs_order(ds3)
),
list()
)
print(ds3) # note the schema's name doesn't change
#> database schema with 2 relation schemas
#> 4 attributes: a, b, c, d
#> schema a: a, b
#> key 1: a
#> schema b: d, c
#> key 1: d
#> no references
# names(ds3)[2] <- "d" # this would change the name
keys(ds3)[[2]] <- list(character()) # removing keys first...
attrs(ds3)[[2]] <- c("b", "c") # so we can change the attrs legally
keys(ds3)[[2]] <- list("b", "c") # add the new keys
# add the reference lost during subset replacement
references(ds3) <- c(references(ds3), list(list("a", "b", "b", "b")))
stopifnot(identical(ds3, ds))
# changing appearance priority for attributes
attrs_order(ds3) <- c("d", "c", "b", "a")
print(ds3)
#> database schema with 2 relation schemas
#> 4 attributes: d, c, b, a
#> schema a: a, b
#> key 1: a
#> schema b: c, b
#> key 1: c
#> key 2: b
#> references:
#> a.{b} -> b.{b}
# changing relation schema names changes them in references
names(ds3) <- paste0(names(ds3), "_long")
print(ds3)
#> database schema with 2 relation schemas
#> 4 attributes: d, c, b, a
#> schema a_long: a, b
#> key 1: a
#> schema b_long: c, b
#> key 1: c
#> key 2: b
#> references:
#> a_long.{b} -> b_long.{b}
# reconstructing from components
ds_recon <- database_schema(
relation_schema(
Map(list, attrs(ds), keys(ds)),
attrs_order(ds)
),
references(ds)
)
stopifnot(identical(ds_recon, ds))
ds_recon2 <- database_schema(
subschemas(ds),
references(ds)
)
stopifnot(identical(ds_recon2, ds))
# can be a data frame column
data.frame(id = 1:2, schema = ds)
#> id schema
#> 1 1 schema a
#> 2 2 schema b