Databases — database • autodb

Enhances a relation object with foreign key reference information.

Usage

database(relations, references)

Arguments

relations: a relation object.
references: a list of references, each represented by a list containing four character elements. In order, the elements are a scalar giving the name of the child (referrer) schema, a vector giving the child attribute names, a scalar giving the name of the parent (referee) schema, and a vector giving the parent attribute names. The vectors must be of the same length and contain names for attributes present in their respective schemas, and the parent attributes must form a key.

Value

A database object, containing relations with references stored in an attribute of the same name. References are stored with their attributes in the order they appear in their respective relations.

Details

Unlike relation_schema and relation, and like database_schema, database is not designed to be vector-like: it only holds a single database. This adheres to the usual package use case, where a single data frame is being analysed at a time. However, it inherits from relation, so is vectorised with respect to its relations.

As with relation, duplicate relations, after ordering by attribute, are allowed, and can be removed with unique.

References, i.e. foreign key references, are allowed to have different attribute names in the child and parent relations; this can't occur in the output for autoref and normalise.

Subsetting removes any references that involve removed relations. Removing duplicates with unique changes references involving duplicates to involve the kept equivalent relations instead. Renaming relations with names<- also changes their names in the references.

Examples

rels <- relation(
  list(
    a = list(
      df = data.frame(a = logical(), b = logical()),
      keys = list("a")
    ),
    b = list(
      df = data.frame(b = logical(), c = logical()),
      keys = list("b", "c")
    )
  ),
  attrs_order = c("a", "b", "c", "d")
)
db <- database(
  rels,
  list(list("a", "b", "b", "b"))
)
print(db)
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
attrs(db)
#> $a
#> [1] "a" "b"
#> 
#> $b
#> [1] "b" "c"
#> 
stopifnot(identical(
  attrs(db),
  lapply(records(db), names)
))
keys(db)
#> $a
#> $a[[1]]
#> [1] "a"
#> 
#> 
#> $b
#> $b[[1]]
#> [1] "b"
#> 
#> $b[[2]]
#> [1] "c"
#> 
#> 
attrs_order(db)
#> [1] "a" "b" "c" "d"
names(db)
#> [1] "a" "b"
references(db)
#> [[1]]
#> [[1]][[1]]
#> [1] "a"
#> 
#> [[1]][[2]]
#> [1] "b"
#> 
#> [[1]][[3]]
#> [1] "b"
#> 
#> [[1]][[4]]
#> [1] "b"
#> 
#> 

# relations can't reference themselves
if (FALSE) { # \dontrun{
  database(
    relation(
      list(a = list(df = data.frame(a = 1:5), keys = list("a"))),
      c("a", "b")
    ),
    list(list("a", "a", "a", "a"))
  )
  database(
    relation(
      list(a = list(df = data.frame(a = 1:5, b = 6:10), keys = list("a"))),
      c("a", "b")
    ),
    list(list("a", "b", "a", "a"))
  )
} # }

# an example with references between differently-named attributes
print(database(
  relation(
    list(
      citation = list(
        df = data.frame(citer = 1:5, citee = 6:10),
        keys = list(c("citer", "citee"))
      ),
      article = list(df = data.frame(article = 1:10), keys = list("article"))
    ),
    c("citer", "citee", "article")
  ),
  list(
    list("citation", "citer", "article", "article"),
    list("citation", "citee", "article", "article")
  )
))
#> database with 2 relations
#> 3 attributes: citer, citee, article
#> relation citation: citer, citee; 5 records
#>   key 1: citer, citee
#> relation article: article; 10 records
#>   key 1: article
#> references:
#> citation.{citer} -> article.{article}
#> citation.{citee} -> article.{article}

# inserting data
insert(db, data.frame(a = 1L, b = 2L, c = 3L, d = 4L))
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 1 record
#>   key 1: a
#> relation b: b, c; 1 record
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
# data is only inserted into relations where all columns are given...
insert(db, data.frame(a = 1L, b = 2L, c = 3L))
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 1 record
#>   key 1: a
#> relation b: b, c; 1 record
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
# and that are listed in relations argument
insert(
  db,
  data.frame(a = 1L, b = 2L, c = 3L, d = 4L),
  relations = "b"
)
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 1 record
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
# inserted data can't violate keys
if (FALSE) { # \dontrun{
  insert(
    db,
    data.frame(a = 1L, b = 1:2)
  )
} # }
# inserted data can't violate foreign key references
if (FALSE) { # \dontrun{
  insert(
    db,
    data.frame(a = 1L, b = 2L, c = 3L, d = 4L),
    relations = "a"
  )
} # }

# vector operations
db2 <- database(
  relation(
    list(
      e = list(df = data.frame(a = 1:5, e = 6:10), keys = list("e"))
    ),
    attrs_order = c("a", "e")
  ),
  list()
)
c(db, db2) # attrs_order attributes are merged
#> database with 3 relations
#> 5 attributes: a, b, c, d, e
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> relation e: e, a; 5 records
#>   key 1: e
#> references:
#> a.{b} -> b.{b}
unique(c(db, db))
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}

# subsetting
db[1]
#> database with 1 relation
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> no references
stopifnot(identical(db[[1]], db[1]))
db[c(1, 2, 1, 2)] # replicates the foreign key references
#> database with 4 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> relation a.1: a, b; 0 records
#>   key 1: a
#> relation b.1: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
#> a.{b} -> b.1.{b}
#> a.1.{b} -> b.{b}
#> a.1.{b} -> b.1.{b}
c(db[c(1, 2)], db[c(1, 2)]) # doesn't reference between separate copies of db
#> database with 4 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> relation a.1: a, b; 0 records
#>   key 1: a
#> relation b.1: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}
#> a.1.{b} -> b.1.{b}
unique(db[c(1, 2, 1, 2)]) # unique() also merges references
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c; 0 records
#>   key 1: b
#>   key 2: c
#> references:
#> a.{b} -> b.{b}

# another example of unique() merging references
db_merge <- database(
  relation(
    list(
      a = list(
        df = data.frame(a = logical(), b = logical()),
        keys = list("a")
      ),
      b = list(
        df = data.frame(b = logical(), c = logical(), d = logical()),
        keys = list("b")
      ),
      c_d = list(
        df = data.frame(c = logical(), d = logical(), e = logical()),
        keys = list(c("c", "d"))
      ),
      a.1 = list(
        df = data.frame(a = logical(), b = logical()),
        keys = list("a")
      ),
      b.1 = list(
        df = data.frame(b = logical(), c = logical(), d = logical()),
        keys = list("b")
      )
    ),
    c("a", "b", "c", "d", "e")
  ),
  list(
    list("a", "b", "b", "b"),
    list("b.1", c("c", "d"), "c_d", c("c", "d"))
  )
)
print(db_merge)
#> database with 5 relations
#> 5 attributes: a, b, c, d, e
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c, d; 0 records
#>   key 1: b
#> relation c_d: c, d, e; 0 records
#>   key 1: c, d
#> relation a.1: a, b; 0 records
#>   key 1: a
#> relation b.1: b, c, d; 0 records
#>   key 1: b
#> references:
#> a.{b} -> b.{b}
#> b.1.{c, d} -> c_d.{c, d}
unique(db_merge)
#> database with 3 relations
#> 5 attributes: a, b, c, d, e
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: b, c, d; 0 records
#>   key 1: b
#> relation c_d: c, d, e; 0 records
#>   key 1: c, d
#> references:
#> a.{b} -> b.{b}
#> b.{c, d} -> c_d.{c, d}

# reassignment
# can't change keys included in references
if (FALSE) keys(db)[[2]] <- list("c") # \dontrun{}
# can't remove attributes included in keys
if (FALSE) attrs(db)[[2]] <- list("c", "d") # \dontrun{}
# can't remove attributes included in references
if (FALSE) attrs(db)[[1]] <- c("a", "d") # \dontrun{}
db3 <- db
# can change subset of schema, but loses references between altered and
# non-altered subsets
db3[2] <- database(
  relation(
    list(d = list(df = data.frame(d = logical(), c = logical()), keys = list("d"))),
    attrs_order(db3)
  ),
  list()
)
print(db3) # note the schema's name doesn't change
#> database with 2 relations
#> 4 attributes: a, b, c, d
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: d, c; 0 records
#>   key 1: d
#> no references
# names(db3)[2] <- "d" # this would change the name
keys(db3)[[2]] <- list(character()) # removing keys first...
# for a database_schema, we could then change the attrs for
# the second database. For a created relation, this is not
# allowed.
if (FALSE) { # \dontrun{
  attrs(db3)[[2]] <- c("b", "c")
  names(records(db3)[[2]]) <- c("b", "c")
} # }

# changing appearance priority for attributes
attrs_order(db3) <- c("d", "c", "b", "a")
print(db3)
#> database with 2 relations
#> 4 attributes: d, c, b, a
#> relation a: a, b; 0 records
#>   key 1: a
#> relation b: d, c; 0 records
#>   key 1: 
#> no references

# changing relation schema names changes them in references
names(db3) <- paste0(names(db3), "_long")
print(db3)
#> database with 2 relations
#> 4 attributes: d, c, b, a
#> relation a_long: a, b; 0 records
#>   key 1: a
#> relation b_long: d, c; 0 records
#>   key 1: 
#> no references

# reconstructing from components
db_recon <- database(
  relation(
    Map(list, df = records(db), keys = keys(db)),
    attrs_order(db)
  ),
  references(db)
)
stopifnot(identical(db_recon, db))
db_recon2 <- database(
  subrelations(db),
  references(db)
)
stopifnot(identical(db_recon2, db))

# can be a data frame column
data.frame(id = 1:2, relation = db)
#>   id               relation
#> 1  1 relation a (0 records)
#> 2  2 relation b (0 records)