Suppliers (S) S# SNAME STATUS CITY
S1 Smith 20 London
S2 Jones 10 Paris
S3 Blake 30 Paris
S4 Clark 20 London
S5 Adams 30 Athens
Each file is a flat table with rows and columns.
Problem 1: direct access
How to get to a specific row (say for S4) quickly?
Problem 2: sequential access
How to get to a range of rows quickly? (Blake, Clark, ..., Jones)
B-Trees: Index Mechanisms [74]
Direct
sql_select all from S where 'S.S#=="S3"'
Range
sql_select all from S
where '(S.CITY>="London")&&(S.CITY<="Paris")'
Problem
How to perform these queries efficiently?
How to avoid reading the entire file?
Solution
Direct access: index of keys in the table
Index: B-Tree
Sequential access: inverted file for ranges
Inverted file: sorted on a column
Index-sequential: B+Tree
B+Tree and Inverted File [75]
B-Tree and B+Tree [76]
B-Tree
balanced tree (all leaves have the same depth)
hierarchy of pages
each page has a sorted list of keys
between each pair of keys is a pointer
pointer is to pages with intermediate key values
Order of B-Tree is m: branch by m/2..m pointers
branching by m/2 yields a depth of $\log_{m/2}(\#\text{keys})$
10,000,000 records with m=20: depth=7
find, insert, delete: O(log(#keys))
insert maintains balance and sorting
delete maintains balance and sorting
B+Tree
all keys are at the leaves
index portion: roadmap to keys at the leaves
leaves of tree are linked for sequential access
B-Tree: Find, Read, Next [77]
Find(key)
direct access
start at root
binary search of page for closest key
take associated index pointer and repeat
if key found: stop (B+Tree: continue to leaf)
O(log(#keys))
Read(data)
take associated data pointer to get the record
Next()
sequential access
use a B+Tree
Find(startkey) for beginning of search
return next key in page
or take sequential pointer to next page
O(1)
B-Tree: Insert(key) [78]
Insert(key)
Find(key) to locate new position
move right neighbors within page
if overflow split into 2 pages
promote middle key to parent page
if parent overflows, repeat
tree only grows at the root
maintains balance
O(log(#keys))
B-Tree: Find, Insert [79]
Order: 5 (max 5 children/node; max 4 keys/node; min 2 keys/node)
Find: h, x
Insert: u (OK)
B-Tree: Insert (Overflow) [80]
Insert: p (Overflow)
Split; Promote m; Overflow
Split; Promote j; New Root
B-Tree: btree [81]
% gcc -o btree btree.c
> tcc btree.c
% btree
> h
h(elp; q(uit; i(nsert x; d(elete x; f(ind x; p(rint; A(scending; D(escending; L(eaves; r(ecursive
Data File with Test Cases: btree.dat
i a i g i f i b i k i d i h i m i j i e i s i i i r i x i c i l i n i t i u i p
p d h p
L
0 [j]
__________|___________
/ \
1 [c,f] [m,r]
_______|_______ ________|________
/ / \ / / \
2 [a,b] [d,e] [g,h,i] [k,l] [n,p] [s,t,u,x]
L
0 [j]
__________|__________
/ \
1 [c,f] [m,r]
______|______ ________|________
/ / \ / / \
2 [a,b] [d,e] [g,i] [k,l] [n,p] [s,t,u,x]
B-Tree: Depth-First Search (DFS) [82]
void DFS(Node_type *p, int depth) { typedef char Key_type;
int i; typedef struct node_tag{
if (p != NULL) { int count;
if (depth > levels) levels = depth; Key_type key[MAX+1];
ptrs[depth][cnt[depth]] = p; struct node_tag *branch[MAX+1];
cnt[depth]++; }Node_type;
for (i=0; i<=p->count; i++)
DFS(p->branch[i],depth+1); /*NOTE: p->key[0] is never used*/
}
}
DFS(root,0);
L
0 [j] cnt[0] =
__________|__________
/ \
1 [c,f] [m,r] cnt[1] =
______|______ ________|________
/ / \ / / \
2 [a,b] [d,e] [g,i] [k,l] [n,p] [s,t,u,x] cnt[2] =
B-Tree: Exercises btree [83]
% btree btree.dat
[f,j,m,t]
______________|______________
/ / \ \ \
[a,b,c,e] [g,i] [k,l] [n,s] [u,x]
ASCENDING ORDER:
a b c e f g i j k l m n s t u x
DESCENDING ORDER:
x u t s n m l k j i g f e c b a
RECURSIVE FINDMIN: a
/* Ex1 : print keys in INCREASING order using recursion like DFS.
   Exercise stub: the body is intentionally left empty, so a call
   currently prints nothing.
   Hint: elsewhere in this file a node keeps its keys in key[1..count]
   and its children in branch[0..count]. */
void ascending(Node_type *p) { }
/* Ex2 : print keys in DECREASING order using recursion like DFS.
   Exercise stub: the body is intentionally left empty, so a call
   currently prints nothing.
   Hint: elsewhere in this file a node keeps its keys in key[1..count]
   and its children in branch[0..count]. */
void descending(Node_type *p) { }
/* Ex6 : find the minimum key using RECURSION. If empty, return ' '.
   Exercise stub: as written it ignores p and always returns ' ',
   i.e. it behaves as if every tree were empty. */
Key_type recursive_find_min(Node_type *p) {
return ' ';
}
B-Tree: Insert [84]
/* Inserts newkey into the B-Tree with the given root and returns the root
   of the resulting tree (a new node when the tree grew in height, otherwise
   the root that was passed in).
   Requires that newkey is not already present in the tree.
   If the new root cannot be allocated, the problem is reported through
   Error() and the old root is returned; in that case the key promoted out
   of the old root and its right subtree are NOT linked into the tree. */
Node_type *Insert(Key_type newkey, Node_type *root) {
    Key_type x;      /* key promoted out of the old root, if any */
    Node_type *xr;   /* subtree on right of x */
    Node_type *p;    /* the newly allocated root */

    if (!PushDown(newkey,root,&x,&xr))
        return root;             /* height unchanged: same root */

    /* Tree grows in height: make a new root holding only x. */
    p = malloc(sizeof *p);
    if (p == NULL) {
        Error("out of memory while creating a new root");
        return root;
    }
    p->count = 1;
    p->key[1] = x;
    p->branch[0] = root;
    p->branch[1] = xr;
    return p;
}
B-Tree: PushDown [85]
/* Recursively moves down the tree searching for the place of newkey.
   Returns TRUE when a key (*x) together with its right subtree (*xr) has to
   be inserted by the caller one level up; returns FALSE when the insertion
   was absorbed at or below p.
   A duplicate key is reported through Error(); should Error() return, the
   tree is left unchanged and FALSE is returned. */
Bool PushDown(Key_type newkey,Node_type *p,Key_type *x, Node_type **xr)
{
    int k;  /* branch on which to continue the search */

    if (p == NULL) {
        /* Fell off the bottom (or the tree is empty): hand newkey back to
           the caller, which inserts it into the node above. */
        *x = newkey;
        *xr = NULL;
        return TRUE;
    }

    /* Search the current node. */
    if (SeqSearch(newkey,p,&k)) {
        Error("inserting duplicate key");
        return FALSE;   /* do not descend and insert the key a second time */
    }

    if (!PushDown(newkey,p->branch[k],x,xr))
        return FALSE;   /* nothing was pushed up from below */

    /* Reinsert median key. */
    if (p->count < MAX) {
        PushIn(*x,*xr,p,k);
        return FALSE;
    }
    Split(*x,*xr,p,k,x,xr);
    return TRUE;
}
B-Tree: SeqSearch, PushIn [86]
/* Scans the keys of node p from right to left looking for target.
   On return *k is the position of target when it is present (result TRUE);
   otherwise *k is the branch on which the search has to continue
   (result FALSE).  The external counter `step` is incremented once for
   every position the scan moves to the left. */
Bool SeqSearch(Key_type target,Node_type *p, int *k) {
    int pos;

    /* Smaller than every key: continue in the leftmost branch. */
    if (target < p->key[1]) {
        *k = 0;
        return FALSE;
    }

    /* Ex4 : replace this code with BinSearch */
    for (pos = p->count; (target < p->key[pos]) && pos > 1; pos--)
        step++;

    *k = pos;
    return (target == p->key[pos]);
}
/* Places key x at position k+1 of node p, with xr as the branch to its
   right.  Entries k+1..count are moved one slot to the right first.
   Requires that the node was not previously full. */
void PushIn(Key_type x,Node_type *xr,Node_type *p, int k) {
    int slot = p->count;    /* entry currently being moved */

    while (slot > k) {
        p->key[slot+1] = p->key[slot];
        p->branch[slot+1] = p->branch[slot];
        slot--;
    }
    p->key[k+1] = x;
    p->branch[k+1] = xr;
    p->count++;
}
B-Tree: Split [87]
/* Splits the full node *p, into which key x with right branch xr belongs at
   position k, into *p (left half) and a newly allocated node *yr (right
   half).  The median key is returned through *y for insertion in the parent.
   NOTE(review): the result of malloc is not checked here. */
void Split(Key_type x, Node_type *xr, Node_type *p, int k, Key_type *y,
Node_type **yr) {
    Node_type *right;                               /* the new right-hand node */
    int median = (k <= MIN) ? MIN : MIN + 1;        /* last entry staying in *p */
    int src, dst;

    /* Get a new node and put it on the right. */
    right = malloc(sizeof *right);
    *yr = right;

    /* Move the entries after the median into the new node. */
    for (src = median + 1, dst = 1; src <= MAX; src++, dst++) {
        right->key[dst] = p->key[src];
        right->branch[dst] = p->branch[src];
    }
    right->count = MAX - median;
    p->count = median;

    /* Push the new key into whichever half it belongs to. */
    if (k > MIN)
        PushIn(x,xr,right,k - median);
    else
        PushIn(x,xr,p,k);

    /* The last key of the left half is promoted; its right branch becomes
       the leftmost branch of the new node. */
    *y = p->key[p->count];
    right->branch[0] = p->branch[p->count];
    p->count--;
}
B-Tree: Binary Search [88]
Given a sorted array, search for key x=42 in the array.
At each step, try the middle position between left and right.
If key = middle value, then stop (FOUND).
If key > middle value, then left = middle + 1.
If key < middle value, then right = middle - 1.
If search space disappears, then stop (NOT_FOUND)
B-Tree: Binary Search Analysis [89]
Cut the Search Space by ONE-HALF at each step.
| Step | Search Space | Example |
| 0 | n | 32 |
| 1 | n/2 | 16 |
| 2 | n/4 | 8 |
| 3 | n/8 | 4 |
| 4 | n/16 | 2 |
| ... | ... | ... |
| y | n/(2**y) | 1 |
Search Space goes to 1: 2**y = n implies y = log_2 n
Worst case: O( )
Sequential Search: O( )
B-Tree: Binary Search bsearch [90]
% gcc -o bsearch bsearch.c
> tcc bsearch.c
Data File with Test Case (n, array, key): bsearch.dat
16
10 34 8 64 51 32 21 17 22 7 14 42 33 25 6 45
42
% bsearch bsearch.dat
left mid right a[mid]
1 8 16 22
9 12 16 34
13 14 16 45
13 13 13 42
key=42 found at position=13 in array:
6 7 8 10 14 17 21 22 25 32 33 34 42 45 51 64
B-Trees: Binary Search bsearch.c [91]
/* Binary search for x in the sorted array a.
   The array is indexed from 1: the elements searched are a[1]..a[n].
   Returns the position of x (between 1 and n) when found, -1 otherwise. */
int binary_search(input_type a[], input_type x, unsigned int n) {
    int left = 1;
    int right = n;
    int mid;

    while (left <= right) {
        /* left + (right - left) / 2 gives the same midpoint as
           (left + right) / 2 but cannot overflow int when both bounds
           are large. */
        mid = left + (right - left) / 2;
        if (a[mid] < x)
            left = mid + 1;         /* x can only be in the upper part */
        else if (a[mid] > x)
            right = mid - 1;        /* x can only be in the lower part */
        else
            return mid;             /* found */
    }
    return -1;                      /* search space is empty: not found */
}
/* Ex4 : BINARY SEARCH instead of sequential search using recursion */
compute middle
if left > right then return FALSE /* stop the recursion */
else if key is found then return TRUE
else if key is in left region then
make a recursive call for the left region
else make a recursive call for the right region
B-Tree: Delete(key) [92]
Delete(key)
Find(key)
if not on leaf, swap with successor on leaf
delete from leaf
if at least m/2 keys then stop else underflow
if neighbor has enough keys, borrow keys
else combine with neighbor
if parent underflows, repeat
tree only shrinks at the root
maintains balance
O(log(#keys))
B-Tree: Delete [93]
Delete: h (OK)
Delete: r (Not on leaf; Swap with successor on leaf s; Delete OK)
B-Tree: Delete (Underflow) [94]
Delete: p (Underflow)
Borrow from neighbor; Moveleft s t
B-Tree: Delete (Combine) [95]
Delete: d (Underflow; Cannot borrow)
Combine a b c e; Remove old page; Underflow
Combine f j m t; Remove old root
B-Tree: Delete [96]
/* Deletes the key target from the B-tree with the given root and returns
   the root of the resulting tree.
   If target is not found, this is reported through Error() and the root is
   returned unchanged.  If the deletion leaves the root node without keys,
   the root is freed and its branch[0] becomes the new root, which is the
   only way the tree loses a level. */
Node_type *Delete(Key_type target, Node_type *root) {
    Node_type *p;   /* used to dispose of an empty root */

    if (!RecDelete(target, root)) {
        Error("Target was not in the B-tree.");
    } else if (root->count == 0) {  /* root is empty. */
        p = root;
        root = root->branch[0];
        free(p);
    }
    return root;
}
B-Tree: RecDelete [97]
/* Looks for target in the subtree rooted at p and deletes it.
   Returns TRUE when the key was found and removed, FALSE when the search
   ran into an empty subtree.  After the work below p is done, the child
   that was visited is restored if it has fewer than MIN keys. */
Bool RecDelete(Key_type target,Node_type *p) {
    int k;      /* location of target or of branch on which to search */
    Bool found;

    if (p == NULL) {
        return FALSE;   /* Hitting an empty tree is an error */
    }

    found = SeqSearch(target,p,&k);
    if (!found) {
        /* Target was not found in current node: continue below. */
        found = RecDelete(target,p->branch[k]);
    } else if (!p->branch[k-1]) {
        /* No branch to the left of the key: delete it right here. */
        Remove(p,k);    /* removes key from position k of *p */
    } else {
        /* Replace the key by its successor, then delete that successor
           from the subtree it came from. */
        Successor(p,k);
        found = RecDelete(p->key[k],p->branch[k]);
        if (!found) {
            /* We know that the new key[k] is in the leaf. */
            Error("Key not found.");
        }
    }

    /* At this point, the function has returned from a recursive call. */
    if (p->branch[k] != NULL && p->branch[k]->count < MIN) {
        Restore(p,k);
    }
    return found;
}
B-Tree: Successor, Remove [98]
/* Overwrites p->key[k] with its immediate successor under natural order:
   the first key of the node reached by entering p->branch[k] and then
   following branch[0] until there is none.
   p->branch[k] must not be NULL, since it is dereferenced. */
void Successor(Node_type *p, int k) {
    Node_type *leaf = p->branch[k];     /* walks down to the leftmost node */

    while (leaf->branch[0] != NULL)
        leaf = leaf->branch[0];
    p->key[k] = leaf->key[1];
}
/* Deletes key[k] and branch[k] from *p by moving every later entry one
   position to the left and decrementing the key count. */
void Remove(Node_type *p,int k) {
    int dst;    /* slot that receives the entry to its right */

    for (dst = k; dst < p->count; dst++) {
        p->key[dst] = p->key[dst+1];
        p->branch[dst] = p->branch[dst+1];
    }
    p->count--;
}
B-Tree: Restore [99]
/* Supplies p->branch[k] with an extra key.
   A key is borrowed from an adjacent sibling that holds more than MIN keys
   (the left sibling is tried first); when no sibling can spare one, the
   child is combined with a neighbour instead. */
void Restore(Node_type *p, int k) {
    if (k == 0) {
        /* Leftmost child: only a right sibling exists. */
        if (p->branch[1]->count > MIN)
            MoveLeft(p,1);
        else
            Combine(p,1);
        return;
    }

    /* Every other child has a left sibling; borrow from it if possible. */
    if (p->branch[k-1]->count > MIN) {
        MoveRight(p,k);
        return;
    }

    /* Only children that are not rightmost have a right sibling to ask. */
    if (k != p->count && p->branch[k+1]->count > MIN) {
        MoveLeft(p,k+1);
        return;
    }

    Combine(p,k);
}
B-Tree: MoveLeft [100]
/* Rotates one key to the left through the parent: p->key[k] is appended to
   the left child p->branch[k-1], and the first key of the right child
   p->branch[k] takes its place in the parent. */
void MoveLeft(Node_type *p,int k) {
    Node_type *left = p->branch[k-1];
    Node_type *right = p->branch[k];
    int i;

    /* Move key from parent into left node; it brings along the leftmost
       branch of the right node. */
    left->count++;
    left->key[left->count] = p->key[k];
    left->branch[left->count] = right->branch[0];

    /* Move key from right node into parent. */
    p->key[k] = right->key[1];
    right->branch[0] = right->branch[1];

    /* Shift all remaining keys in right node one position leftward. */
    right->count--;
    for (i = 2; i <= right->count + 1; i++) {
        right->key[i-1] = right->key[i];
        right->branch[i-1] = right->branch[i];
    }
}
B-Tree: MoveRight [101]
/* Rotates one key to the right through the parent: p->key[k] becomes the
   first key of the right child p->branch[k], and the last key of the left
   child p->branch[k-1] takes its place in the parent. */
void MoveRight(Node_type *p,int k) {
    Node_type *right = p->branch[k];
    Node_type *left = p->branch[k-1];
    int i;

    /* Open slot 1 of the right node by shifting its entries one position. */
    for (i = right->count + 1; i > 1; i--) {
        right->key[i] = right->key[i-1];
        right->branch[i] = right->branch[i-1];
    }
    right->branch[1] = right->branch[0];

    /* Move key from parent to right node. */
    right->count++;
    right->key[1] = p->key[k];

    /* Move last key of left node into parent; its branch becomes the
       leftmost branch of the right node. */
    p->key[k] = left->key[left->count];
    right->branch[0] = left->branch[left->count];
    left->count--;
}
B-Tree: Combine [102]
/* Merges the adjacent children p->branch[k-1] and p->branch[k] into the
   left one, pulling the separating key p->key[k] down between them.  The
   separator is then removed from the parent and the right node is freed. */
void Combine(Node_type *p,int k) {
    Node_type *right = p->branch[k];    /* the right node, which will be emptied and deleted */
    Node_type *left = p->branch[k-1];   /* the node that receives everything */
    int n;                              /* last used position in the left node */
    int i;

    /* Insert the key from the parent. */
    n = left->count + 1;
    left->key[n] = p->key[k];
    left->branch[n] = right->branch[0];

    /* Insert all keys from right node. */
    for (i = 1; i <= right->count; i++) {
        n++;
        left->key[n] = right->key[i];
        left->branch[n] = right->branch[i];
    }
    left->count = n;

    /* Delete key from parent node. */
    for (i = k + 1; i <= p->count; i++) {
        p->key[i-1] = p->key[i];
        p->branch[i-1] = p->branch[i];
    }
    p->count--;

    free(right);    /* Dispose of the empty right node. */
}