1
0
Fork 0
critbit/trees/cb/cb.factor

234 lines
6.0 KiB
Factor

! Copyright (C) 2020 Michael Raitza
! See http://factorcode.org/license.txt for BSD license.
!
! * Crit-bit trees
! ** Rationale
! Critbit trees are described in [[https://cr.yp.to/critbit.html][djb's crit-bit tree]]. They are an evolution of
! PATRICIA trees showing that fast insertion, deletion, exact searching and suffix
! searching is possible with this data structure.
! The strength of this data structure, according to its author, lies in its
! simple design and its optimisation to be machine parsable using machine
! word-sized operations where possible. Like PATRICIA trees, crit-bit trees are
! prefix-compressed, with internal nodes storing next decision point (the
! critical bit) in a length field (encoded as an integer and a mask) and two
! successor pointers. Arbitrary data objects make up its leaves.
USING: accessors arrays assocs fry kernel math namespaces sequences serialize
trees trees.private ;
IN: trees.cb
TUPLE: cb < tree ;
: <cb> ( -- tree ) cb new-tree ; inline
<PRIVATE
TUPLE: cb-node { byte# integer } { bits integer } left right ;
: new-node ( byte# bits class -- node )
new
swap >>bits
swap >>byte# ; inline
: <cb-node> ( byte# bits -- node )
cb-node new-node ;
! -1 = left
! 1 = right
: key-side ( bits byte -- side )
bitor 1 + -8 shift 0 = -1 1 ? ;
! Produce a byte with all bits set except the msb from bits*.
! See MAGIC Algorithms for rationale.
: msb0 ( bits* -- bits )
dup -1 shift bitor
dup -2 shift bitor
dup -4 shift bitor
dup -1 shift bitnot bitand 255 bitxor ;
! Calculate the direction and the critical bit for the differing byte.
: byte-diff ( newbyte oldbyte -- side bits )
swap over
bitxor msb0
[ key-side ] keep ;
! For two byte strings, calculate the critical bit, byte and direction of
! difference (0 = left, 1 = right).
: bytes-diff ( newbytes oldbytes -- side bits byte# )
2dup mismatch
[
[ '[ _ swap nth ] bi@ byte-diff ] keep
] [
! Equal prefix over full (shorter) byte sequence.
[ 1 255 ] 2dip shorter length 1 -
] if* ;
! Keep the byte sequence of the current key in =key-bytes= and provide a working
! environment for it with =with-key=.
SYMBOL: key-bytes
SYMBOL: current-key
SYMBOL: new-side
: key>bytes ( key -- bytes )
object>bytes ;
: with-key ( key quot -- )
[
{ current-key key-bytes new-side } swap
dup key>bytes left 3array zip
] dip with-variables ; inline
! Extract the critical byte
: byte-at ( byte# -- byte/0 )
key-bytes get ?nth [ 0 ] unless* ;
! For the current key and cb-node determin which side to go next
: select-side ( node -- node side )
dup [ bits>> ] [ byte#>> ] bi
byte-at key-side ;
! ** Insertion
! Tree insertion must be done by traversing the tree from the root, as it is
! ordered.
! ! Insert a node at a leaf-node by first inserting a split node.
! : cb-insert ( value leaf-node -- node created? )
! [ current-key get swap <node> ] dip
! dup key>> key>bytes key-bytes get swap
! bytes-diff swap <cb-node>
! ! ( new-leaf old-leaf side cb-node -- )
! [ swap
! [ [ set-node-link ] keep
! set-node+link
! ] with-side
! ] keep t ;
! DEFER: cb-set
! ! Insert a node into a non-empty tree.
! : (cb-set) ( value parent node -- parent node created? )
! dup cb-node? [
! select-side [
! [ node-link (cb-set) ] keep
! swap [ [ set-node-link ] keep ] dip
! ] with-side
! ] [
! cb-insert
! ] if ;
! ! Insert a node into the tree
! : cb-set ( value node -- node created? )
! [ dup (cb-set) ] [ current-key get swap <node> t ] if* ;
! ** Walking the tree for the best fit
! We need to remember the parent node as it will be the split node where our new
! value starts to differ, should it not be (key-)identical with the current
! node. Start with =f= as the parent.
GENERIC: cb-best-fit ( parent/f node -- node child )
M: f cb-best-fit ;
M: node cb-best-fit ;
M: cb-node cb-best-fit
select-side [
nip dup node-link cb-best-fit
] with-side ;
GENERIC: cb-update ( value parent/f node -- node created? )
M: f cb-update
2drop current-key get swap <node> t ;
: attach-node ( value side cb-node -- cb-node )
swap [
[ current-key get swap <node> ] dip
[ set-node+link ] keep
] with-side ;
M: node cb-update
dup key>> current-key get = [
current-key get >>key
nip swap >>value f
] [
dup key>> key>bytes key-bytes get swap
bytes-diff swap <cb-node>
attach-node t
] if ;
GENERIC: cb-insert ( new-node cur-node -- node )
M: f cb-insert drop ;
M: node cb-insert
new-side get [
swap [ set-node-link ] keep
] with-side ;
M: cb-node cb-insert
2dup
[ [ [ byte#>> ] bi@ < ]
[ [ [ byte#>> ] bi@ = ]
[ [ bits>> ] bi@ <]
] 2&&
] 2|| [
! break
select-side [ set-node-link ] with-side
] [
! step down
] if
PRIVATE>
M: cb set-at ( value key cb -- )
[ swap
[ ! ( value root -- new created? root )
[ f swap cb-best-fit cb-update ] keep swap
[ cb-insert t ] [ 2nip f ] if
] with-key swap ] change-root
swap [ dup inc-count ] when drop ;
! ** Deletion
<PRIVATE
GENERIC: cb-delete ( node -- node deleted? )
M: f cb-delete f ;
M: node cb-delete
dup key>> current-key get = [ drop f t ] [ f ] if ;
M: cb-node cb-delete ( node -- node deleted? )
select-side [
dup node-link dup cb-delete [
! ( node old-child new-child -- )
[
! ( node old new )
! Deleted a split node (received some node)
tuck eq? [
drop t
] [
swap tuck set-node-link
t
] if
] [
! Deleted a leaf node return other child.
drop
node+link t
] if*
f
] when drop
] with-side ;
PRIVATE>
M: cb delete-at ( key cb -- )
[ swap [ cb-delete ] with-key swap ] change-root
swap [ dup dec-count ] when drop ;