234 lines
6.0 KiB
Factor
234 lines
6.0 KiB
Factor
! Copyright (C) 2020 Michael Raitza
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
!
|
|
! * Crit-bit trees
|
|
! ** Rationale
|
|
! Critbit trees are described in [[https://cr.yp.to/critbit.html][djb's crit-bit tree]]. They are an evolution of
|
|
! PATRICIA trees showing that fast insertion, deletion, exact searching and suffix
|
|
! searching is possible with this data structure.
|
|
|
|
! The strength of this data structure, according to its author, lies in its
|
|
! simple design and its optimisation to be machine parsable using machine
|
|
! word-sized operations where possible. Like PATRICIA trees, crit-bit trees are
|
|
! prefix-compressed, with internal nodes storing next decision point (the
|
|
! critical bit) in a length field (encoded as an integer and a mask) and two
|
|
! successor pointers. Arbitrary data objects make up its leaves.
|
|
|
|
USING: accessors arrays assocs fry kernel math namespaces sequences serialize
|
|
trees trees.private ;
|
|
IN: trees.cb
|
|
|
|
TUPLE: cb < tree ;
|
|
|
|
: <cb> ( -- tree ) cb new-tree ; inline
|
|
|
|
<PRIVATE
|
|
|
|
TUPLE: cb-node { byte# integer } { bits integer } left right ;
|
|
|
|
: new-node ( byte# bits class -- node )
|
|
new
|
|
swap >>bits
|
|
swap >>byte# ; inline
|
|
|
|
: <cb-node> ( byte# bits -- node )
|
|
cb-node new-node ;
|
|
|
|
! -1 = left
|
|
! 1 = right
|
|
: key-side ( bits byte -- side )
|
|
bitor 1 + -8 shift 0 = -1 1 ? ;
|
|
|
|
! Produce a byte with all bits set except the msb from bits*.
|
|
! See MAGIC Algorithms for rationale.
|
|
: msb0 ( bits* -- bits )
|
|
dup -1 shift bitor
|
|
dup -2 shift bitor
|
|
dup -4 shift bitor
|
|
dup -1 shift bitnot bitand 255 bitxor ;
|
|
|
|
! Calculate the direction and the critical bit for the differing byte.
|
|
: byte-diff ( newbyte oldbyte -- side bits )
|
|
swap over
|
|
bitxor msb0
|
|
[ key-side ] keep ;
|
|
|
|
! For two byte strings, calculate the critical bit, byte and direction of
|
|
! difference (0 = left, 1 = right).
|
|
: bytes-diff ( newbytes oldbytes -- side bits byte# )
|
|
2dup mismatch
|
|
[
|
|
[ '[ _ swap nth ] bi@ byte-diff ] keep
|
|
] [
|
|
! Equal prefix over full (shorter) byte sequence.
|
|
[ 1 255 ] 2dip shorter length 1 -
|
|
] if* ;
|
|
|
|
! Keep the byte sequence of the current key in =key-bytes= and provide a working
|
|
! environment for it with =with-key=.
|
|
SYMBOL: key-bytes
|
|
SYMBOL: current-key
|
|
SYMBOL: new-side
|
|
|
|
: key>bytes ( key -- bytes )
|
|
object>bytes ;
|
|
|
|
: with-key ( key quot -- )
|
|
[
|
|
{ current-key key-bytes new-side } swap
|
|
dup key>bytes left 3array zip
|
|
] dip with-variables ; inline
|
|
|
|
! Extract the critical byte
|
|
: byte-at ( byte# -- byte/0 )
|
|
key-bytes get ?nth [ 0 ] unless* ;
|
|
|
|
! For the current key and cb-node determin which side to go next
|
|
: select-side ( node -- node side )
|
|
dup [ bits>> ] [ byte#>> ] bi
|
|
byte-at key-side ;
|
|
|
|
! ** Insertion
|
|
|
|
! Tree insertion must be done by traversing the tree from the root, as it is
|
|
! ordered.
|
|
|
|
! ! Insert a node at a leaf-node by first inserting a split node.
|
|
! : cb-insert ( value leaf-node -- node created? )
|
|
! [ current-key get swap <node> ] dip
|
|
! dup key>> key>bytes key-bytes get swap
|
|
! bytes-diff swap <cb-node>
|
|
! ! ( new-leaf old-leaf side cb-node -- )
|
|
! [ swap
|
|
! [ [ set-node-link ] keep
|
|
! set-node+link
|
|
! ] with-side
|
|
! ] keep t ;
|
|
|
|
! DEFER: cb-set
|
|
|
|
! ! Insert a node into a non-empty tree.
|
|
! : (cb-set) ( value parent node -- parent node created? )
|
|
! dup cb-node? [
|
|
! select-side [
|
|
! [ node-link (cb-set) ] keep
|
|
! swap [ [ set-node-link ] keep ] dip
|
|
! ] with-side
|
|
! ] [
|
|
! cb-insert
|
|
! ] if ;
|
|
|
|
! ! Insert a node into the tree
|
|
! : cb-set ( value node -- node created? )
|
|
! [ dup (cb-set) ] [ current-key get swap <node> t ] if* ;
|
|
|
|
! ** Walking the tree for the best fit
|
|
|
|
! We need to remember the parent node as it will be the split node where our new
|
|
! value starts to differ, should it not be (key-)identical with the current
|
|
! node. Start with =f= as the parent.
|
|
|
|
GENERIC: cb-best-fit ( parent/f node -- node child )
|
|
|
|
M: f cb-best-fit ;
|
|
|
|
M: node cb-best-fit ;
|
|
|
|
M: cb-node cb-best-fit
|
|
select-side [
|
|
nip dup node-link cb-best-fit
|
|
] with-side ;
|
|
|
|
GENERIC: cb-update ( value parent/f node -- node created? )
|
|
|
|
M: f cb-update
|
|
2drop current-key get swap <node> t ;
|
|
|
|
: attach-node ( value side cb-node -- cb-node )
|
|
swap [
|
|
[ current-key get swap <node> ] dip
|
|
[ set-node+link ] keep
|
|
] with-side ;
|
|
|
|
M: node cb-update
|
|
dup key>> current-key get = [
|
|
current-key get >>key
|
|
nip swap >>value f
|
|
] [
|
|
dup key>> key>bytes key-bytes get swap
|
|
bytes-diff swap <cb-node>
|
|
attach-node t
|
|
] if ;
|
|
|
|
GENERIC: cb-insert ( new-node cur-node -- node )
|
|
|
|
M: f cb-insert drop ;
|
|
|
|
M: node cb-insert
|
|
new-side get [
|
|
swap [ set-node-link ] keep
|
|
] with-side ;
|
|
|
|
M: cb-node cb-insert
|
|
2dup
|
|
[ [ [ byte#>> ] bi@ < ]
|
|
[ [ [ byte#>> ] bi@ = ]
|
|
[ [ bits>> ] bi@ <]
|
|
] 2&&
|
|
] 2|| [
|
|
! break
|
|
select-side [ set-node-link ] with-side
|
|
] [
|
|
! step down
|
|
] if
|
|
|
|
|
|
PRIVATE>
|
|
|
|
M: cb set-at ( value key cb -- )
|
|
[ swap
|
|
[ ! ( value root -- new created? root )
|
|
[ f swap cb-best-fit cb-update ] keep swap
|
|
[ cb-insert t ] [ 2nip f ] if
|
|
] with-key swap ] change-root
|
|
swap [ dup inc-count ] when drop ;
|
|
|
|
! ** Deletion
|
|
|
|
<PRIVATE
|
|
|
|
GENERIC: cb-delete ( node -- node deleted? )
|
|
|
|
M: f cb-delete f ;
|
|
|
|
M: node cb-delete
|
|
dup key>> current-key get = [ drop f t ] [ f ] if ;
|
|
|
|
M: cb-node cb-delete ( node -- node deleted? )
|
|
select-side [
|
|
dup node-link dup cb-delete [
|
|
! ( node old-child new-child -- )
|
|
[
|
|
! ( node old new )
|
|
! Deleted a split node (received some node)
|
|
tuck eq? [
|
|
drop t
|
|
] [
|
|
swap tuck set-node-link
|
|
t
|
|
] if
|
|
] [
|
|
! Deleted a leaf node return other child.
|
|
drop
|
|
node+link t
|
|
] if*
|
|
f
|
|
] when drop
|
|
] with-side ;
|
|
|
|
PRIVATE>
|
|
|
|
M: cb delete-at ( key cb -- )
|
|
[ swap [ cb-delete ] with-key swap ] change-root
|
|
swap [ dup dec-count ] when drop ;
|