1
0
Fork 0
critbit/trees/cb/cb.factor

302 lines
7.8 KiB
Factor

! Copyright (C) 2020 Michael Raitza
! See http://factorcode.org/license.txt for BSD license.
!
! * Crit-bit trees
! ** Rationale
! Critbit trees are described in [[https://cr.yp.to/critbit.html][djb's crit-bit tree]]. They are an evolution of
! PATRICIA trees showing that fast insertion, deletion, exact searching and suffix
! searching is possible with this data structure.
! The strength of this data structure, according to its author, lies in its
! simple design and its optimisation to be machine parsable using machine
! word-sized operations where possible. Like PATRICIA trees, crit-bit trees are
! prefix-compressed, with internal nodes storing next decision point (the
! critical bit) in a length field (encoded as an integer and a mask) and two
! successor pointers. Arbitrary data objects make up its leaves.
USING: accessors alien arrays assocs byte-arrays combinators
combinators.short-circuit fry io.binary io.encodings.binary io.encodings.private
io.encodings.string io.encodings.utf8 kernel layouts locals make math math.order
math.private namespaces parser prettyprint.custom sequences sequences.private
serialize strings trees trees.private vectors ;
IN: trees.cb
TUPLE: cb < tree ;
: <cb> ( -- tree ) cb new-tree ; inline
<PRIVATE
TUPLE: cb-node { byte# integer } { bits fixnum } left right ;
: new-node ( byte# bits class -- node )
new
swap >>bits
swap >>byte# ; inline
: <cb-node> ( bits byte# -- node )
swap cb-node new-node ;
: key-side ( bits byte -- side )
bitor 1 + -8 shift 0 = left right ? ;
! Produce a byte with all bits set except the msb from bits*.
! See MAGIC Algorithms for rationale.
: msb0 ( bits* -- bits )
dup -1 shift bitor
dup -2 shift bitor
dup -4 shift bitor
dup -1 shift bitnot bitand 255 bitxor ;
! Calculate the direction and the critical bit for the differing byte.
: byte-diff ( newbyte oldbyte -- side bits )
swap over
bitxor msb0
[ key-side ] keep ;
: nth0 ( n seq -- elt/0 )
?nth [ 0 ] unless* ;
: 2nth0 ( n seq1 seq2 -- elt1/0 elt2/0 )
[ nth0 ] bi-curry@ bi ;
! For two byte strings, calculate the critical bit, byte and direction of
! difference. For meaningful results ensure that newbytes ≠ oldbytes
: bytes-diff ( newbytes oldbytes -- side bits byte# )
2dup mismatch
[
[ -rot 2nth-unsafe byte-diff ] keep
] [
[ min-length dup ] 2keep
2nth0 byte-diff rot
] if* ;
PRIVATE>
GENERIC: key>bytes* ( key -- bytes )
M: object key>bytes*
object>bytes ;
M: byte-array key>bytes* ;
M: string key>bytes* utf8 encode ;
M: fixnum key>bytes* cell >le ;
M: bignum key>bytes* dup (log2) 8 /i 1 + >le ;
! Assumes that a double is never larger than a pointer
M: float key>bytes* double>bits cell >le ;
<PRIVATE
: key>bytes ( key -- bytes )
key>bytes* ;
! Keep the byte sequence of the current key in =key-bytes= and provide a working
! environment for it with =with-key=.
SYMBOL: key-bytes
SYMBOL: current-key
SYMBOL: new-side
: with-key ( key quot -- )
[
{ current-key key-bytes new-side } swap
dup key>bytes left 3array zip
] dip with-variables ; inline
! Extract the critical byte
: byte-at ( byte# -- byte/0 )
key-bytes get nth0 ;
! For the current key and cb-node determin which side to go next
: select-side ( node -- node side )
dup [ bits>> ] [ byte#>> ] bi
byte-at key-side ;
! ** Insertion
! Tree insertion must be done by traversing the tree from the root, as it is
! ordered.
! ** Walking the tree for the best fit
GENERIC: cb-best-fit ( node -- node )
M: f cb-best-fit ;
M: node cb-best-fit ;
M: cb-node cb-best-fit
select-side [
node-link cb-best-fit
] with-side ;
GENERIC: cb-update ( value node -- node created? )
M: f cb-update
drop current-key get swap <node> t ;
! Attach a new leaf node and record =new-side=. New leaf node is attached
! opposite of =new-side=.
: attach-node ( value side cb-node -- cb-node )
swap [ new-side set ] keep
[
[ current-key get swap <node> ] dip
[ set-node+link ] keep
] with-side ;
! Update the tree by either updating a leaf node with a new key object and value
! or create a new split node and attach a fresh leaf node with the new key and
! value.
M: node cb-update
dup key>> key>bytes key-bytes get = [
current-key get >>key
swap >>value f
] [
[ key-bytes get ] dip key>> key>bytes
bytes-diff <cb-node>
attach-node t
] if ;
! Break off the search when:
! - the top node is no longer a split node
! - the top split node is larger than the new split node (i.e. refers to a later
! byte or more significant bit in the current byte)
: break? ( new-node node -- ? )
{ [ nip cb-node? not ]
[ [ byte#>> ] bi@ < ]
[ { [ [ byte#>> ] bi@ = ]
[ [ bits>> ] bi@ < ]
} 2&& ]
} 2|| ;
! Walk the tree and insert =new-node= at the pre-determined best place. We have
! to keep track of the parent instead of the current node, as we might need to
! relink from the parent to =new-node=.
GENERIC: cb-insert ( new-node parent -- )
M:: cb cb-insert ( n p -- )
p root>> :> c
n c break?
[ n p root<<
new-side get [
c n set-node-link
] with-side
]
[ n c cb-insert ] if ;
M:: cb-node cb-insert ( n p -- )
p select-side [
drop
p node-link :> c
n c break?
[ n p set-node-link
new-side get [
c n set-node-link
] with-side
]
[ n c cb-insert ] if
] with-side ;
M: cb set-at ( value key cb -- )
[ swap [
[ root>> cb-best-fit cb-update ]
[ swap [ cb-insert t ] [ 2drop f ] if ] bi
] with-key ] keep
swap [ dup inc-count ] when drop ;
! ** Deletion
GENERIC: cb-delete ( node -- node deleted? )
M: f cb-delete f ;
M: node cb-delete
dup key>> current-key get = [ drop f t ] [ f ] if ;
M: cb-node cb-delete ( node -- node deleted? )
select-side [
dup node-link dup cb-delete [
! ( node old-child new-child -- )
[
! ( node old new )
! Deleted a split node (received some node)
tuck eq? [
drop t
] [
swap tuck set-node-link
t
] if
] [
! Deleted a leaf node return other child.
drop
node+link t
] if*
f
] when drop
] with-side ;
M: cb delete-at ( key cb -- )
[ swap [ cb-delete ] with-key swap ] change-root
swap [ dup dec-count ] when drop ;
M: cb new-assoc
2drop <cb> ;
GENERIC: (cb-node>alist) ( node -- )
M: object (cb-node>alist) drop ;
M: node (cb-node>alist)
entry, ;
: cb-node>entry ( node -- entry ) [ byte#>> ] [ bits>> ] bi 2array ;
: cb-entry, ( node -- ) cb-node>entry , ;
M: cb-node (cb-node>alist)
[ left>> (cb-node>alist) ]
[ cb-entry, ]
[ right>> (cb-node>alist) ]
tri ;
M: cb >alist
[ root>> (cb-node>alist) ] { } make ;
! Post-order traversal
!
! Assumes =f= in =left= and =right= slots of leaf nodes and ≠ =f= in split
! nodes.
: each-leaf-node ( node quot: ( ... entry -- ... ) -- ... )
[ [ dup left>> ] dip over [ each-leaf-node drop ] [ nip [ node>entry ] dip call ] if ]
[ [ right>> ] dip over [ each-leaf-node ] [ 2drop ] if ]
2bi ; inline recursive
: >cb-alist ( tree -- alist )
dup assoc-size <vector> [
[ push ] curry [ root>> ] dip each-leaf-node
] keep ;
M: cb assoc-clone-like
[ dup cb? [ >cb-alist ] when ] dip call-next-method ;
PRIVATE>
: >cb ( assoc -- tree )
<cb> assoc-clone-like ;
SYNTAX: CB{
\ } [ >cb ] parse-literal ;
<PRIVATE
M: cb assoc-like drop dup cb? [ >cb ] unless ;
M: cb pprint-delims drop \ CB{ \ } ;
M: cb >pprint-sequence >cb-alist ;
M: cb pprint-narrow? drop t ;
PRIVATE>