182 lines
4.7 KiB
Factor
182 lines
4.7 KiB
Factor
! Copyright (C) 2019 Michael Raitza
|
|
!
|
|
! * Crit-bit trees
|
|
! ** Rationale
|
|
! Critbit trees are described in [[https://cr.yp.to/critbit.html][djb's crit-bit tree]]. They are an evolution of
|
|
! PATRICIA trees showing that fast insertion, deletion, exact searching and suffix
|
|
! searching is possible with this data structure.
|
|
|
|
! The strength of this datastructure, according to its author, lies in its simple
|
|
! design and its optimisation to be machine parsable using machine word-sized
|
|
! operations where possible. Like PATRICIA trees, crit-bit trees are
|
|
! prefix-compressed, with internal nodes storing next decision point (the critical
|
|
! bit) in a length field (encoded as an integer and a mask) and two successor
|
|
! pointers. Arbitrary data objects make up its leaves.
|
|
|
|
USING: accessors fry kernel math sequences serialize ;
|
|
IN: critbit
|
|
|
|
TUPLE: critbit root ;
|
|
|
|
<PRIVATE
|
|
|
|
: new-critbit ( class -- critbit )
|
|
new
|
|
f >>root ; inline
|
|
|
|
PRIVATE>
|
|
|
|
: <critbit> ( -- critbit )
|
|
critbit new-critbit ;
|
|
|
|
<PRIVATE
|
|
|
|
TUPLE: node left right { byte# integer } { bits integer } ;
|
|
|
|
: empty? ( tree -- ? )
|
|
root>> f = ; inline
|
|
|
|
: new-node ( bits byte# class -- node )
|
|
new
|
|
swap >>byte#
|
|
swap >>bits ; inline
|
|
|
|
: <node> ( bits byte# -- node )
|
|
node new-node ;
|
|
|
|
: get-child ( node direction -- child )
|
|
0 = [ right>> ] [ left>> ] if ;
|
|
|
|
: set-child ( node child direction -- node )
|
|
0 = [ >>right ] [ >>left ] if ;
|
|
|
|
: set-child-opposite ( node child direction -- node )
|
|
1 swap - set-child ;
|
|
|
|
: direction ( byte bits -- direction )
|
|
bitor 1 + -8 shift ;
|
|
|
|
! ** Walking the tree
|
|
! Extract the split byte number and bit from node and return the proper child the
|
|
! proper child corresponding to the bytestring's prefix.
|
|
|
|
: successor ( bytes node -- bytes successor via-direction )
|
|
2dup [ byte#>> ] [ bits>> ] bi
|
|
[ swap ?nth [ 0 ] unless* ] dip
|
|
direction [ get-child ] keep ;
|
|
|
|
! Walk the tree until we find a leaf.
|
|
|
|
: walk-critbit ( obj tree -- objbytes leaf )
|
|
[ object>bytes ] [ root>> ] bi*
|
|
[ dup node? ] [ successor drop ] while ;
|
|
|
|
PRIVATE>
|
|
|
|
: member? ( obj tree -- ? )
|
|
dup critbit?
|
|
[ walk-critbit = ]
|
|
[ 2drop f ] if ;
|
|
|
|
<PRIVATE
|
|
|
|
! Produce a byte with all bits set except the msb from bits*.
|
|
! See MAGIC Algorithms for rationale.
|
|
|
|
: msb0 ( bits* -- bits )
|
|
dup -1 shift bitor
|
|
dup -2 shift bitor
|
|
dup -4 shift bitor
|
|
dup -1 shift bitnot bitand 255 bitxor ;
|
|
|
|
: byte-diff ( newbyte oldbyte -- direction bits )
|
|
swap over
|
|
bitxor msb0
|
|
[ direction ] keep ;
|
|
|
|
: node-params ( newbytes oldbytes -- direction bits byte# )
|
|
2dup mismatch
|
|
[ [ '[ _ swap nth ] bi@ byte-diff ] keep ]
|
|
! Equal prefix over full (shorter) byte sequence.
|
|
[ [ 1 255 ] 2dip shorter length 1 - ] if* ;
|
|
|
|
: derive-node ( obj newbytes leaf -- node direction )
|
|
object>bytes
|
|
node-params <node> -rot [ set-child-opposite ] keep ;
|
|
|
|
: insert-node ( node split-node direction -- successor ? )
|
|
[
|
|
[ [ byte#>> ] bi@ - [ 0 < ] [ 0 = ] bi ] 2keep
|
|
[ bits>> ] > and
|
|
or
|
|
] dip drop
|
|
;
|
|
|
|
: insertion-walk ( tree split-node new-direction -- tree )
|
|
! FIXME Unfinished. We need obj to calculate succession :(
|
|
rot [
|
|
root>> dup node?
|
|
[ ] [ 2over insert-node ] while 3drop
|
|
] keep ;
|
|
|
|
:: walk-on? ( split-node node -- ? )
|
|
node node? not
|
|
node byte#>> split-node byte#>> >
|
|
or
|
|
node byte#>> split-node byte#>> =
|
|
node bits>> split-node bits>> >
|
|
and
|
|
or
|
|
not ;
|
|
|
|
:: insertion-walk ( obj split-node new-direction -- ? )
|
|
obj obj>bytes :> objbytes
|
|
tree root>> :> node
|
|
[ split-node node walk-on? ] [
|
|
node :> predecessor
|
|
objbytes node successor :> node-direction :> node drop
|
|
] while
|
|
node split-node new-direction set-child
|
|
;
|
|
|
|
! :: insertion-successor ( objbytes bits byte# node -- split-node )
|
|
! node node? not
|
|
! [ node ]
|
|
! [
|
|
! node byte#>>
|
|
! [ byte# > ] keep
|
|
! byte# = node bits>> bits > and
|
|
! or
|
|
! ! objbytes no longer matches prefix?
|
|
! [ node ]
|
|
! [
|
|
! ! FIXME: complete node insertion
|
|
! ] if
|
|
! ] if ;
|
|
|
|
! :: (insert-node) ( obj tree objbytes treebytes direction bits byte# -- tree )
|
|
! bits byte# <node>
|
|
! direction 0 = [ obj >>right ] [ obj >>left ] if :> node
|
|
! tree root>> [ dup node? ] [ insertion-successor]
|
|
! ;
|
|
|
|
! : insert-node ( obj tree objbytes treebytes -- tree )
|
|
! 2dup node-params (insert-node)
|
|
! ;
|
|
|
|
PRIVATE>
|
|
|
|
: (put) ( tree obj -- tree )
|
|
swap dup empty?
|
|
[ swap >>root ]
|
|
[ [ 2dup walk-critbit
|
|
derive-node
|
|
! FIXME Unfinished, this node must now be inserted into the tree
|
|
] 2keep
|
|
2drop ! <- remove;
|
|
! insert-node
|
|
] if ;
|
|
|
|
: put ( obj tree -- tree )
|
|
dup critbit? [ (put) ] [ 2drop "Not a critbit tree" throw ] if ;
|