2020-02-09 01:24:19 +01:00
|
|
|
! Copyright (C) 2020 Michael Raitza
|
|
|
|
! See http://factorcode.org/license.txt for BSD license.
|
|
|
|
!
|
|
|
|
! * Crit-bit trees
|
|
|
|
! ** Rationale
|
|
|
|
! Critbit trees are described in [[https://cr.yp.to/critbit.html][djb's crit-bit tree]]. They are an evolution of
|
|
|
|
! PATRICIA trees showing that fast insertion, deletion, exact searching and suffix
|
|
|
|
! searching are possible with this data structure.
|
|
|
|
|
2020-02-09 15:54:27 +01:00
|
|
|
! The strength of this data structure, according to its author, lies in its
|
|
|
|
! simple design and its optimisation to be machine parsable using machine
|
|
|
|
! word-sized operations where possible. Like PATRICIA trees, crit-bit trees are
|
|
|
|
! prefix-compressed, with internal nodes storing the next decision point (the
|
|
|
|
! critical bit) in a length field (encoded as an integer and a mask) and two
|
|
|
|
! successor pointers. Arbitrary data objects make up its leaves.
|
|
|
|
|
|
|
|
USING: accessors arrays assocs kernel math namespaces sequences serialize trees
|
|
|
|
trees.private ;
|
2020-02-09 01:24:19 +01:00
|
|
|
! All definitions below live in the =trees.cb= vocabulary. =IN:= reads a single
! bare token as the vocabulary name and has no terminator, so no trailing =;=.
IN: trees.cb
|
|
|
|
|
|
|
|
! The crit-bit tree type. It subclasses =tree= (supplied by the imported trees
! vocabularies) and declares no additional slots.
TUPLE: cb < tree ;
|
|
|
|
|
|
|
|
! Construct a new =cb= tree by handing the =cb= class to =new-tree=.
: <cb> ( -- tree ) cb new-tree ; inline
|
|
|
|
|
|
|
|
<PRIVATE
|
|
|
|
|
2020-02-09 15:54:27 +01:00
|
|
|
! Node type for crit-bit trees: a =node= extended with two integer slots.
! NOTE(review): presumably =byte#= is the index of the key byte that holds the
! critical bit and =bits= is the mask combined with that byte in =direction=
! -- TODO confirm once insertion is complete.
TUPLE: cb-node < node { byte# integer } { bits integer } ;
|
2020-02-09 01:24:19 +01:00
|
|
|
|
|
|
|
! Build a =cb-node= for a key/value pair via =new-node= and store the supplied
! =byte#= and =bits= values in the slots of the same name.
: <cb-node> ( byte# bits key value -- node )
    cb-node new-node
    ! Stack is now: byte# bits node
    rot >>byte#
    swap >>bits ; inline
|
2020-02-09 01:24:19 +01:00
|
|
|
|
|
|
|
! 0 = left
|
|
|
|
! 1 = right
|
2020-02-09 15:54:27 +01:00
|
|
|
! Combine the two integers with bitwise OR, add one and shift right by eight
! bits. For byte-sized inputs the result is 1 exactly when the OR is 0xff
! (every bit set) and 0 otherwise.
: direction ( bits byte -- direction )
    bitor 1 +
    8 neg shift ; inline
|
|
|
|
|
2020-02-09 15:54:27 +01:00
|
|
|
! Translate the result of =direction= into the side value handed to
! =with-side=: -1 when the direction is 0 (left), 1 otherwise (right).
: key-side ( bits byte -- side )
    direction zero? [ -1 ] [ 1 ] if ;
|
|
|
|
|
|
|
|
! Serialize =key= with =object>bytes= and return the byte at index =byte#=.
! When =?nth= yields f (index outside the byte sequence) the result is 0.
: get-byte-at ( byte# key -- byte/0 )
    object>bytes ?nth
    0 or ;
|
|
|
|
|
2020-02-09 01:53:52 +01:00
|
|
|
! Keep the byte sequence of the current key in =key-bytes= and provide a working
|
|
|
|
! environment for it with =with-key=.
|
|
|
|
! Dynamic variables used while working on one key: =key-bytes= is read by
! =byte-at= and =current-key= by =(cb-set)=; both are meant to be bound by
! =with-key=.
SYMBOLS: key-bytes current-key ;
|
2020-02-09 01:53:52 +01:00
|
|
|
|
|
|
|
! Call =quot= in a fresh dynamic scope in which =current-key= is bound to =key=
! and =key-bytes= to the byte sequence produced by =object>bytes= for that key.
!
! The variables are assigned with =set= inside =with-scope= rather than passed
! as a zipped association. The previous form called =zip= with the data as keys
! and the symbols as values, so =current-key get= and =key-bytes get= never saw
! the binding.
: with-key ( key quot -- )
    [
        ! Stack on entry: key quot
        swap
        [ current-key set ]
        [ object>bytes key-bytes set ] bi
        ! Only quot is left; run it inside the scope.
        call
    ] with-scope ; inline
|
|
|
|
|
|
|
|
! Return the byte at index =byte#= of the sequence stored in =key-bytes=,
! or 0 when =?nth= yields f (index out of range or no sequence bound).
: byte-at ( byte# -- byte/0 )
    key-bytes get ?nth
    0 or ;
|
|
|
|
|
|
|
|
! ** Insertion
|
|
|
|
! Explain...
|
2020-02-09 01:53:52 +01:00
|
|
|
|
2020-02-09 01:24:19 +01:00
|
|
|
DEFER: cb-set
|
|
|
|
|
|
|
|
! Descend one level from =node=: pick a side from the node's =bits= mask and the
! key byte at the node's =byte#=, then recurse into that child via =cb-set=.
!
! NOTE(review): after =f swap= the stack is ( value key f node ), so =with-key=
! receives =node= as its key argument and the quotation starts with =f= on top
! of the stack. This looks unfinished -- confirm the intended stack layout
! before relying on this word.
: cb-insert ( value key node -- node taller? created? )
    f swap
    [
        dup
        [ bits>> ]
        [ byte#>> byte-at ] bi
        key-side
        [ node-link cb-set ] with-side
    ] with-key ;
|
2020-02-09 01:24:19 +01:00
|
|
|
|
|
|
|
! Implement this and cb-insert using SYMBOL for current key bytes while
|
|
|
|
! traversing the tree.
|
|
|
|
! Store =value= at an existing (non-f) =node=.
!
! The =key= argument is discarded because the key is read from the
! =current-key= variable. If the node's key equals =current-key=, the node is
! updated in place and ( node f f ) is returned: not taller, nothing created.
! Otherwise the work is delegated to =cb-insert=.
: (cb-set) ( value key node -- node taller? created? )
    ! Drop the key but keep the node; =key>>= below must see the node.
    nip
    dup key>> current-key get = [
        current-key get >>key swap >>value f f
    ] [
        current-key get swap cb-insert
    ] if ;
|
2020-02-09 01:24:19 +01:00
|
|
|
|
2020-02-09 15:54:27 +01:00
|
|
|
! Insert a node into the tree
|
2020-02-09 01:24:19 +01:00
|
|
|
! Store =value= under =key= in the subtree rooted at =node=.
! A non-f node is handled by =(cb-set)=. For f, a fresh =cb-node= is created
! with =bits= set to 0xfe and ( node t t ) is returned.
! NOTE(review): the value passed as =byte#= is =0 byte-at=, i.e. the first key
! byte rather than a byte index -- confirm this is intended.
: cb-set ( value key node -- node taller? created? )
    dup [
        (cb-set)
    ] [
        drop
        swap [ 0 byte-at 0xfe ] 2dip
        <cb-node> t t
    ] if ;
|
2020-02-09 01:24:19 +01:00
|
|
|
|
|
|
|
PRIVATE>
|
2020-02-09 15:54:27 +01:00
|
|
|
|
|
|
|
! Associate =value= with =key= in the crit-bit tree.
!
! The whole operation runs inside =with-key= because =cb-set= reads the key
! bytes through =byte-at= and =(cb-set)= reads =current-key=; without this
! scope neither variable is bound on the =set-at= path.
! The root is replaced by the node returned from =cb-set=, and the tree's count
! is incremented only when =cb-set= reports that a node was created.
M: cb set-at ( value key cb -- )
    over [
        ! cb-set leaves ( node taller? created? ); keep created? and the node.
        [ cb-set nip swap ] change-root
        swap [ dup inc-count ] when drop
    ] with-key ;
|