diff --git a/cb/cb-test.factor b/cb/cb-test.factor new file mode 100644 index 0000000..e584c9d --- /dev/null +++ b/cb/cb-test.factor @@ -0,0 +1,24 @@ +USING: assocs kernel tools.test trees trees.cb trees.private ; +IN: trees.cb.tests + +! Insertion into empty tree +{ T{ cb { root T{ node { key 0 } { value 0 } } } { count 1 } } } [ + 0 0 [ set-at ] keep +] unit-test + +! Insertion into a leaf-node resulting in splitting +{ + T{ cb + { root + T{ cb-node + { bits 247 } + { left T{ node { key 1 } { value 1 } } } + { right T{ node { key 0 } { value 0 } } } + } + } + { count 2 } + } +} [ + 0 0 [ set-at ] keep + 1 1 rot [ set-at ] keep +] unit-test diff --git a/cb/cb.factor b/cb/cb.factor index 5ea5ac8..4b9a30c 100644 --- a/cb/cb.factor +++ b/cb/cb.factor @@ -14,8 +14,8 @@ ! critical bit) in a length field (encoded as an integer and a mask) and two ! successor pointers. Arbitrary data objects make up its leaves. -USING: accessors arrays assocs kernel math namespaces sequences serialize trees -trees.private ; +USING: accessors arrays assocs fry kernel math namespaces sequences serialize +trees trees.private ; IN: trees.cb TUPLE: cb < tree ; @@ -34,63 +34,91 @@ TUPLE: cb-node { byte# integer } { bits integer } left right ; : ( byte# bits -- node ) cb-node new-node ; -! 0 = left +! -1 = left ! 1 = right -: direction ( n n -- direction ) - bitor 1 + -8 shift ; inline - : key-side ( bits byte -- side ) - direction 0 = -1 1 ? ; + bitor 1 + -8 shift 0 = -1 1 ? ; -: get-byte-at ( byte# key -- byte/0 ) - object>bytes ?nth [ 0 ] unless* ; +! Produce a byte with all bits set except the msb from bits*. +! See MAGIC Algorithms for rationale. +: msb0 ( bits* -- bits ) + dup -1 shift bitor + dup -2 shift bitor + dup -4 shift bitor + dup -1 shift bitnot bitand 255 bitxor ; + +! Calculate the direction and the critical bit for the differing byte. +: byte-diff ( newbyte oldbyte -- side bits ) + swap over + bitxor msb0 + [ key-side ] keep ; + +! For two byte strings, calculate the critical bit, byte and direction of +! difference (0 = left, 1 = right). +: bytes-diff ( newbytes oldbytes -- side bits byte# ) + 2dup mismatch + [ + [ '[ _ swap nth ] bi@ byte-diff ] keep + ] [ + ! Equal prefix over full (shorter) byte sequence. + [ 1 255 ] 2dip shorter length 1 - + ] if* ; ! Keep the byte sequence of the current key in =key-bytes= and provide a working ! environment for it with =with-key=. SYMBOL: key-bytes SYMBOL: current-key +: key>bytes ( key -- bytes ) + object>bytes ; + : with-key ( key quot -- ) [ { current-key key-bytes } swap - dup object>bytes 2array zip + dup key>bytes 2array zip ] dip with-variables ; inline +! Extract the critical byte : byte-at ( byte# -- byte/0 ) key-bytes get ?nth [ 0 ] unless* ; ! ** Insertion -! Explain... +! +! Tree insertion must be done by traversing the tree from the root, as it is +! ordered. + +! Insert a node at a leaf-node by first inserting a split node. +: cb-insert ( value leaf-node -- node created? ) + [ current-key get swap ] dip + dup key>> key>bytes key-bytes get swap + bytes-diff swap + ! ( new-leaf old-leaf side cb-node -- ) + [ swap + [ [ set-node-link ] keep + set-node+link + ] with-side + ] keep t ; DEFER: cb-set -: cb-insert ( value node -- node created? ) - ; - -! Implement this and cb-insert using SYMBOL for current key bytes while -! traversing the tree. -: (cb-set) ( value node -- created? ) +! Insert a node into a non-empty tree. +: (cb-set) ( value node -- node created? ) dup cb-node? [ dup [ bits>> ] [ byte#>> ] bi byte-at key-side [ [ node-link cb-set ] keep - swap [ set-node-link ] dip + swap [ [ set-node-link ] keep ] dip ] with-side ] [ - dup key>> current-key get = [ - dup current-key get - [ value<< ] [ >>key drop ] 2bi* f - ] [ - 2drop f ! cb-insert - ] if + cb-insert ] if ; ! Insert a node into the tree : cb-set ( value node -- node created? ) - [ [ (cb-set) ] keep ] [ current-key get swap t ] if* ; + [ (cb-set) ] [ current-key get swap t ] if* ; PRIVATE> M: cb set-at ( value key cb -- ) - [ swap [ cb-set ] with-key ] change-root + [ swap [ cb-set ] with-key swap ] change-root swap [ dup inc-count ] when drop ;