From 2a9301128b5515f06e34a830406bf8b170e38ea0 Mon Sep 17 00:00:00 2001 From: Tomi Jaga Date: Sat, 21 Dec 2024 03:47:43 -0800 Subject: [PATCH] Adding Key compression to BTree --- mops.toml | 1 - src/MemoryBTree/Base.mo | 229 ++++++++++++++---- src/MemoryBTree/Migrations/V0_0_2.mo | 62 +++++ src/MemoryBTree/Migrations/lib.mo | 25 +- src/MemoryBTree/Stable.mo | 8 +- src/MemoryBTree/lib.mo | 2 +- src/MemoryBTree/modules/Branch.mo | 177 +++++++++++++- src/MemoryBTree/modules/Common.mo | 3 + src/MemoryBTree/modules/Leaf.mo | 157 +++++++++++- src/MemoryBTree/modules/MemoryBlock.mo | 78 +++--- src/MemoryBTree/modules/Methods.mo | 26 +- src/MemoryBTree/modules/Types.mo | 5 + src/Utils.mo | 39 +++ .../MemoryBTree.Migrations.Test.mo | 4 +- tests/MemoryBTree/MemoryBTree.Test.mo | 2 +- 15 files changed, 710 insertions(+), 108 deletions(-) create mode 100644 src/MemoryBTree/Migrations/V0_0_2.mo create mode 100644 src/MemoryBTree/modules/Common.mo diff --git a/mops.toml b/mops.toml index bae0f9f..97c7bea 100644 --- a/mops.toml +++ b/mops.toml @@ -11,7 +11,6 @@ base = "0.12.1" itertools = "0.2.1" memory-region = "1.2.4" buffer-deque = "0.1.0" -memory-collection = "0.0.1" [dev-dependencies] test = "2.0.0" diff --git a/src/MemoryBTree/Base.mo b/src/MemoryBTree/Base.mo index 483ada9..ae7e161 100644 --- a/src/MemoryBTree/Base.mo +++ b/src/MemoryBTree/Base.mo @@ -1,3 +1,4 @@ +import Array "mo:base/Array"; import Debug "mo:base/Debug"; import Iter "mo:base/Iter"; import Int "mo:base/Int"; @@ -11,7 +12,7 @@ import Blob "mo:base/Blob"; import MemoryRegion "mo:memory-region/MemoryRegion"; import RevIter "mo:itertools/RevIter"; -import Find "mo:map/Map/modules/find"; +import Itertools "mo:itertools/Iter"; import MemoryCmp "../TypeUtils/MemoryCmp"; import Blobify "../TypeUtils/Blobify"; @@ -43,9 +44,9 @@ module { public type BTreeUtils = T.BTreeUtils; let CACHE_LIMIT = 50_000; - let DEFAULT_ORDER = 256; + let DEFAULT_ORDER = 32; - public func _new_with_options(node_capacity : ?Nat, opt_cache_size : ?Nat, is_set : Bool) : MemoryBTree { + public func _new_with_options(node_capacity : ?Nat, opt_cache_size : ?Nat, is_set : Bool, enable_key_compression : Bool) : MemoryBTree { let cache_size = Option.get(opt_cache_size, CACHE_LIMIT); let btree : MemoryBTree = { is_set; @@ -57,6 +58,7 @@ module { var leaf_count = 0; var depth = 0; var is_root_a_leaf = true; + var supports_key_compression = enable_key_compression; leaves = MemoryRegion.new(); branches = MemoryRegion.new(); @@ -83,8 +85,11 @@ module { // _new_with_options(node_capacity, cache_size, true); // }; - public func new(node_capacity : ?Nat) : MemoryBTree { - _new_with_options(node_capacity, ?0, false); + public func new(opt_options : ?T.InitOptions) : MemoryBTree { + let options = Option.get(opt_options, { node_capacity = null; enable_key_compression = null }); + let support_key_compression = Option.get(options.enable_key_compression, false); + + _new_with_options(options.node_capacity, null, false, support_key_compression); }; public let POINTER_SIZE = 12; @@ -265,8 +270,38 @@ module { }; func update_is_root_a_leaf(btree : MemoryBTree, is_leaf : Bool) { + + let flags = MemoryRegion.loadNat8(btree.data, MC.DATA.IS_ROOT_A_LEAF_ADDRESS); + + let is_root_a_leaf_position : Nat8 = 0; + let is_root_a_leaf_flag : Nat8 = 1 << is_root_a_leaf_position; + + let flags_with_is_root_a_leaf = if (is_leaf) { + flags | is_root_a_leaf_flag; + } else { + flags & (^is_root_a_leaf_flag); + }; + + MemoryRegion.storeNat8(btree.data, MC.DATA.IS_ROOT_A_LEAF_ADDRESS, flags_with_is_root_a_leaf); + btree.is_root_a_leaf := is_leaf; - MemoryRegion.storeNat8(btree.data, MC.DATA.IS_ROOT_A_LEAF_ADDRESS, if (is_leaf) 1 else 0); + }; + + func update_is_support_for_key_compression(btree : MemoryBTree, support : Bool) { + let flags = MemoryRegion.loadNat8(btree.data, MC.DATA.IS_ROOT_A_LEAF_ADDRESS); + + let key_compression_flag_position : Nat8 = 1; + let key_compression_flag : Nat8 = 1 << key_compression_flag_position; + + let flags_with_key_compression = if (support) { + flags | key_compression_flag; + } else { + flags & (^key_compression_flag); + }; + + MemoryRegion.storeNat8(btree.data, MC.DATA.IS_ROOT_A_LEAF_ADDRESS, flags_with_key_compression); + + btree.supports_key_compression := support; }; func inc_subtree_size(btree : MemoryBTree, branch_address : Nat, _child_index : Nat) { @@ -274,19 +309,54 @@ module { Branch.update_subtree_size(btree, branch_address, subtree_size + 1); }; - public func insert(btree : MemoryBTree, btree_utils : BTreeUtils, key : K, value : V) : ?V { - let key_blob = btree_utils.key.blobify.to_blob(key); - - let leaf_address = Methods.get_leaf_address_and_update_path(btree, btree_utils, key, ?key_blob, inc_subtree_size); + func binary_search_in_leaf(btree : MemoryBTree, btree_utils : BTreeUtils, leaf_address : Nat, key : K, opt_key_blob : ?Blob) : Int { let count = Leaf.get_count(btree, leaf_address); + let key_blob = switch (opt_key_blob) { + case (null) btree_utils.key.blobify.to_blob(key); + case (?key_blob) key_blob; + }; let int_index = switch (btree_utils.key.cmp) { case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); + + if (btree.supports_key_compression) { + let leaf_prefix_key_size : Nat = Leaf.get_prefix_key_size(btree, leaf_address); + + if (leaf_prefix_key_size > 0) { + let key_bytes = Blob.toArray(key_blob); + let key_suffix_bytes : [Nat8] = Array.subArray(key_bytes, leaf_prefix_key_size, key_bytes.size() - leaf_prefix_key_size : Nat); + let key_suffix_blob = Blob.fromArray(key_suffix_bytes); + + // this check is only true in leaf nodes because we store the key for the 0th element in the leaf node + // and is not true for branch because we store the key starting from the 1st element in the branch node + if (cmp(key_suffix_blob, key_blob) == 1) return -1; + + Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_suffix_blob, count); + } else { + Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); + }; + } else { + Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); + }; }; }; + int_index; + }; + + public func insert(btree : MemoryBTree, btree_utils : BTreeUtils, key : K, value : V) : ?V { + if (btree.supports_key_compression) { + let #BlobCmp(_) = btree_utils.key.cmp else Debug.trap("MemoryBTree: key compression can only be used if the comparator is set to #BlobCmp"); + }; + + let key_blob = btree_utils.key.blobify.to_blob(key); + + let leaf_address = Methods.get_leaf_address_and_update_path(btree, btree_utils, key, ?key_blob, inc_subtree_size); + let count = Leaf.get_count(btree, leaf_address); + + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); + let elem_index = if (int_index >= 0) Int.abs(int_index) else Int.abs(int_index + 1); let val_blob = btree_utils.value.blobify.to_blob(value); @@ -301,16 +371,67 @@ module { return ?btree_utils.value.blobify.from_blob(prev_val_blob); }; - let kv_address = MemoryBlock.store(btree, key_blob, val_blob); + var key_suffix_blob = key_blob; + var needs_to_update_median_key = false; + + if (btree.supports_key_compression) { + needs_to_update_median_key := elem_index == 0; + + var leaf_prefix_key : Blob = switch (Leaf.get_prefix_key(btree, leaf_address)) { + case (null) ""; + case (?prefix) prefix; + }; + + var leaf_prefix_key_bytes = Blob.toArray(leaf_prefix_key); + let key_bytes = Blob.toArray(key_blob); + + key_suffix_blob := if (Utils.is_prefix(leaf_prefix_key_bytes, key_bytes)) { + let key_suffix_bytes = Array.subArray(key_bytes, leaf_prefix_key_bytes.size(), key_bytes.size() - leaf_prefix_key_bytes.size() : Nat); + Blob.fromArray(key_suffix_bytes); + } else { + + let new_prefix_size = Utils.get_prefix_size(leaf_prefix_key_bytes, key_bytes); + leaf_prefix_key_bytes := Array.subArray(key_bytes, 0, new_prefix_size); + leaf_prefix_key := Blob.fromArray(leaf_prefix_key_bytes); + + Leaf.update_prefix_key(btree, leaf_address, ?leaf_prefix_key); + + let prefix_tail_to_return = Array.subArray(leaf_prefix_key_bytes, new_prefix_size, leaf_prefix_key_bytes.size() - new_prefix_size : Nat); + + needs_to_update_median_key := count > 0 or needs_to_update_median_key; + + Leaf.add_prefix_to_keys(btree, leaf_address, count, Blob.fromArray(prefix_tail_to_return)); + + let key_suffix_bytes = Array.subArray(key_bytes, new_prefix_size, key_bytes.size() - new_prefix_size : Nat); + let key_suffix_blob = Blob.fromArray(key_suffix_bytes); + key_suffix_blob; + }; + }; + + let kv_address = MemoryBlock.store(btree, key_suffix_blob, val_blob); // Debug.print("kv_address: " # debug_show kv_address); // Debug.print("kv_blobs: " # debug_show (MemoryBlock.get_key_blob(btree, kv_address), MemoryBlock.get_val_blob(btree, kv_address))); - // Debug.print("actual kv_blobs: " # debug_show (key_blob, val_blob)); + // Debug.print("actual kv_blobs: " # debug_show (key_suffix_blob, val_blob)); if (count < btree.node_capacity) { // Debug.print("found leaf with enough space"); Leaf.insert_with_count(btree, leaf_address, elem_index, kv_address, count); update_count(btree, btree.count + 1); + if (needs_to_update_median_key) { + let ?first_key_blob = Leaf.get_key_blob(btree, leaf_address, 0) else Debug.trap("insert: first_key_address accessed a null value"); + + let leaf_index = Leaf.get_index(btree, leaf_address); + + switch (Leaf.get_parent(btree, leaf_address)) { + case (null) {}; + case (?parent) { + Branch.update_median_key(btree, parent, leaf_index, first_key_blob); + }; + }; + + }; + return null; }; @@ -524,12 +645,7 @@ module { let leaf_address = Methods.get_leaf_address(btree, btree_utils, key, ?key_blob); let count = Leaf.get_count(btree, leaf_address); - let int_index = switch (btree_utils.key.cmp) { - case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); - case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); - }; - }; + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); if (int_index < 0) return null; @@ -546,12 +662,7 @@ module { let leaf_address = Methods.get_leaf_address(btree, btree_utils, key, ?key_blob); let count = Leaf.get_count(btree, leaf_address); - let int_index = switch (btree_utils.key.cmp) { - case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); - case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); - }; - }; + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); if (int_index < 0) return false; @@ -633,14 +744,9 @@ module { let key_blob = btree_utils.key.blobify.to_blob(key); let leaf_address = Methods.get_leaf_address_and_update_path(btree, btree_utils, key, ?key_blob, decrement_subtree_size); - let count = Leaf.get_count(btree, leaf_address); + var leaf_count = Leaf.get_count(btree, leaf_address); - let int_index = switch (btree_utils.key.cmp) { - case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); - case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); - }; - }; + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); if (int_index < 0) { // key not found, so revert the path to its original state by incrementing the subtree size @@ -675,8 +781,24 @@ module { Branch.update_median_key_address(btree, parent, leaf_index, next_key_address); }; + let prev_leaf_count = leaf_count; + leaf_count -= 1; + + if (btree.supports_key_compression) { + if (leaf_count > 1) { + + if (elem_index == 0 or elem_index == prev_leaf_count - 1) { + Leaf.compress_similar_keys(btree, leaf_address, leaf_count); + } + + } else if (leaf_count == 0) { + Leaf.update_prefix_key(btree, leaf_address, null); + }; + + }; + let min_count = btree.node_capacity / 2; - let leaf_count = Leaf.get_count(btree, leaf_address); + leaf_count := Leaf.get_count(btree, leaf_address); if (leaf_count >= min_count) return ?prev_val; @@ -732,8 +854,10 @@ module { if (child_is_leaf) { Leaf.update_parent(btree, child, null); + Leaf.update_prefix_key(btree, child, null); } else { Branch.update_parent(btree, child, null); + Branch.update_prefix_key(btree, child, null); }; update_root(btree, child); @@ -750,6 +874,7 @@ module { let ?branch_parent = Branch.get_parent(btree, branch) else return set_only_child_to_root(parent); parent := branch_parent; + var branch_count = Branch.get_count(btree, branch); while (Branch.get_count(btree, branch) < min_count) { // Debug.print("redistribute branch"); @@ -775,6 +900,22 @@ module { Branch.deallocate(btree, merged_branch); update_branch_count(btree, btree.branch_count - 1); + let prev_branch_count = branch_count; + branch_count := Branch.get_count(btree, branch); + + if (btree.supports_key_compression) { + if (branch_count > 1) { + + if (elem_index == 0 or elem_index == prev_branch_count - 1) { + Branch.compress_similar_keys(btree, branch, branch_count); + } + + } else if (branch_count == 0) { + Branch.update_prefix_key(btree, branch, null); + }; + + }; + // Debug.print("parent after merge: " # debug_show Branch.from_memory(btree, parent)); // Debug.print("leaf_nodes: " # debug_show Iter.toArray(Methods.leaf_addresses(btree))); @@ -803,12 +944,12 @@ module { ?max; }; - public func fromArray(btree_utils : BTreeUtils, arr : [(K, V)], order : ?Nat) : MemoryBTree { - fromEntries(btree_utils, arr.vals(), order); + public func fromArray(btree_utils : BTreeUtils, arr : [(K, V)], opt_options : ?T.InitOptions) : MemoryBTree { + fromEntries(btree_utils, arr.vals(), opt_options); }; - public func fromEntries(btree_utils : BTreeUtils, entries : Iter<(K, V)>, order : ?Nat) : MemoryBTree { - let btree = new(order); + public func fromEntries(btree_utils : BTreeUtils, entries : Iter<(K, V)>, opt_options : ?T.InitOptions) : MemoryBTree { + let btree = new(opt_options); for ((k, v) in entries) { ignore insert(btree, btree_utils, k, v); @@ -893,12 +1034,7 @@ module { let (leaf_address, index_pos) = Methods.get_leaf_node_and_index(btree, btree_utils, key_blob); let count = Leaf.get_count(btree, leaf_address); - let int_index = switch (btree_utils.key.cmp) { - case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); - case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); - }; - }; + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); if (int_index < 0) { Debug.trap("getIndex(): Key not found. Use getExpectedIndex() instead to get keys that might not be in the tree"); @@ -919,12 +1055,7 @@ module { let (leaf_address, index_pos) = Methods.get_leaf_node_and_index(btree, btree_utils, key_blob); let count = Leaf.get_count(btree, leaf_address); - let int_index = switch (btree_utils.key.cmp) { - case (#GenCmp(cmp)) Leaf.binary_search(btree, btree_utils, leaf_address, cmp, key, count); - case (#BlobCmp(cmp)) { - Leaf.binary_search_blob_seq(btree, leaf_address, cmp, key_blob, count); - }; - }; + let int_index = binary_search_in_leaf(btree, btree_utils, leaf_address, key, ?key_blob); if (int_index < 0) { #NotFound(index_pos + Int.abs(int_index + 1)); diff --git a/src/MemoryBTree/Migrations/V0_0_2.mo b/src/MemoryBTree/Migrations/V0_0_2.mo new file mode 100644 index 0000000..6f73fb7 --- /dev/null +++ b/src/MemoryBTree/Migrations/V0_0_2.mo @@ -0,0 +1,62 @@ +import Nat "mo:base/Nat"; + +import MemoryRegion "mo:memory-region/MemoryRegion"; +import RevIter "mo:itertools/RevIter"; +// import Branch "mo:augmented-btrees/BpTree/Branch"; + +import Blobify "../../TypeUtils/Blobify"; +import MemoryCmp "../../TypeUtils/MemoryCmp"; + +module V0_0_1 { + // thinking the version should match the mops version where the layout or heap structure was last changed + + public type Address = Nat; + type Size = Nat; + + public type MemoryBlock = (Address, Size); + + type MemoryRegionV1 = MemoryRegion.MemoryRegionV1; + type Blobify = Blobify.Blobify; + type RevIter = RevIter.RevIter; + + public type MemoryCmp = MemoryCmp.MemoryCmp; + + public type MemoryBTree = { + is_set : Bool; // is true, only keys are stored + node_capacity : Nat; + var count : Nat; + var root : Nat; + var branch_count : Nat; // number of branch nodes + var leaf_count : Nat; // number of leaf nodes + var depth : Nat; + var is_root_a_leaf : Bool; + var supports_key_compression : Bool; + + leaves : MemoryRegionV1; + branches : MemoryRegionV1; + data : MemoryRegionV1; + values : MemoryRegionV1; + + }; + + public type Leaf = ( + nats : [var Nat], // [address, index, count] + adjacent_nodes : [var ?Nat], // [parent, prev, next] (is_root if parent is null) + key_blocks : [var ?(MemoryBlock)], // [... ((key address, key size), key blob)] + val_blocks : [var ?(MemoryBlock)], + kv_blobs : [var ?(Blob, Blob)], + _branch_children_nodes : [var ?Nat], // [... child address] + _branch_keys_blobs : [var ?Blob], + ); + + public type Branch = ( + nats : [var Nat], // [address, index, count, subtree_size] + parent : [var ?Nat], // parent + key_blocks : [var ?(MemoryBlock)], // [... ((key address, key size), key blob)] + _leaf_val_blocks : [var ?(MemoryBlock)], + _leaf_kv_blobs : [var ?(Blob, Blob)], + children_nodes : [var ?Nat], // [... child address] + keys_blobs : [var ?Blob], + ); + +}; diff --git a/src/MemoryBTree/Migrations/lib.mo b/src/MemoryBTree/Migrations/lib.mo index 72af6dd..2bb9361 100644 --- a/src/MemoryBTree/Migrations/lib.mo +++ b/src/MemoryBTree/Migrations/lib.mo @@ -5,17 +5,19 @@ import MemoryRegion "mo:memory-region/MemoryRegion"; import V0 "V0"; import V0_0_1 "V0_0_1"; +import V0_0_2 "V0_0_2"; module Migrations { // should update to the latest version - public type MemoryBTree = V0_0_1.MemoryBTree; - public type Leaf = V0_0_1.Leaf; - public type Branch = V0_0_1.Branch; + public type MemoryBTree = V0_0_2.MemoryBTree; + public type Leaf = V0_0_2.Leaf; + public type Branch = V0_0_2.Branch; public type VersionedMemoryBTree = { #v0 : V0.MemoryBTree; #v0_0_1 : V0_0_1.MemoryBTree; + #v0_0_2 : V0_0_2.MemoryBTree; }; public type StableStore = VersionedMemoryBTree; @@ -25,18 +27,29 @@ module Migrations { case (#v0(v0)) { Debug.trap("Migration Error: Migrating from #v0 is not supported"); }; - case (#v0_0_1(v0_0_1)) versions; + case (#v0_0_1(v0_0_1)) #v0_0_2({ + v0_0_1 with + var supports_key_compression = false; + var branch_count = v0_0_1.branch_count; + var count = v0_0_1.count; + var root = v0_0_1.root; + var depth = v0_0_1.depth; + var is_root_a_leaf = v0_0_1.is_root_a_leaf; + var leaf_count = v0_0_1.leaf_count; + + }); + case (#v0_0_2(_)) versions; }; }; public func getCurrentVersion(versions : VersionedMemoryBTree) : MemoryBTree { switch (versions) { - case (#v0_0_1(curr)) curr; + case (#v0_0_2(curr)) curr; case (_) Debug.trap("Unsupported version. Please upgrade the memory buffer to the latest version."); }; }; public func addVersion(btree : MemoryBTree) : VersionedMemoryBTree { - #v0_0_1(btree); + #v0_0_2(btree); }; }; diff --git a/src/MemoryBTree/Stable.mo b/src/MemoryBTree/Stable.mo index 0664fd9..a97695f 100644 --- a/src/MemoryBTree/Stable.mo +++ b/src/MemoryBTree/Stable.mo @@ -18,17 +18,17 @@ module StableMemoryBTree { }; }; - public func new(order : ?Nat) : StableMemoryBTree { - let btree = MemoryBTree.new(order); + public func new(opt_options : ?T.InitOptions) : StableMemoryBTree { + let btree = MemoryBTree.new(opt_options); MemoryBTree.toVersioned(btree); }; public func fromArray( btree_utils : BTreeUtils, arr : [(K, V)], - order : ?Nat, + opt_options : ?T.InitOptions, ) : StableMemoryBTree { - let btree = MemoryBTree.fromArray(btree_utils, arr, order); + let btree = MemoryBTree.fromArray(btree_utils, arr, opt_options); MemoryBTree.toVersioned(btree); }; diff --git a/src/MemoryBTree/lib.mo b/src/MemoryBTree/lib.mo index 9a80238..3164877 100644 --- a/src/MemoryBTree/lib.mo +++ b/src/MemoryBTree/lib.mo @@ -24,7 +24,7 @@ module { public type ExpectedIndex = BaseMemoryBTree.ExpectedIndex; /// Create a new stable store - public func newStableStore(order : ?Nat) : StableStore = StableMemoryBTree.new(order); + public func newStableStore(opt_options : ?T.InitOptions) : StableStore = StableMemoryBTree.new(opt_options); /// Upgrade an older version of the BTree to the latest version public func upgrade(sstore : StableStore) : StableStore { diff --git a/src/MemoryBTree/modules/Branch.mo b/src/MemoryBTree/modules/Branch.mo index 7fae09b..e9f986b 100644 --- a/src/MemoryBTree/modules/Branch.mo +++ b/src/MemoryBTree/modules/Branch.mo @@ -13,13 +13,14 @@ import Float "mo:base/Float"; import MemoryRegion "mo:memory-region/MemoryRegion"; import RevIter "mo:itertools/RevIter"; -// import Branch "mo:augmented-btrees/BpTree/Branch"; +import Itertools "mo:itertools/Iter"; import MemoryFns "MemoryFns"; import T "Types"; import Leaf "Leaf"; import MemoryBlock "MemoryBlock"; import Migrations "../Migrations"; +import Utils "../../Utils"; module Branch { @@ -65,6 +66,9 @@ module Branch { PARENT_START = 16; ADDRESS_SIZE = 8; + KEY_PREFIX_START = 24; + KEY_PREFIX_SIZE_START = 30; + KEYS_START = 64; NULL_ADDRESS : Nat64 = 0x00; @@ -100,13 +104,14 @@ module Branch { MemoryRegion.storeBlob(btree.branches, branch_address, MC.MAGIC); MemoryRegion.storeNat8(btree.branches, branch_address + MC.DEPTH_START, 0); - // MemoryRegion.storeNat8(btree.branches, branch_address + MC.LAYOUT_VERSION_START, MC.LAYOUT_VERSION); MemoryRegion.storeNat16(btree.branches, branch_address + MC.INDEX_START, 0); MemoryRegion.storeNat16(btree.branches, branch_address + MC.COUNT_START, 0); MemoryRegion.storeNat64(btree.branches, branch_address + MC.SUBTREE_COUNT_START, 0); MemoryRegion.storeNat64(btree.branches, branch_address + MC.PARENT_START, MC.NULL_ADDRESS); + MemoryRegion.storeNat64(btree.branches, branch_address + MC.KEY_PREFIX_START, MC.NULL_ADDRESS); + MemoryRegion.storeNat16(btree.branches, branch_address + MC.KEY_PREFIX_SIZE_START, 0); var i = 0; @@ -457,7 +462,11 @@ module Branch { public func get_key_blob(btree : MemoryBTree, branch_address : Nat, i : Nat) : ?(Blob) { let ?kv_address = Branch.get_key_address(btree, branch_address, i) else return null; - ?MemoryBlock.get_key_blob(btree, kv_address); + let key_blob = MemoryBlock.get_key_blob(btree, kv_address); + + let ?prefix_key = Branch.get_prefix_key(btree, branch_address) else return ?key_blob; + + ?Utils.append_blob(prefix_key, key_blob); }; public func set_key_address_to_null(btree : MemoryBTree, branch_address : Nat, i : Nat) { @@ -482,6 +491,29 @@ module Branch { MemoryRegion.loadNat64(btree.branches, branch_address + MC.SUBTREE_COUNT_START) |> Nat64.toNat(_); }; + public func get_prefix_key_block(btree : MemoryBTree, address : Nat) : ?MemoryBlock { + let prefix_key = MemoryRegion.loadNat64(btree.branches, address + MC.KEY_PREFIX_START); + if (prefix_key == MC.NULL_ADDRESS) return null; + + let prefix_key_nat = Nat64.toNat(prefix_key); + let prefix_key_size = MemoryRegion.loadNat16(btree.branches, address + MC.KEY_PREFIX_SIZE_START) |> Nat16.toNat(_); + + ?(prefix_key_nat, prefix_key_size); + }; + + public func get_prefix_key_size(btree : MemoryBTree, address : Nat) : Nat { + let prefix_key = MemoryRegion.loadNat16(btree.branches, address + MC.KEY_PREFIX_SIZE_START) |> Nat16.toNat(_); + prefix_key; + }; + + public func get_prefix_key(btree : MemoryBTree, address : Nat) : ?Blob { + let ?prefix_key_block = get_prefix_key_block(btree, address) else return null; + + let prefix_key = MemoryRegion.loadBlob(btree.data, prefix_key_block.0, prefix_key_block.1); + + ?prefix_key; + }; + public func binary_search(btree : MemoryBTree, btree_utils : BTreeUtils, address : Nat, cmp : (K, K) -> Int8, search_key : K, arr_len : Nat) : Int { if (arr_len == 0) return -1; // should insert at index Int.abs(i + 1) var l = 0; @@ -601,6 +633,28 @@ module Branch { MemoryRegion.storeNat64(btree.branches, branch_address + MC.PARENT_START, parent); }; + public func update_prefix_key(btree : MemoryBTree, address : Nat, opt_prefix_key_blob : ?Blob) { + let (new_prefix_key_address, new_size) : (Nat64, Nat) = switch (opt_prefix_key_blob) { + case (null) (MC.NULL_ADDRESS, 0); + case (?prefix_key_blob) { + + let new_prefix_key_address = switch (get_prefix_key_block(btree, address)) { + case (?(prev_address, prev_size)) { + MemoryRegion.replaceBlob(btree.data, prev_address, prev_size, prefix_key_blob); + }; + case (_) { + MemoryRegion.addBlob(btree.data, prefix_key_blob); + }; + }; + + (Nat64.fromNat(new_prefix_key_address), prefix_key_blob.size()); + }; + }; + + MemoryRegion.storeNat64(btree.leaves, address + MC.KEY_PREFIX_START, new_prefix_key_address); + MemoryRegion.storeNat16(btree.leaves, address + MC.KEY_PREFIX_SIZE_START, Nat16.fromNat(new_size)); + }; + public func update_median_key_address(btree : MemoryBTree, parent_address : Nat, child_index : Nat, new_key_address : UniqueId) { var curr_address = parent_address; var i = child_index; @@ -614,6 +668,32 @@ module Branch { Branch.put_key_address(btree, curr_address, i - 1, new_key_address); }; + public func update_median_key(btree : MemoryBTree, parent_address : Nat, child_index : Nat, new_key : Blob) { + + var curr_address = parent_address; + var i = child_index; + + while (i == 0) { + i := Branch.get_index(btree, curr_address); + let ?parent_address = Branch.get_parent(btree, curr_address) else return; // occurs when key is the first key in the tree + curr_address := parent_address; + }; + + let ?prefix_key = Branch.get_prefix_key(btree, curr_address) else return Debug.trap("Branch.update_median_key: prefix key is null"); + let prefix_key_bytes = Blob.toArray(prefix_key); + let new_key_bytes = Blob.toArray(new_key); + + assert Utils.get_prefix_size(prefix_key_bytes, new_key_bytes) == prefix_key.size(); + + let key_suffix_bytes = Array.subArray(new_key_bytes, prefix_key.size(), new_key.size() - prefix_key.size() : Nat); + let key_suffix = Blob.fromArray(key_suffix_bytes); + + let key_address = MemoryBlock.store(btree, key_suffix, ""); + + Branch.put_key_address(btree, curr_address, i - 1, key_address); + + }; + // inserts node but does not update the subtree size with the node's subtree size // because it's likely that the inserted node is a node split from a node // in this branch's subtree @@ -802,9 +882,88 @@ module Branch { let ?_median_key_address = median_key_address else Debug.trap("Branch.split: median key_block is null"); Branch.put_key_address(btree, right_address, btree.node_capacity - 2, _median_key_address); + if (btree.supports_key_compression) { + compress_similar_keys(btree, branch_address, median); + compress_similar_keys(btree, right_address, right_cnt); + }; + right_address; }; + public func compress_similar_keys(btree : MemoryBTree, branch_address : Nat, count : Nat) { + if (count == 0) return; + + let ?smallest_key = Branch.get_key_blob(btree, branch_address, 0) else Debug.trap("Branch.split: smallest_key is null"); + let ?largest_key = Branch.get_key_blob(btree, branch_address, count - 1) else Debug.trap("Branch.split: largest_key is null"); + + let smallest_key_bytes = Blob.toArray(smallest_key); + let largest_key_bytes = Blob.toArray(largest_key); + + let prefix_key_size = Utils.get_prefix_size(smallest_key_bytes, largest_key_bytes); + + let prev_prefix_key_size = Branch.get_prefix_key_size(btree, branch_address); + + if (prefix_key_size > prev_prefix_key_size) { + let prev_prefix_key : Blob = switch (Branch.get_prefix_key(btree, branch_address)) { + case (?prefix_key) prefix_key; + case (_) ""; + }; + + let additional_prefix_key = Array.subArray(smallest_key_bytes, 0, prefix_key_size); + + let new_prefix_key = Blob.fromArray(Array.append(Blob.toArray(prev_prefix_key), additional_prefix_key)); + + update_prefix_key(btree, branch_address, ?new_prefix_key); + + remove_prefix_from_keys(btree, branch_address, count, prefix_key_size - prev_prefix_key_size); + + } else if (prefix_key_size < prev_prefix_key_size) { + let new_prefix_key = Blob.fromArray(Array.subArray(smallest_key_bytes, 0, prefix_key_size)); + update_prefix_key(btree, branch_address, ?new_prefix_key); + + let additional_prefix_key = Array.subArray(smallest_key_bytes, 0, prefix_key_size) |> Blob.fromArray(_); + + add_prefix_to_keys(btree, branch_address, count, additional_prefix_key); + }; + + }; + + public func remove_prefix_from_keys(btree : MemoryBTree, branch_address : Nat, count : Nat, remove_size : Nat) { + + for (i in Itertools.range(0, count)) { + let ?branch_key_address = Branch.get_key_address(btree, branch_address, i) else Debug.trap("Branch.add_prefix_to_keys: branch_key_address is null"); + let branch_key = MemoryBlock.get_key_blob(btree, branch_key_address); + let branch_key_bytes = Blob.toArray(branch_key); + let new_branch_key_bytes = Array.subArray(branch_key_bytes, remove_size, branch_key_bytes.size() - remove_size : Nat); + + let new_branch_key = Blob.fromArray(new_branch_key_bytes); + + let new_branch_key_address = MemoryBlock.replace_key(btree, branch_key_address, new_branch_key); + + Branch.put_key_address(btree, branch_address, i, new_branch_key_address); + + }; + + }; + + public func add_prefix_to_keys(btree : MemoryBTree, branch_address : Nat, count : Nat, prefix_key : Blob) { + + for (i in Itertools.range(0, count)) { + let ?branch_key_address = Branch.get_key_address(btree, branch_address, i) else Debug.trap("Branch.add_prefix_to_keys: branch_key_address is null"); + let branch_key = MemoryBlock.get_key_blob(btree, branch_key_address); + let branch_key_bytes = Blob.toArray(branch_key); + let new_branch_key_bytes = Array.append(Blob.toArray(prefix_key), branch_key_bytes); + + let new_branch_key = Blob.fromArray(new_branch_key_bytes); + + let new_branch_key_address = MemoryBlock.replace_key(btree, branch_key_address, new_branch_key); + + Branch.put_key_address(btree, branch_address, i, new_branch_key_address); + + }; + + }; + public func get_larger_neighbour(btree : MemoryBTree, parent_address : Address, index : Nat) : ?Address { let ?child = Branch.get_child(btree, parent_address, index) else Debug.trap("1. get_larger_neighbor: accessed a null value"); @@ -1004,8 +1163,11 @@ module Branch { Branch.put_key_address(btree, parent, branch_index, median_key_address); }; - Branch.update_count(btree, branch, branch_count + data_to_move); - Branch.update_count(btree, neighbour, neighbour_count - data_to_move); + let new_branch_count = branch_count + data_to_move; + let new_neighbour_count = neighbour_count - data_to_move : Nat; + + Branch.update_count(btree, branch, new_branch_count); + Branch.update_count(btree, neighbour, new_neighbour_count); let branch_subtree_size = Branch.get_subtree_size(btree, branch); Branch.update_subtree_size(btree, branch, branch_subtree_size + moved_subtree_size); @@ -1013,6 +1175,9 @@ module Branch { let neighbour_subtree_size = Branch.get_subtree_size(btree, neighbour); Branch.update_subtree_size(btree, neighbour, neighbour_subtree_size - moved_subtree_size); + compress_similar_keys(btree, branch, new_branch_count); + compress_similar_keys(btree, neighbour, new_neighbour_count); + true; }; @@ -1067,6 +1232,8 @@ module Branch { // Branch.remove(btree, parent, right_index); + compress_similar_keys(btree, left, left_count + right_count); + right; }; }; diff --git a/src/MemoryBTree/modules/Common.mo b/src/MemoryBTree/modules/Common.mo new file mode 100644 index 0000000..004c762 --- /dev/null +++ b/src/MemoryBTree/modules/Common.mo @@ -0,0 +1,3 @@ +module { + +}; diff --git a/src/MemoryBTree/modules/Leaf.mo b/src/MemoryBTree/modules/Leaf.mo index f4a51ea..bcbd76d 100644 --- a/src/MemoryBTree/modules/Leaf.mo +++ b/src/MemoryBTree/modules/Leaf.mo @@ -1,5 +1,6 @@ /// Leaf Node Operations +import Blob "mo:base/Blob"; import Debug "mo:base/Debug"; import Array "mo:base/Array"; import Nat "mo:base/Nat"; @@ -16,6 +17,7 @@ import MemoryBlock "MemoryBlock"; import T "Types"; import Migrations "../Migrations"; import Utils "../../Utils"; +import Itertools "mo:itertools/Iter"; module Leaf { public type Leaf = Migrations.Leaf; @@ -46,6 +48,9 @@ module Leaf { public let NEXT_START = 24; + public let KEY_PREFIX_START = 32; + public let KEY_PREFIX_SIZE_START = 40; + public let KV_IDS_START = HEADER_SIZE; // access constants @@ -93,6 +98,9 @@ module Leaf { MemoryRegion.storeNat64(btree.leaves, leaf_address + Leaf.PREV_START, NULL_ADDRESS); MemoryRegion.storeNat64(btree.leaves, leaf_address + Leaf.NEXT_START, NULL_ADDRESS); + MemoryRegion.storeNat64(btree.leaves, leaf_address + Leaf.KEY_PREFIX_START, NULL_ADDRESS); + MemoryRegion.storeNat16(btree.leaves, leaf_address + Leaf.KEY_PREFIX_SIZE_START, 0); + var i = 0; // keys @@ -291,7 +299,11 @@ module Leaf { public func get_key_blob(btree : MemoryBTree, address : Nat, i : Nat) : ?(Blob) { let ?id = get_kv_address(btree, address, i) else return null; - ?MemoryBlock.get_key_blob(btree, id); + let key_blob = MemoryBlock.get_key_blob(btree, id); + + let ?prefix_key = get_prefix_key(btree, address) else return ?key_blob; + + ?Utils.append_blob(prefix_key, key_blob); }; public func set_key_to_null(btree : MemoryBTree, address : Nat, i : Nat) { @@ -357,6 +369,28 @@ module Leaf { ?Nat64.toNat(prev); }; + public func get_prefix_key_block(btree : MemoryBTree, address : Nat) : ?MemoryBlock { + let prefix_key = MemoryRegion.loadNat64(btree.leaves, address + KEY_PREFIX_START); + if (prefix_key == NULL_ADDRESS) return null; + + let prefix_key_nat = Nat64.toNat(prefix_key); + let prefix_key_size = MemoryRegion.loadNat16(btree.leaves, address + KEY_PREFIX_SIZE_START) |> Nat16.toNat(_); + + ?(prefix_key_nat, prefix_key_size); + }; + + public func get_prefix_key_size(btree : MemoryBTree, address : Nat) : Nat { + MemoryRegion.loadNat16(btree.leaves, address + KEY_PREFIX_SIZE_START) |> Nat16.toNat(_); + }; + + public func get_prefix_key(btree : MemoryBTree, address : Nat) : ?Blob { + let ?prefix_key_block = get_prefix_key_block(btree, address) else return null; + + let prefix_key = MemoryRegion.loadBlob(btree.data, prefix_key_block.0, prefix_key_block.1); + + ?prefix_key; + }; + public func binary_search(btree : MemoryBTree, btree_utils : BTreeUtils, address : Nat, cmp : (K, K) -> Int8, search_key : K, arr_len : Nat) : Int { if (arr_len == 0) return -1; // should insert at index Int.abs(i + 1) var l = 0; @@ -499,12 +533,41 @@ module Leaf { MemoryRegion.storeNat64(btree.leaves, address + PREV_START, prev); }; + public func update_kv_address(btree : MemoryBTree, address : Nat, index : Nat, new_id : UniqueId) { + let kv_address_offset = get_kv_address_offset(address, index); + MemoryRegion.storeNat64(btree.leaves, kv_address_offset, Nat64.fromNat(new_id)); + }; + + public func update_prefix_key(btree : MemoryBTree, address : Nat, opt_prefix_key_blob : ?Blob) { + let (new_prefix_key_address, new_size) : (Nat64, Nat) = switch (opt_prefix_key_blob) { + case (null) (NULL_ADDRESS, 0); + case (?prefix_key_blob) { + + let new_prefix_key_address = switch (get_prefix_key_block(btree, address)) { + case (?(prev_address, prev_size)) { + MemoryRegion.replaceBlob(btree.data, prev_address, prev_size, prefix_key_blob); + }; + case (_) { + MemoryRegion.addBlob(btree.data, prefix_key_blob); + }; + }; + + (Nat64.fromNat(new_prefix_key_address), prefix_key_blob.size()); + }; + }; + + MemoryRegion.storeNat64(btree.leaves, address + KEY_PREFIX_START, new_prefix_key_address); + MemoryRegion.storeNat16(btree.leaves, address + KEY_PREFIX_SIZE_START, Nat16.fromNat(new_size)); + + }; + public func clear(btree : MemoryBTree, leaf_address : Nat) { Leaf.update_index(btree, leaf_address, 0); Leaf.update_count(btree, leaf_address, 0); Leaf.update_parent(btree, leaf_address, null); Leaf.update_prev(btree, leaf_address, null); Leaf.update_next(btree, leaf_address, null); + Leaf.update_prefix_key(btree, leaf_address, null); }; public func insert(btree : MemoryBTree, leaf_address : Nat, index : Nat, new_id : UniqueId) { @@ -569,8 +632,10 @@ module Leaf { MemoryRegion.storeBlob(btree.leaves, new_start, blob_slice); elems_removed_from_left += right_cnt; + + Leaf.insert(btree, leaf_address, elem_index, new_id); } else { - // | left | elem | right | + // | left | <-> | elem | right | // left var size = elem_index - (i + median - offset) : Nat; var start = get_kv_address_offset(leaf_address, i + median - offset); @@ -599,10 +664,6 @@ module Leaf { Leaf.update_count(btree, leaf_address, arr_len - elems_removed_from_left); - if (not is_elem_added_to_right) { - Leaf.insert(btree, leaf_address, elem_index, new_id); - }; - Leaf.update_count(btree, leaf_address, median); Leaf.update_count(btree, right_leaf_address, right_cnt); @@ -626,9 +687,88 @@ module Leaf { case (_) {}; }; + if (btree.supports_key_compression) { + compress_similar_keys(btree, leaf_address, median); + compress_similar_keys(btree, right_leaf_address, right_cnt); + }; + right_leaf_address; }; + public func compress_similar_keys(btree : MemoryBTree, leaf_address : Nat, count : Nat) { + if (count == 0) return; + + let ?smallest_key = Leaf.get_key_blob(btree, leaf_address, 0) else Debug.trap("Leaf.split: smallest_key is null"); + let ?largest_key = Leaf.get_key_blob(btree, leaf_address, count - 1) else Debug.trap("Leaf.split: largest_key is null"); + + let smallest_key_bytes = Blob.toArray(smallest_key); + let largest_key_bytes = Blob.toArray(largest_key); + + let prefix_key_size = Utils.get_prefix_size(smallest_key_bytes, largest_key_bytes); + + let prev_prefix_key_size = Leaf.get_prefix_key_size(btree, leaf_address); + + if (prefix_key_size > prev_prefix_key_size) { + let prev_prefix_key : Blob = switch (Leaf.get_prefix_key(btree, leaf_address)) { + case (?prefix_key) prefix_key; + case (_) ""; + }; + + let additional_prefix_key = Array.subArray(smallest_key_bytes, 0, prefix_key_size); + + let new_prefix_key = Blob.fromArray(Array.append(Blob.toArray(prev_prefix_key), additional_prefix_key)); + + update_prefix_key(btree, leaf_address, ?new_prefix_key); + + remove_prefix_from_keys(btree, leaf_address, count, prefix_key_size - prev_prefix_key_size); + + } else if (prefix_key_size < prev_prefix_key_size) { + let new_prefix_key = Blob.fromArray(Array.subArray(smallest_key_bytes, 0, prefix_key_size)); + update_prefix_key(btree, leaf_address, ?new_prefix_key); + + let additional_prefix_key = Array.subArray(smallest_key_bytes, 0, prefix_key_size) |> Blob.fromArray(_); + + add_prefix_to_keys(btree, leaf_address, count, additional_prefix_key); + }; + + }; + + public func remove_prefix_from_keys(btree : MemoryBTree, leaf_address : Nat, count : Nat, remove_size : Nat) { + + for (i in Itertools.range(0, count)) { + let ?leaf_key_address = Leaf.get_kv_address(btree, leaf_address, i) else Debug.trap("Leaf.add_prefix_to_keys: leaf_key_address is null"); + let leaf_key = MemoryBlock.get_key_blob(btree, leaf_key_address); + let leaf_key_bytes = Blob.toArray(leaf_key); + let new_leaf_key_bytes = Array.subArray(leaf_key_bytes, remove_size, leaf_key_bytes.size() - remove_size : Nat); + + let new_leaf_key = Blob.fromArray(new_leaf_key_bytes); + + let new_leaf_key_address = MemoryBlock.replace_key(btree, leaf_key_address, new_leaf_key); + + Leaf.update_kv_address(btree, leaf_address, i, new_leaf_key_address); + + }; + + }; + + public func add_prefix_to_keys(btree : MemoryBTree, leaf_address : Nat, count : Nat, prefix_key : Blob) { + + for (i in Itertools.range(0, count)) { + let ?leaf_key_address = Leaf.get_kv_address(btree, leaf_address, i) else Debug.trap("Leaf.add_prefix_to_keys: leaf_key_address is null"); + let leaf_key = MemoryBlock.get_key_blob(btree, leaf_key_address); + let leaf_key_bytes = Blob.toArray(leaf_key); + let new_leaf_key_bytes = Array.append(Blob.toArray(prefix_key), leaf_key_bytes); + + let new_leaf_key = Blob.fromArray(new_leaf_key_bytes); + + let new_leaf_key_address = MemoryBlock.replace_key(btree, leaf_key_address, new_leaf_key); + + Leaf.update_kv_address(btree, leaf_address, i, new_leaf_key_address); + + }; + + }; + public func shift(btree : MemoryBTree, leaf_address : Nat, start : Nat, end : Nat, offset : Int) { if (offset == 0) return; @@ -698,6 +838,9 @@ module Leaf { Leaf.update_count(btree, leaf, leaf_count + data_to_move); Leaf.update_count(btree, neighbour, neighbour_count - data_to_move); + compress_similar_keys(btree, leaf, leaf_count + data_to_move); + compress_similar_keys(btree, neighbour, neighbour_count - data_to_move); + // Debug.print("end redistribution"); true; }; @@ -749,6 +892,8 @@ module Leaf { case (_) {}; }; + compress_similar_keys(btree, left, left_count + right_count); + }; }; diff --git a/src/MemoryBTree/modules/MemoryBlock.mo b/src/MemoryBTree/modules/MemoryBlock.mo index ab8901c..d6d79f2 100644 --- a/src/MemoryBTree/modules/MemoryBlock.mo +++ b/src/MemoryBTree/modules/MemoryBlock.mo @@ -46,21 +46,7 @@ module MemoryBlock { MemoryRegion.isAllocated(btree.data, block_address); }; - public func store(btree : MemoryBTree, key : Blob, val : Blob) : UniqueId { - let block_address = MemoryRegion.allocate(btree.data, KEY_BLOB_START + key.size()); - - let val_address = MemoryRegion.addBlob(btree.values, val); - - MemoryRegion.storeNat8(btree.data, block_address, 0); // reference count - MemoryRegion.storeNat64(btree.data, block_address + VAL_POINTER_START, Nat64.fromNat(val_address)); // value mem block address - MemoryRegion.storeNat32(btree.data, block_address + VAL_SIZE_START, Nat32.fromNat(val.size())); // value mem block size - - MemoryRegion.storeNat16(btree.data, block_address + KEY_SIZE_START, Nat16.fromNat(key.size())); // key mem block size - MemoryRegion.storeBlob(btree.data, block_address + KEY_BLOB_START, key); - - block_address; - }; - + public func next_id(btree : MemoryBTree) : UniqueId { let block_address = MemoryRegion.allocate(btree.data, BLOCK_ENTRY_SIZE); @@ -88,29 +74,22 @@ module MemoryBlock { Nat8.toNat(ref_count - 1); }; - public func replace_val(btree : MemoryBTree, block_address : UniqueId, new_val : Blob) : Blob { - - let prev_val_address = MemoryRegion.loadNat64(btree.data, block_address + VAL_POINTER_START) |> Nat64.toNat(_); - let prev_val_size = MemoryRegion.loadNat16(btree.data, block_address + VAL_SIZE_START) |> Nat16.toNat(_); - let prev_val_blob = MemoryRegion.loadBlob(btree.values, prev_val_address, prev_val_size); - - if (prev_val_size == new_val.size()) { - MemoryRegion.storeBlob(btree.values, prev_val_address, new_val); - return prev_val_blob; - }; - - let new_val_address = MemoryRegion.resize(btree.values, prev_val_address, prev_val_size, new_val.size()); + public func store(btree : MemoryBTree, key : Blob, val : Blob) : UniqueId { + let block_address = MemoryRegion.allocate(btree.data, KEY_BLOB_START + key.size()); - MemoryRegion.storeBlob(btree.values, new_val_address, new_val); - MemoryRegion.storeNat32(btree.data, block_address + VAL_SIZE_START, Nat32.fromNat(new_val.size())); + let val_address = MemoryRegion.addBlob(btree.values, val); - if (new_val_address == prev_val_address) return prev_val_blob; + MemoryRegion.storeNat8(btree.data, block_address, 0); // reference count + MemoryRegion.storeNat64(btree.data, block_address + VAL_POINTER_START, Nat64.fromNat(val_address)); // value mem block address + MemoryRegion.storeNat32(btree.data, block_address + VAL_SIZE_START, Nat32.fromNat(val.size())); // value mem block size - MemoryRegion.storeNat64(btree.data, block_address + VAL_POINTER_START, Nat64.fromNat(new_val_address)); + MemoryRegion.storeNat16(btree.data, block_address + KEY_SIZE_START, Nat16.fromNat(key.size())); // key mem block size + MemoryRegion.storeBlob(btree.data, block_address + KEY_BLOB_START, key); - prev_val_blob; + block_address; }; + public func get_key_blob(btree : MemoryBTree, block_address : UniqueId) : Blob { let key_size = MemoryRegion.loadNat16(btree.data, block_address + KEY_SIZE_START) |> Nat16.toNat(_); let blob = MemoryRegion.loadBlob(btree.data, block_address + KEY_BLOB_START, key_size); @@ -124,6 +103,17 @@ module MemoryBlock { (block_address + KEY_BLOB_START, key_size); }; + // the unique id might be different after resizing + public func replace_key(btree: MemoryBTree, block_address : UniqueId, new_key: Blob) : UniqueId { + + let prev_size = MemoryRegion.loadNat16(btree.data, block_address + KEY_SIZE_START) |> Nat16.toNat(_); + let prev_address = block_address + KEY_BLOB_START; + + let new_address = MemoryRegion.replaceBlob(btree.data, prev_address, prev_size, new_key); + + new_address; + }; + public func get_val_block(btree : MemoryBTree, block_address : UniqueId) : MemoryBlock { let val_address = MemoryRegion.loadNat64(btree.data, block_address + VAL_POINTER_START) |> Nat64.toNat(_); @@ -142,6 +132,30 @@ module MemoryBlock { blob; }; + + public func replace_val(btree : MemoryBTree, block_address : UniqueId, new_val : Blob) : Blob { + + let prev_val_address = MemoryRegion.loadNat64(btree.data, block_address + VAL_POINTER_START) |> Nat64.toNat(_); + let prev_val_size = MemoryRegion.loadNat16(btree.data, block_address + VAL_SIZE_START) |> Nat16.toNat(_); + let prev_val_blob = MemoryRegion.loadBlob(btree.values, prev_val_address, prev_val_size); + + if (prev_val_size == new_val.size()) { + MemoryRegion.storeBlob(btree.values, prev_val_address, new_val); + return prev_val_blob; + }; + + let new_val_address = MemoryRegion.resize(btree.values, prev_val_address, prev_val_size, new_val.size()); + + MemoryRegion.storeBlob(btree.values, new_val_address, new_val); + MemoryRegion.storeNat32(btree.data, block_address + VAL_SIZE_START, Nat32.fromNat(new_val.size())); + + if (new_val_address == prev_val_address) return prev_val_blob; + + MemoryRegion.storeNat64(btree.data, block_address + VAL_POINTER_START, Nat64.fromNat(new_val_address)); + + prev_val_blob; + }; + public func remove(btree : MemoryBTree, block_address : UniqueId) { assert MemoryRegion.loadNat8(btree.data, block_address + REFERENCE_COUNT_START) == 0; diff --git a/src/MemoryBTree/modules/Methods.mo b/src/MemoryBTree/modules/Methods.mo index 601aad1..69bda50 100644 --- a/src/MemoryBTree/modules/Methods.mo +++ b/src/MemoryBTree/modules/Methods.mo @@ -28,6 +28,7 @@ module Methods { var curr_address = btree.root; var is_address_a_leaf = btree.is_root_a_leaf; var opt_key_blob : ?Blob = _opt_key_blob; + var opt_key_bytes : ?[Nat8] = null; loop { switch (is_address_a_leaf) { @@ -55,7 +56,30 @@ module Methods { case (?key_blob) key_blob; }; - Branch.binary_search_blob_seq(btree, curr_address, cmp, key_blob, count - 1); + if (btree.supports_key_compression) { + let leaf_prefix_key_size : Nat = Branch.get_prefix_key_size(btree, curr_address); + + if (leaf_prefix_key_size > 0) { + let key_bytes = switch (opt_key_bytes) { + case (null) { + let key_bytes = Blob.toArray(key_blob); + opt_key_bytes := ?key_bytes; + key_bytes; + }; + case (?key_bytes) key_bytes; + }; + + let key_suffix_bytes : [Nat8] = Array.subArray(key_bytes, leaf_prefix_key_size, key_bytes.size() - leaf_prefix_key_size : Nat); + let key_suffix_blob = Blob.fromArray(key_suffix_bytes); + + Branch.binary_search_blob_seq(btree, curr_address, cmp, key_suffix_blob, count - 1); + } else { + Branch.binary_search_blob_seq(btree, curr_address, cmp, key_blob, count - 1); + }; + } else { + Branch.binary_search_blob_seq(btree, curr_address, cmp, key_blob, count - 1); + }; + }; }; diff --git a/src/MemoryBTree/modules/Types.mo b/src/MemoryBTree/modules/Types.mo index c25e5a2..38958ab 100644 --- a/src/MemoryBTree/modules/Types.mo +++ b/src/MemoryBTree/modules/Types.mo @@ -39,4 +39,9 @@ module { #leaf; }; + public type InitOptions = { + node_capacity : ?Nat; + enable_key_compression : ?Bool; + }; + }; diff --git a/src/Utils.mo b/src/Utils.mo index a9cdd78..417a2ea 100644 --- a/src/Utils.mo +++ b/src/Utils.mo @@ -8,6 +8,8 @@ import Iter "mo:base/Iter"; import Debug "mo:base/Debug"; import Result "mo:base/Result"; +import Itertools "mo:itertools/Iter"; + module { type Buffer = Buffer.Buffer; @@ -183,4 +185,41 @@ module { Nat64.toNat(n64); }; + // is 'a' a prefix of 'b'? + public func is_prefix(a : [Nat8], b : [Nat8]) : Bool { + let len = a.size(); + if (len > b.size()) return false; + for (i in Itertools.range(0, len)) { + if (a[i] != b[i]) return false; + }; + true; + }; + + public func get_prefix_size(a : [Nat8], b : [Nat8]) : Nat { + let len = a.size(); + if (len > b.size()) return 0; + for (i in Itertools.range(0, len)) { + if (a[i] != b[i]) return i; + }; + len; + }; + + public func get_prefix(a : [Nat8], b : [Nat8]) : [Nat8] { + let len = a.size(); + if (len > b.size()) return []; + for (i in Itertools.range(0, len)) { + if (a[i] != b[i]) return Array.subArray(b, 0, i); + }; + a; + }; + + public func append_blob(a : Blob, b : Blob) : Blob { + Blob.fromArray( + Array.append( + Blob.toArray(a), + Blob.toArray(b), + ) + ); + }; + }; diff --git a/tests/MemoryBTree/MemoryBTree.Migrations.Test.mo b/tests/MemoryBTree/MemoryBTree.Migrations.Test.mo index bed87fe..1903cb9 100644 --- a/tests/MemoryBTree/MemoryBTree.Migrations.Test.mo +++ b/tests/MemoryBTree/MemoryBTree.Migrations.Test.mo @@ -12,10 +12,10 @@ suite( test( "deploys current version", func() { - let vs_memory_btree = StableMemoryBTree.new(?32); + let vs_memory_btree = StableMemoryBTree.new(null); ignore Migrations.getCurrentVersion(vs_memory_btree); // should not trap - let memory_btree = MemoryBTree.new(?32); + let memory_btree = MemoryBTree.new(null); let version = MemoryBTree.toVersioned(memory_btree); ignore Migrations.getCurrentVersion(version); // should not trap }, diff --git a/tests/MemoryBTree/MemoryBTree.Test.mo b/tests/MemoryBTree/MemoryBTree.Test.mo index 9b7c41f..4bd0235 100644 --- a/tests/MemoryBTree/MemoryBTree.Test.mo +++ b/tests/MemoryBTree/MemoryBTree.Test.mo @@ -49,7 +49,7 @@ let random = Itertools.toBuffer<(Nat, Nat)>( let sorted = Buffer.clone(random); sorted.sort(func(a : (Nat, Nat), b : (Nat, Nat)) : Order = Nat.compare(a.0, b.0)); -let btree = MemoryBTree._new_with_options(?8, ?0, false); +let btree = MemoryBTree.new(?{ node_capacity = null; enable_key_compression = ?false }); let btree_utils = MemoryBTree.createUtils(TypeUtils.BigEndian.Nat, TypeUtils.BigEndian.Nat); suite(