微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

为什么我的 b 树有时不返回随机插入的项目?

如何解决“为什么我的 B 树有时不返回随机插入的项目?”这一问题

我不明白为什么我的 B 树遍历有时会乱序,而且只有在插入的数据是随机生成时才会出现。按顺序插入数据时,“数据有序”的断言总能通过。

我怀疑问题出在查找插入点的逻辑上,但我弄不清到底哪里有错,因为它有时又能正常工作。我认为需要找到键小于等于待插入键的最深节点(代码中的判断是 key >= child.key)。目前我觉得它可能跳过了最深的节点,但我不确定。希望有人能帮忙看看代码。

请注意下面的两个断言:第一个针对按顺序插入,第二个针对随机插入。断言错误上方打印的是遍历顺序,其中只有部分是有序的,这让我感到困惑。

import bisect
import random

class BTree():
    """Tree node used both for internal nodes and for stored key/value items.

    ``leaf`` marks nodes that ``walk`` and ``search`` yield; ``children`` holds
    the nodes below this one and ``M`` is compared against ``len(children)``
    to decide when a node must be split.

    NOTE(review): several methods below read ``key``/``value`` without
    declaring them as parameters, and several ``BTree(...)`` calls pass fewer
    positional arguments than ``__init__`` requires. The listing looks like it
    lost tokens when it was published -- confirm against the author's source.
    """

    def __init__(self,leaf,M,key,value,parent=None):
        """Store the node flags, payload and parent link; start with no children."""
        self.leaf = leaf
        self.children = []
        self.M = M
        self.key = key
        self.value = value
        self.parent = parent
    
   
    def walk(self):
        """Yield every descendant flagged ``leaf``, depth first, in child order."""
        for child in self.children:
            
            if child.leaf:
                yield child
            # Recurse into every child, whether or not it was yielded itself.
            yield from child.walk()
        
    def __repr__(self):
        """Return ``"key:value"``."""
        return "{}:{}".format(self.key,self.value)

    def __str__(self):
        """Return ``"key:value"`` (same text as ``__repr__``)."""
        return "{}:{}".format(self.key,self.value)
        
    def insert(self,height=1,parent=None):
        """Find a node for ``key``, insert there, and split full nodes upward.

        Returns ``self``, or the result of re-inserting into a newly created
        root when the split reached a node without a parent.

        NOTE(review): ``key`` and ``value`` are read throughout but are not
        parameters here; the corrected excerpt later on this page declares
        ``insert(self,key,value,height=1,parent=None)``.
        """
        next_children = self.children
        leaf = self
        found = False
        parents = [self]
        child = None
        while found == False:
            next_children_changed = False
            # The loop iterates the list that was bound when it started;
            # rebinding ``next_children`` inside only affects the next pass.
            for child in next_children:
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key,key))
                    next_children = child.children
                    
                    
                    # The matched child has no children: stop descending.
                    if len(next_children) == 0:
                        found = True
    
                    else:
                        parents.append(child)
                        leaf = child
                        next_children_changed = True
                    # NOTE(review): there is no ``break`` here, so every
                    # matching sibling is processed. If an earlier sibling with
                    # children and a later sibling without children both
                    # match, ``leaf`` stays on the earlier one -- possibly the
                    # out-of-order insertion the question describes; confirm.
                    
                    
                        
                        
            if not next_children_changed:
                found = True
                
            
        
        print("Trying to insert {} at Found insertion leaf {}".format(key,leaf))
        # ``walk`` here resolves to a module-level name, not to the method.
        walk(leaf)
        
        
        if len(leaf.children) < leaf.M:

            leaf.insert_non_full(key,parents[-1])
            
        else:
            # The chosen node is full: split it and every full ancestor.
            current = leaf
            last_current = current

            inserted = False
            new_root = None
            
            
            while current != None:
                
                
                original_parent = current.parent
                if len(current.children) >= current.M:
                    



                    
                    new_left,new_right,separation_value = current.split()
                    new_left.parent = original_parent
                    new_right.parent = original_parent

                   

                    if original_parent == None:
                        
                        # NOTE(review): three positional arguments, while
                        # ``__init__`` declares four required ones.
                        new_root = BTree(False,self.M,None)
                        
                        parent = new_root
                        

                        new_root.children.append(new_left)
                        
                        new_root.children.append(new_right)
                        
                        new_root.key = new_left.key
                        new_root.value = new_left.value
                        

                    else:
                        
                        parent = original_parent
                        
                            
                        
                        
                        # Replace the split node by its two halves, then
                        # restore key order among the parent's children.
                        original_parent.children.remove(current)
                        original_parent.children.append(new_left)
                        
                        original_parent.children.append(new_right)
                        original_parent.sort()
                   
                        
                

                    new_left.parent = parent
                    new_right.parent = parent
                    
                    assert new_right.key > new_left.key
                        
                    

                




                last_current = current
                current = original_parent

                
                
            
            if new_root != None:
                # The split reached the top: retry the insert from the new root.
                print("Split went to root")
                
                return new_root.insert(key,value)
            else:
                
                # Retry now that the full nodes were split; the result of this
                # call is discarded and ``self`` is returned below.
                self.insert(key,value)
            return self
            
            
        
        return self
            
        
            

    def split(self):
        """Divide this node's children between two new non-leaf nodes.

        Returns ``(new_left, new_sibling, key)`` where ``key`` is the key of
        the child at the midpoint index.

        NOTE(review): the ``BTree(...)`` calls pass two positional arguments,
        while ``__init__`` declares four required ones.
        """

        
        new_self = BTree(True,None)
        new_self.key = self.key
        new_self.value = self.value
        new_left = BTree(False,None)
        new_sibling = BTree(False,None)
        midpoint = int((len(self.children)+1)/2)

                
        left_children = []
        if self.leaf:
            # A leaf being split keeps its own key/value as the first entry
            # of the left half.
            left_children = [new_self]

        left_children = left_children + self.children[0:midpoint]
        right_children = self.children[midpoint:]
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling

        new_sibling.key = right_children[0].key
        new_sibling.value = right_children[0].value

        new_left.children = left_children
        new_sibling.children = right_children

        new_left.leaf = False

        # Each half is labelled with the key/value of its first child.
        new_left.key = left_children[0].key
        new_left.value = left_children[0].value

        return new_left,new_sibling,self.children[midpoint].key


                
        
    def insert_after_split(self,parent):
        """Insert into the matching child, or directly into this node.

        NOTE(review): ``height`` is read before it is assigned and ``key`` is
        not a parameter, so this method cannot run as shown; nothing in this
        listing calls it.
        """
        height = height + 1
        
        
        insertion_point,index = self.find_location_for_key(key)

        if insertion_point == None:
            self.insert_non_full(key,parent)
        else:
            split = insertion_point.insert(key,parent=self)

            return split
        
        return self
    
    def insert_non_full(self,parent):
        """Insert a new leaf among the children at the position found by bisect.

        NOTE(review): ``key`` is not a parameter, ``insert`` calls this method
        with two arguments, and the ``BTree(...)`` call passes two positional
        arguments -- confirm the intended signature.
        """
        values = [child.key for child in self.children]
        new_pos = bisect.bisect(values,key)
        self.children.insert(new_pos,BTree(True,parent))
        return self

    def sort(self):
        """Sort the children in place by key."""
        self.children.sort(key=lambda x: x.key)
    
    def find_location_for_key(self,key):
        """Return ``(child, index)`` for the last child matching ``key``.

        Returns ``(None, -1)`` when no child matches.

        NOTE(review): ``cmp`` is not defined in this listing and is not a
        Python 3 builtin, while ``yield from`` elsewhere requires Python 3.
        """
        index = None
        for child in self.children:
            # No early exit: a later matching child overwrites an earlier one.
            if cmp(key,child.key) >= 0:
                  index = child,self.children.index(child)
        if index:
            return index
        else:
            return None,-1
    
    def search(self,greater_than_equal,less_than):
        """Yield leaves whose key lies in ``[greater_than_equal, less_than)``.

        Only children whose own key is inside the range are yielded or
        descended into.
        """
        
            
        for child in self.children:
            if child.key >= greater_than_equal and child.key < less_than:
                if child.leaf:
                    yield child
                    yield from child.search(greater_than_equal,less_than)
                else:
                    yield from child.search(greater_than_equal,less_than)
    
    def delete(self,key):
        """Remove the matching child, or recurse into the closest one.

        Returns ``True`` when a child was removed, ``False`` otherwise.
        """
        deletion_point,index = self.find_location_for_key(key)
        if deletion_point:
            if deletion_point.key == key:
                # The child is removed whatever its kind, together with
                # everything below it.
                self.children.remove(deletion_point)
                return True
            else:
                return deletion_point.delete(key)
        else:
            return False
        

    

def walk(item,spaces=0):
    """Print ``item`` and all of its descendants, one node per line.

    Nodes are printed parent first, then children in order; each level is
    indented by one more space, starting from ``spaces``.
    """
    pending = [(item, spaces)]
    while pending:
        node, depth = pending.pop()
        marker = "leaf" if node.leaf else ""
        print("{}{}={} {} {}".format(" " * depth,node.key,node.value,marker,node.parent))
        # Push children right-to-left so the leftmost child is printed next.
        pending.extend((child, depth + 1) for child in reversed(list(node.children)))

# Build a tree of order 3 from sequential keys; the constructor takes
# (leaf, M, key, value), so the empty root gets no key and no value.
root = (
    BTree(False,3,None,None)
    .insert(1,"1")
    .insert(2,"2")
    .insert(3,"3")
    .insert(4,"4")
)

# insert() returns the current root, which changes when the root is split.
for i in range(5,100):
    root = root.insert(i,str(i))


walk(root)

print(root.children)

def keysonly(items):
    """Lazily yield the ``key`` attribute of every element of ``items``."""
    yield from (entry.key for entry in items)

# Sequential inserts must come back from the traversal in key order.
walked_keys = list(keysonly(root.walk()))
assert sorted(walked_keys) == walked_keys

# Fresh tree of the same order for the random-insertion check.
root = BTree(False,3,None,None)


# Keys already inserted; randint may repeat a number.
seen = set()

for i in range(1,100):
    num1 = random.randint(0,100)
    if num1 not in seen:
        seen.add(num1)

        root = root.insert(num1,str(num1))



walk(root)

for item in root.walk():
    print(item.key,item.value)

# Random inserts must also come back in key order.
walked_keys = list(keysonly(root.walk()))
assert sorted(walked_keys) == walked_keys

解决方法

我重写了查找插入位置的逻辑,现在它应该总能得到正确的位置:反向遍历子节点,并在第一个键小于等于待插入键的子节点处 break。

执行插入时,我总是插入到 last_child 节点中,这样会填充已有节点,而不是新建一个子节点。

改动只有一处,而且很细微:

def insert(self,key,value,height=1,parent=None):
        """Excerpt: locate the node that should receive ``key``.

        Only the descent is shown; the excerpt ends once ``leaf`` has been
        chosen and does not perform the insertion itself.
        """
        next_children = self.children
        leaf = self
        found = False
        parents = [self]
        child = None
        # Node visited just before ``leaf``; starts as the node itself.
        last_child = self
        while found == False:
            next_children_changed = False
            # Scan from the highest key down so the first hit is the
            # rightmost child whose key does not exceed ``key``.
            for child in reversed(next_children):
                if key >= child.key:
                    print("Inspecting {} <= {} ".format(child.key,key))
                    next_children = child.children
                    
                    last_child = leaf
                    parents.append(child)
                    leaf = child
                    next_children_changed = True
                    # Stop at the first match instead of letting other
                    # siblings overwrite the choice.
                    break
                    
                    
                        
                        
            if not next_children_changed:
                # No child matched at this level: the descent is over.
                found = True
                
        
        # Use the node one step above the deepest match.
        leaf = last_child

完整的工作代码:

import bisect
import random

class BTree():
    """Node of a simple B-tree-like ordered map.

    The same class represents containers (``leaf`` is False) and stored
    items (``leaf`` is True, carrying ``key`` and ``value``). A container's
    ``key`` mirrors the smallest key stored below it and ``children`` is kept
    sorted by key. ``M`` is the maximum number of children per node.

    Insert through the root and keep the returned node, because the root is
    replaced when it is split::

        root = BTree(False, 3)
        root = root.insert(1, "one")
    """

    # Order used when ``M`` is passed as None.
    DEFAULT_ORDER = 3

    def __init__(self, leaf, M, key=None, value=None, parent=None):
        """Create a node without children.

        Args:
            leaf: True for a stored item, False for a container.
            M: maximum number of children; None selects ``DEFAULT_ORDER``.
            key: key of the item, or smallest key below a container.
            value: value stored with ``key``.
            parent: node that holds this node, None for a root.

        Raises:
            ValueError: if ``M`` is smaller than 2 (a full node could not be
                split into two non-empty halves).
        """
        if M is None:
            M = self.DEFAULT_ORDER
        if M < 2:
            raise ValueError("M must be at least 2")
        self.leaf = leaf
        self.children = []
        self.M = M
        self.key = key
        self.value = value
        self.parent = parent

    def walk(self):
        """Yield every stored item below this node in ascending key order."""
        for child in self.children:
            if child.leaf:
                yield child
            yield from child.walk()

    def __repr__(self):
        """Return ``"key:value"``."""
        return "{}:{}".format(self.key, self.value)

    __str__ = __repr__

    def insert(self, key, value, height=1, parent=None):
        """Insert ``key``/``value`` and return the root of the tree.

        Args:
            key: key to store; must be comparable with the existing keys.
            value: value stored with the key.
            height: unused, accepted for backward compatibility.
            parent: unused, accepted for backward compatibility.

        Returns:
            The node to use as root from now on: ``self``, or the topmost
            node after splitting when the chosen node was full.
        """
        root = self
        while True:
            target = root._find_insertion_node(key, value)
            if len(target.children) < target.M:
                target.insert_non_full(key, value, target)
                return root
            # The target is full: split it (and any ancestor that overflows
            # as a result), then search again from the top of the tree.
            root = target._split_full_ancestors()

    def _find_insertion_node(self, key, value):
        """Return the deepest container that should hold ``key``."""
        node = self
        while True:
            child, _ = node.find_location_for_key(key)
            if child is None:
                # ``key`` is smaller than every child of ``node``.
                if not node.children:
                    return node
                child = node.children[0]
                if child.leaf or not child.children:
                    return node
                # Continue down the leftmost container; ``key`` becomes the
                # smallest key below it, so keep its label in step.
                child.key, child.value = key, value
            elif not child.children:
                # The closest child is an item: ``key`` belongs next to it.
                return node
            node = child

    def _split_full_ancestors(self):
        """Split this full node, then every ancestor that overflows.

        Returns the topmost node of the tree after the splits.
        """
        node = self
        while True:
            above = node.parent
            if node is self or len(node.children) > node.M:
                left, right, _ = node.split()
                if above is None:
                    new_root = BTree(False, node.M, left.key, left.value)
                    new_root.children = [left, right]
                    left.parent = right.parent = new_root
                    return new_root
                # Put both halves where the split node was; this keeps the
                # parent's children sorted without re-sorting them.
                position = above.children.index(node)
                above.children[position:position + 1] = [left, right]
                left.parent = right.parent = above
            elif above is None:
                return node
            node = above

    def split(self):
        """Divide the children between two new containers.

        Returns:
            ``(new_left, new_sibling, separation_key)`` where
            ``separation_key`` is the first key of ``new_sibling``. Both new
            nodes have no parent yet; the caller links them into the tree.

        Raises:
            ValueError: if the node has fewer than two children.
        """
        if len(self.children) < 2:
            raise ValueError("cannot split a node with fewer than two children")
        midpoint = (len(self.children) + 1) // 2
        left_children = self.children[:midpoint]
        right_children = self.children[midpoint:]
        if self.leaf:
            # An item that owns children keeps its own entry at the front of
            # the left half.
            left_children.insert(0, BTree(True, self.M, self.key, self.value))

        new_left = BTree(False, self.M, left_children[0].key,
                         left_children[0].value)
        new_sibling = BTree(False, self.M, right_children[0].key,
                            right_children[0].value)
        new_left.children = left_children
        new_sibling.children = right_children
        for child in left_children:
            child.parent = new_left
        for child in right_children:
            child.parent = new_sibling
        return new_left, new_sibling, new_sibling.key

    def insert_after_split(self, key, value, parent=None, height=0):
        """Insert below the closest child, or directly here if none matches.

        Kept for backward compatibility; ``insert`` does not use it.
        ``height`` is unused. Returns ``self`` when the item was added to
        this node, otherwise the result of the child's ``insert``.
        """
        insertion_point, _ = self.find_location_for_key(key)
        if insertion_point is None:
            self.insert_non_full(key, value, parent)
            return self
        return insertion_point.insert(key, value, parent=self)

    def insert_non_full(self, key, value, parent=None):
        """Add a new item to this node's children, keeping them sorted.

        No capacity check is made. ``parent`` is recorded on the new item and
        defaults to this node. Returns ``self``.
        """
        keys = [child.key for child in self.children]
        position = bisect.bisect(keys, key)
        owner = self if parent is None else parent
        self.children.insert(position, BTree(True, self.M, key, value, owner))
        return self

    def sort(self):
        """Sort the children in place by key."""
        self.children.sort(key=lambda node: node.key)

    def find_location_for_key(self, key):
        """Return ``(child, index)`` for the last child with ``child.key <= key``.

        Returns ``(None, -1)`` when every child has a larger key or there are
        no children. Relies on the children being sorted by key.
        """
        keys = [child.key for child in self.children]
        index = bisect.bisect_right(keys, key) - 1
        if index < 0:
            return None, -1
        return self.children[index], index

    def search(self, greater_than_equal, less_than):
        """Yield items with ``greater_than_equal <= key < less_than`` in order."""
        for child in self.children:
            if child.key >= less_than:
                # Children are sorted and a container's key is the smallest
                # key below it, so nothing further can be in range.
                break
            if child.leaf and child.key >= greater_than_equal:
                yield child
            if child.children:
                # A container may start below the range and still hold keys
                # inside it, so it is always descended into.
                yield from child.search(greater_than_equal, less_than)

    def delete(self, key):
        """Remove one item stored under ``key``.

        Returns:
            True if an item was removed, False if ``key`` was not found.
        """
        child, index = self.find_location_for_key(key)
        if child is None:
            return False
        if child.leaf and child.key == key:
            # Drop the item; anything stored below it moves up in its place.
            for orphan in child.children:
                orphan.parent = self
            self.children[index:index + 1] = child.children
            return True
        if not child.children:
            return False
        removed = child.delete(key)
        if removed and not child.leaf:
            if child.children:
                # Keep the container's label equal to its smallest key.
                first = child.children[0]
                child.key, child.value = first.key, first.value
            else:
                # An emptied container no longer serves any purpose.
                del self.children[index]
        return removed
        

    

def walk(item,spaces=0):
    """Print the subtree rooted at ``item``, indenting one space per level.

    Each line shows ``key=value``, the word ``leaf`` for leaf nodes, and the
    node's parent.
    """
    indent = " " * spaces
    kind = "leaf" if item.leaf else ""
    line = "{}{}={} {} {}".format(indent,item.key,item.value,kind,item.parent)
    print(line)

    deeper = spaces + 1
    for child in item.children:
        walk(child,deeper)

# Build a tree of order 3 and feed it the keys 1..99 in ascending order.
root = (
    BTree(False,3,None)
    .insert(1,"1")
    .insert(2,"2")
    .insert(3,"3")
    .insert(4,"4")
)

# Keep the node returned by insert(): it is the root to use afterwards.
for i in range(5,100):
    root = root.insert(i,str(i))

walk(root)

print(root.children)

def keysonly(items):
    """Generate the key of each node in ``items``, preserving their order."""
    for node in items:
        node_key = node.key
        yield node_key

# Sequential inserts must come back from the traversal in key order.
walked_keys = list(keysonly(root.walk()))
assert sorted(walked_keys) == walked_keys

# Fresh tree for the random-insertion check, same order (3) as above.
root = BTree(False,3,None)


# Keys already inserted; randint may repeat a number.
seen = set()

for i in range(1,100):
    num1 = random.randint(0,100)
    if num1 not in seen:
        seen.add(num1)

        root = root.insert(num1,str(num1))



walk(root)

for item in root.walk():
    print(item.key,item.value)

# Random inserts must also come back in key order.
walked_keys = list(keysonly(root.walk()))
assert sorted(walked_keys) == walked_keys

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。