GuilinDev

Lc0705

05 August 2008

705 Design HashSet

解法一，简单数组

由于题目给出了 0 <= key <= 10^6 数据范围，同时限定了 key 只能是 int。

我们可以直接使用一个 boolean 数组记录某个 key 是否存在，key 直接对应 boolean 的下标

O(1) + O(1)

class MyHashSet {
    boolean[] nodes = new boolean[1000009];
    
    public void add(int key) {
        nodes[key] = true;
    }
    
    public void remove(int key) {
        nodes[key] = false;
    }
    
    public boolean contains(int key) {
        return nodes[key];
    }
}

解法二

使用链表，由于没有扩容的逻辑，最坏情况下复杂度为 O(n)，一般情况下复杂度为 O(1)

class MyHashSet {
    // 由于使用的是「链表」，这个值可以取得很小
    Node[] nodes = new Node[10009];

    public void add(int key) {
        // 根据 key 获取哈希桶的位置
        int idx = getIndex(key);
        // 判断链表中是否已经存在
        Node loc = nodes[idx], tmp = loc;
        if (loc != null) {
            Node prev = null;
            while (tmp != null) {
                if (tmp.key == key) {
                    return;
                }
                prev = tmp;
                tmp = tmp.next;
            }
            tmp = prev;
        }
        Node node = new Node(key);

        // 头插法
        // node.next = loc;
        // nodes[idx] = node;

        // 尾插法 
        if (tmp != null) {
            tmp.next = node;
        } else {
            nodes[idx] = node;
        }
    }

    public void remove(int key) {
        int idx = getIndex(key);
        Node loc = nodes[idx];
        if (loc != null) {
            Node prev = null;
            while (loc != null) {
                if (loc.key == key) {
                    if (prev != null) {
                        prev.next = loc.next;
                    } else {
                        nodes[idx] = loc.next;
                    }
                    return;
                }
                prev = loc;
                loc = loc.next;
            }
        }
    }

    public boolean contains(int key) {
        int idx = getIndex(key);
        Node loc = nodes[idx];
        if (loc != null) {
            while (loc != null) {
                if (loc.key == key) {
                    return true;
                }
                loc = loc.next;
            }
        }
        return false;
    }

    static class Node {
        private int key;
        private Node next;

        private Node(int key) {
            this.key = key;
        }
    }

    int getIndex(int key) {
        // 因为 nodes 的长度只有 10009，对应的十进制的 10011100011001（总长度为 32 位，其余高位都是 0）
        // 为了让 key 对应的 hash 高位也参与运算，这里对 hashCode 进行右移异或
        // 使得 hashCode 的高位随机性和低位随机性都能体现在低 16 位中
        int hash = Integer.hashCode(key);
        hash ^= (hash >>> 16);
        return hash % nodes.length;
    }
}

/**
 * Your MyHashSet object will be instantiated and called as such:
 * MyHashSet obj = new MyHashSet();
 * obj.add(key);
 * obj.remove(key);
 * boolean param_3 = obj.contains(key);
 */

解法3 - 分桶数组

类似「bitmap」数据结构。

使用 int 中的每一位代表一个位置。

由于数据范围为 0 <= key <= 10^6，我们最多需要的 int 数量不会超过 40000。

因此我们可以建立一个 buckets 数组，数组装载的 int 类型数值。

先对 key 进行 key / 32，确定当前 key 所在桶的位置（大概位置）

再对 key 进行 key % 32，确定当前 key 所在桶中的哪一位（精确位置）

根据位运算对「精确位置」进行修改。

class MyHashSet {
    int[] bs = new int[40000];

    public void add(int key) {
        int bucketIdx = key / 32;
        int bitIdx = key % 32;
        setVal(bucketIdx, bitIdx, true);
    }

    public void remove(int key) {
        int bucketIdx = key / 32;
        int bitIdx = key % 32;
        setVal(bucketIdx, bitIdx, false);
    }

    public boolean contains(int key) {
        int bucketIdx = key / 32;
        int bitIdx = key % 32;
        return getVal(bucketIdx, bitIdx);
    }

    void setVal(int bucket, int loc, boolean val) {
        if (val) {
            int u = bs[bucket] | (1 << loc);
            bs[bucket] = u;
        } else {
            int u = bs[bucket] & ~(1 << loc);
            bs[bucket] = u;
        }
    }

    boolean getVal(int bucket, int loc) {
        int u = (bs[bucket] >> loc) & 1;
        return u == 1;
    }
}

解法4，再hash，load factor, BST, Rehashing

/*This code has been written with the help of this Link:
https://courses.cs.washington.edu/courses/cse373/13wi/lectures/02-11/HashSet.java
*/
class MyHashSet {
    
    BSTNode [] elements; //my elements are stored in binary trees!
    int size;
    final static double LOAD_FACTOR = 0.7;
    final static int INITIAL_SIZE = 1; // 100 was picked totally random! No thoughts behind it.
    
    public MyHashSet() {
        elements = (BSTNode[]) new BSTNode[INITIAL_SIZE];
		size = 0;
    }
    
    public void add(int key) {
	    int h = hash(key);
	    BSTNode newNode = new BSTNode(key);
		if(elements[h]==null){
            elements[h]= newNode;
        }
        else{
            elements[h].add(newNode);
        }
		size++;
        if( size > (int)(LOAD_FACTOR*elements.length)){
               rehash(); 
        }
    }
    public void remove(int key) { 
        //for more efficiency it does not call contains() before removing an item
        //it rather checks if it exists while removing
        int h=hash(key);
        if(elements[h]!=null)
            elements[h] = elements[h].remove(key);
        size--;
        
    }
    
    /** Returns true if this set contains the specified element */
    public boolean contains(int key) {
        int h=hash(key);
        if(elements[h]!=null)
            return elements[h].contains(key);
        return false;
    }
    
     private int hash(int key){
        return key % elements.length;
    }
    
    private void rehash(){
        BSTNode [] newElements = new BSTNode[elements.length*2];
        BSTNode [] old = elements;
        elements=newElements;
        for(BSTNode n:old){
            if(n!=null){
                LinkedList <BSTNode> list = n.allTreeNodes(); 
                ListIterator it=list.listIterator();
                while(it.hasNext()){
                    BSTNode node = (BSTNode)it.next();
                    add(node.data);
                }
            }
        }   
    }
    
    // Build in binary search tree node to take care of collisions, even long ones with ologn()
    private class BSTNode {
		public int data;
		private BSTNode right;
        private BSTNode left;
		
		protected BSTNode(int data) {
			this.data = data;
		}
        
        protected void add(BSTNode node){
            //The add function is recursive, since there are as many as log(n) function calls
            if(node.data>this.data){
                if(this.right==null){
                 this.right=node;   
                }else{
                    this.right.add(node);
                }
            }
            else if(node.data<this.data){
                if(this.left==null){
                 this.left=node;   
                }else{
                    this.left.add(node);
                }
            }
        }
        
        protected LinkedList<BSTNode> allTreeNodes(){ 
            // This function is iterative since there is n function calls,
            //although the size of the stack can get as big as h~log(n)
            // The reason linkedList is used here is that we just 
            //need to iterate over it once for rehashing!
            Queue<BSTNode> q =new LinkedList<>();
            q.add(this);
            LinkedList <BSTNode> list=new LinkedList<>();
            while(!q.isEmpty()){
                BSTNode node=q.poll();
                list.add(node);
                if(node.right!=null)q.add(node.right);
                if(node.left!=null)q.add(node.left);
            }
            return list;
        }
        
        protected boolean contains(int data){
            if(this.data==data)
                return true;
            if(this.data>data && this.left!=null)
                return this.left.contains(data);
            if(this.data<data && this.right!=null)
                return this.right.contains(data);
            return false;
        }
        
        protected BSTNode remove(int d){
            if(this.data==d){ //what we wanna remove is found
                
                if(this.right==null && this.left==null) //if leaf remove
                    return null;
                
                if(this.right==null || this.left==null) //if one child return that one child!
                    return (this.left!=null)?this.left:this.right;
                
                this.data=this.left.findMax().data; //substitude with the rightmost left successor
                this.left.remove(this.data);
            }
            
            if(this.data>d && this.left!=null)
                this.left=this.left.remove(d);
            
            if(this.data<d && this.right!=null)
                this.right=this.right.remove(d);
            
            return this;
        }
        
        private BSTNode findMax(){
            BSTNode node=this;
            while(node.right!=null){
                node=node.right;
            }
            return node;
        }
        
	}

}

/**
 * Your MyHashSet object will be instantiated and called as such:
 * MyHashSet obj = new MyHashSet();
 * obj.add(key);
 * obj.remove(key);
 * boolean param_3 = obj.contains(key);
 ["MyHashSet","add","add","contains","contains","add","contains","remove","contains"]
[[],[1],[2],[1],[3],[2],[2],[2],[2]]
 
 
 */

This site is open source. Improve this page.