Redis 实现布隆过滤器

昨天听马士兵教育张福刚讲公开课,里面讲解了布隆过滤器,今天无聊没事干,整理了一下笔记。关于布隆过滤器是什么东西,有什么应用场景就不做讨论了,网上有很多,大家可以自行了解,只记录实现:

1. pom 依赖

<dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>3.3.0</version>
        </dependency>

        <dependency>
            <groupId>com.google.guava</groupId>
            <artifactId>guava</artifactId>
            <version>18.0</version>
        </dependency>

2. 具体实现

package cn.bridgeli.demo;

import com.google.common.hash.Funnels;
import com.google.common.hash.Hashing;
import org.junit.Before;
import org.junit.Test;
import redis.clients.jedis.Jedis;
import redis.clients.jedis.JedisPool;
import redis.clients.jedis.Pipeline;

import java.nio.charset.StandardCharsets;

/**
 * @author BridgeLi
 * @date 2020/6/6 16:38
 */
public class BloomFilter {

    private Jedis jedis = null;

    /**
     * 预估的数据量
     */
    private static long n = 10000;

    /**
     * 容忍的错误率
     */
    private static double fpp = 0.01;

    private static long numBits = optimalNumOfBits(n, fpp);
    private static int numHashFunctions = optimalNumOfHashFunctions(n, numBits);

    /**
     * 根据预估数据量 n 和允许的错误率 fpp 计算需要的 bit 数组的长度
     *
     * @param n
     * @param fpp
     * @return
     */
    private static long optimalNumOfBits(long n, double fpp) {
        if (0 == fpp) {
            fpp = Double.MIN_VALUE;
        }
        return (long) (-n * Math.log(fpp) / (Math.log(2) * Math.log(2)));
    }

    /**
     * 根据预估的数据量和计算出来的需要的 bit 数组的长度,计算所需要的 hash 函数的个数
     *
     * @param n
     * @param numBits
     * @return
     */
    private static int optimalNumOfHashFunctions(long n, long numBits) {
        return Math.max(1, (int) Math.round((double) numBits / n * Math.log(2)));
    }

    /**
     * 预热数据
     */
    @Before
    public void testBloomFilterBefore() {
        BloomFilter bloomFilter = new BloomFilter();
        bloomFilter.init();

        for (int i = 0; i < n; i++) {
            bloomFilter.put("bf", String.valueOf(i + 100));
        }
    }

    /**
     * 过滤数据
     */
    @Test
    public void testBloomFilter() {
        BloomFilter bloomFilter = new BloomFilter();
        bloomFilter.init();

        int ex_count = 0;
        int ne_count = 0;
        for (int i = 0; i < 2 * n; i++) {
            boolean exist = bloomFilter.isExist("bf", String.valueOf(i + 100));
            if (exist) {
                ex_count++;
            } else {
                ne_count++;
            }
        }

        System.out.println("ex_count: " + ex_count + ", ne_count: " + ne_count);
    }

    private void init() {
        JedisPool jedisPool = new JedisPool("127.0.0.1", 6379);
        jedis = jedisPool.getResource();
    }

    public boolean isExist(String where, String key) {
        long[] indexs = getIndexs(key);

        boolean result = false;
        try (Pipeline pipeline = jedis.pipelined()) {
            for (long index : indexs) {
                pipeline.getbit(where, index);
            }
            // 只要有一个位置为 false,即代表该数据不存在
            result = !pipeline.syncAndReturnAll().contains(false);
        } catch (Exception e) {

        }

        return result;
    }

    public void put(String where, String key) {
        long[] indexs = getIndexs(key);

        try (Pipeline pipeline = jedis.pipelined()) {
            for (long index : indexs) {
                pipeline.setbit(where, index, true);
            }
            pipeline.sync();
        } catch (Exception e) {

        }

    }

    private long[] getIndexs(String key) {

        long hash1 = Hashing.murmur3_128().hashObject(key, Funnels.stringFunnel(StandardCharsets.UTF_8)).asLong();
        long hash2 = hash1 >>> 16;

        long[] result = new long[numHashFunctions];

        for (int i = 0; i < numHashFunctions; i++) {
            long combinedHash = hash1 + i * hash2;
            if (combinedHash < 0) {
                combinedHash = ~combinedHash;
            }
            result[i] = combinedHash % numBits;
        }
        return result;
    }
}

全文完,如果本文对您有所帮助,请花 1 秒钟帮忙点击一下广告,谢谢。

作 者: BridgeLi,https://www.bridgeli.cn

原文链接: https://www.bridgeli.cn/archives/676

版权声明:非特殊声明均为本站原创作品,转载时请注明作者和原文链接。

我来评几句
登录后评论

已发表评论数()

相关站点

+订阅
热门文章