当前位置：首页 > news >正文

Hbase布隆过滤器

news 来源：原创 2024/9/30 13:30:20

Hbase布隆过滤器

小白的Hbase学习笔记

Hbase布隆过滤器

1.过滤表中所有Value中 >23 的内容

2.获取表中age列大于23的所有RowKey值（1的改进）

3.比较以某个Value值开头的列

4.按前缀准确值后缀查找

5.获取RowKey中包含15001000的所有RowKey（速度更快）

6.过滤列族名称以2结尾的RowKey数据

7.获取列名称以 na 开头的所有RowKey

8.对学生表中的信息进行过滤条件有：1.所有性别为男性 2.所有文科班 3.年龄大于23岁

1.过滤表中所有Value中 >23 的内容

package com.shujia.comparator;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
//过滤器/*** 需求：*      过滤表中所有Value中 >23 的内容*/
public class Code01ComparatorValue {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** (CompareOp valueCompareOp, ByteArrayComparable valueComparator)*///创建字节比较器 参数传入具体比较的值BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("23"));//该过滤器是针对于当前表中所有的值进行过滤 只要满足则返回一行 并且 如果不满足返回NULL//put 'jan:tbl1','1001','info:name','25'ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.GREATER, binaryComparator);//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

2.获取表中age列大于23的所有RowKey值（1的改进）

package com.shujia.comparator;//需求：获取表中age列大于23的所有RowKey值
//01的改进代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code02ComparatorSingleColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** 单列过滤器：*      用于过滤单列值*      返回的数据是满足条件的所有RowKey*注意：*      如果一条RowKey用于比较的列不存在 那么该RowKey也会被返回*/SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("age"),CompareFilter.CompareOp.GREATER,Bytes.toBytes(23));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

3.比较以某个Value值开头的列

package com.shujia.comparator;//该比较器用于比较以某个Value值开头的列
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code03ComparatorSingleColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** 单列过滤器：*      用于过滤单列值*      返回的数据是满足条件的所有RowKey*注意：*      如果一条RowKey用于比较的列不存在 那么该RowKey也会被返回*/SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("clazz"),CompareFilter.CompareOp.EQUAL,//该比较器用于比较以某个Value值开头的列new BinaryPrefixComparator(Bytes.toBytes("文科")));//二进制前缀比较器//new BinaryPrefixComparator(Bytes.toBytes("文科六")));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

4.按前缀准确值后缀查找

package com.shujia.comparator;//需求：获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code04ComparatorRowKey {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL//RowKey中的值以15001000为开头的, new BinaryPrefixComparator(Bytes.toBytes("15001000"))//如果我们想按照准确的信息查找//, new BinaryComparator(Bytes.toBytes("1500100001"))//通过RegexStringComparator的正则表达式过滤以98为结尾的内容//,new RegexStringComparator(".*02$"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

5.获取RowKey中包含15001000的所有RowKey（速度更快）

package com.shujia.comparator;//需求：获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code05ComparatorPrefix {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/***相比于在RowFilter中添加 BinaryComparator(Bytes.toBytes("15001000"))* PrefixFilter 执行速度更快 效率更高*/PrefixFilter filter = new PrefixFilter(Bytes.toBytes("15001000"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

6.过滤列族名称以2结尾的RowKey数据

package com.shujia.comparator;//需求：获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.List;//需求：
//      过滤列族名称以2结尾的RowKey数据public class Code06ComparatorFamily {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();FamilyFilter filter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*2$"));//desc 'jan:tbl1'//添加列族 alter 'jan:tbl1',{NAME => 'info2',VERSIONS => 1}//put 'jan:tbl1','1001','info2:name','zhangsan'//put 'jan:tbl1','1002','info2:name','zhangsan'//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {List<Cell> cells = result.listCells();String rowKey = Bytes.toString(result.getRow());for (Cell cell : cells) {String family = Bytes.toString(CellUtil.cloneFamily(cell));String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowKey+","+family+","+qualifier+","+value);}}table.close();conn.close();}
}

7.获取列名称以 na 开头的所有RowKey

package com.shujia.comparator;//需求：获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.List;//需求：
//      获取列名称以 na 开头的所有RowKeypublic class Code07ComparatorColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("na"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {List<Cell> cells = result.listCells();String rowKey = Bytes.toString(result.getRow());for (Cell cell : cells) {String family = Bytes.toString(CellUtil.cloneFamily(cell));String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowKey+","+family+","+qualifier+","+value);}}table.close();conn.close();}
}

8.对学生表中的信息进行过滤条件有：1.所有性别为男性 2.所有文科班 3.年龄大于23岁

package com.shujia.comparator;//需求：
//      对学生表中的信息进行过滤 条件有：1.所有性别为男性 2.所有文科班 3.年龄大于23岁import com.sun.xml.internal.bind.v2.runtime.unmarshaller.XsiNilLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;//需求：
//      获取列名称以 na 开头的所有RowKeypublic class Code08Comparator {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();//1.所有性别为男性SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("gender"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("男")));//2.所有文科班SingleColumnValueFilter filter2 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("clazz"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("文科")));//3.年龄大于23岁SingleColumnValueFilter filter3 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("age"), CompareFilter.CompareOp.GREATER, new BinaryPrefixComparator(Bytes.toBytes("23")));List<Filter> filters = new ArrayList<>();filters.add(filter1);filters.add(filter2);filters.add(filter3);FilterList filter = new FilterList(filters);//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}