当前位置: 首页 > news >正文

HBase过滤器(Filter)

HBase过滤器(Filter)

 

小白的Hbase学习笔记

 

目录

HBase过滤器(Filter)

1.过滤表中所有Value中 >23 的内容

2.获取表中age列大于23的所有RowKey值(1的改进)

3.比较以某个Value值开头的列

4.按RowKey的前缀、精确值、后缀查找

5.获取RowKey以15001000开头的所有RowKey(速度更快)

6.过滤列族名称以2结尾的RowKey数据

7.获取列名称以 na 开头的所有RowKey

8.对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁


 

 

1.过滤表中所有Value中 >23 的内容

 

package com.shujia.comparator;import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
//过滤器/*** 需求:*      过滤表中所有Value中 >23 的内容*/
public class Code01ComparatorValue {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** (CompareOp valueCompareOp, ByteArrayComparable valueComparator)*///创建字节比较器 参数传入具体比较的值BinaryComparator binaryComparator = new BinaryComparator(Bytes.toBytes("23"));//该过滤器是针对于当前表中所有的值进行过滤 只要满足则返回一行 并且 如果不满足返回NULL//put 'jan:tbl1','1001','info:name','25'ValueFilter filter = new ValueFilter(CompareFilter.CompareOp.GREATER, binaryComparator);//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

f0282c04f2904319bb0e74d546dff6be.png

 

2.获取表中age列大于23的所有RowKey值(1的改进)

 

package com.shujia.comparator;//需求:获取表中age列大于23的所有RowKey值
//01的改进代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code02ComparatorSingleColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** 单列过滤器:*      用于过滤单列值*      返回的数据是满足条件的所有RowKey*注意:*      如果一条RowKey用于比较的列不存在 那么该RowKey也会被返回*/SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("age"),CompareFilter.CompareOp.GREATER,Bytes.toBytes(23));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

7ea09daf85aa4d1dadf297030478f205.png

 

3.比较以某个Value值开头的列

 

package com.shujia.comparator;//该比较器用于比较以某个Value值开头的列
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code03ComparatorSingleColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/*** 单列过滤器:*      用于过滤单列值*      返回的数据是满足条件的所有RowKey*注意:*      如果一条RowKey用于比较的列不存在 那么该RowKey也会被返回*/SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("info"),Bytes.toBytes("clazz"),CompareFilter.CompareOp.EQUAL,//该比较器用于比较以某个Value值开头的列new BinaryPrefixComparator(Bytes.toBytes("文科")));//二进制前缀比较器//new BinaryPrefixComparator(Bytes.toBytes("文科六")));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

ef74e68da6c94770832e84981bd86e28.png

 

4.按RowKey的前缀、精确值、后缀查找

 

package com.shujia.comparator;//需求:获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code04ComparatorRowKey {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();RowFilter filter = new RowFilter(CompareFilter.CompareOp.EQUAL//RowKey中的值以15001000为开头的, new BinaryPrefixComparator(Bytes.toBytes("15001000"))//如果我们想按照准确的信息查找//, new BinaryComparator(Bytes.toBytes("1500100001"))//通过RegexStringComparator的正则表达式过滤以98为结尾的内容//,new RegexStringComparator(".*02$"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

5186a1be27c34841a2566f46ff27918a.png

9f8f375e23be46229ff14d2663e76ac4.png

ab1bb63ab91a44f1b098712ecbd7d0a3.png

 

5.获取RowKey以15001000开头的所有RowKey(速度更快)

 

package com.shujia.comparator;//需求:获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;public class Code05ComparatorPrefix {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();/***相比于在RowFilter中添加 BinaryComparator(Bytes.toBytes("15001000"))* PrefixFilter 执行速度更快 效率更高*/PrefixFilter filter = new PrefixFilter(Bytes.toBytes("15001000"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

366b349d6c1f48cc843d7e4087a927a2.png

 

6.过滤列族名称以2结尾的RowKey数据

 

package com.shujia.comparator;//需求:获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.List;//需求:
//      过滤列族名称以2结尾的RowKey数据public class Code06ComparatorFamily {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();FamilyFilter filter = new FamilyFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator(".*2$"));//desc 'jan:tbl1'//添加列族 alter 'jan:tbl1',{NAME => 'info2',VERSIONS => 1}//put 'jan:tbl1','1001','info2:name','zhangsan'//put 'jan:tbl1','1002','info2:name','zhangsan'//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {List<Cell> cells = result.listCells();String rowKey = Bytes.toString(result.getRow());for (Cell cell : cells) {String family = Bytes.toString(CellUtil.cloneFamily(cell));String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowKey+","+family+","+qualifier+","+value);}}table.close();conn.close();}
}

777e78c120504c22bc4ae37f4a419678.png

81fccb2343df47c69dd2746208ca6a69.png

 

7.获取列名称以 na 开头的所有RowKey

 

package com.shujia.comparator;//需求:获取RowKey中包含15001000的所有RowKeyimport org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.List;//需求:
//      获取列名称以 na 开头的所有RowKeypublic class Code07ComparatorColumns {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("na"));//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {List<Cell> cells = result.listCells();String rowKey = Bytes.toString(result.getRow());for (Cell cell : cells) {String family = Bytes.toString(CellUtil.cloneFamily(cell));String qualifier = Bytes.toString(CellUtil.cloneQualifier(cell));String value = Bytes.toString(CellUtil.cloneValue(cell));System.out.println(rowKey+","+family+","+qualifier+","+value);}}table.close();conn.close();}
}

061408d885614570bf897193e01647a3.png

 

8.对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁

 

package com.shujia.comparator;//需求:
//      对学生表中的信息进行过滤 条件有:1.所有性别为男性 2.所有文科班 3.年龄大于23岁import com.sun.xml.internal.bind.v2.runtime.unmarshaller.XsiNilLoader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.*;
import org.apache.hadoop.hbase.util.Bytes;import java.io.IOException;
import java.util.ArrayList;
import java.util.List;//需求:
//      获取列名称以 na 开头的所有RowKeypublic class Code08Comparator {public static void main(String[] args) throws IOException {Configuration conf = new Configuration();conf.set("hbase.zookeeper.quorum","node1,node2,master");Connection conn = ConnectionFactory.createConnection(conf);Table table = conn.getTable(TableName.valueOf("jan:tbl1"));Scan scan=new Scan();//1.所有性别为男性SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("gender"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("男")));//2.所有文科班SingleColumnValueFilter filter2 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("clazz"), CompareFilter.CompareOp.EQUAL, new BinaryPrefixComparator(Bytes.toBytes("文科")));//3.年龄大于23岁SingleColumnValueFilter filter3 = new SingleColumnValueFilter(Bytes.toBytes("info"), Bytes.toBytes("age"), CompareFilter.CompareOp.GREATER, new BinaryPrefixComparator(Bytes.toBytes("23")));List<Filter> filters = new ArrayList<>();filters.add(filter1);filters.add(filter2);filters.add(filter3);FilterList filter = new FilterList(filters);//设置过滤器scan.setFilter(filter);//获取扫描器对象ResultScanner scanner = table.getScanner(scan);for (Result result : scanner) {String rowKey = Bytes.toString(result.getRow());String name = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name")));String age = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("age")));String gender = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("gender")));String clazz = Bytes.toString(result.getValue(Bytes.toBytes("info"), Bytes.toBytes("clazz")));System.out.println(rowKey+","+name+","+age+","+gender+","+clazz);}table.close();conn.close();}
}

d5cfd5c8ceb1469ba28f08696c50e95a.png

 

 

 

相关文章:

  • 手机丢失不惊慌,华为手机已升级至楼层级设备查找!
  • C++作业第四天
  • Handler通信机制
  • [论文笔记]Mixtral of Experts
  • 新版FMEA培训的应用误区是如何产生的?
  • XML解析库tinyxml2库使用详解
  • Windows系统安装Docker环境详细教程
  • Armbian OS(基于ubuntu24) 源码编译mysql 5.7
  • 路径规划 | 图解遗传(GA)算法(附ROS C++仿真)
  • 传神论文中心|第11期人工智能领域论文推荐
  • RPG Maker MZ中被你忽略的干货操作——独立开关和“开关”在事件页中的关系
  • Web前端魂斗罗:深度剖析前端技术的奇幻之旅
  • flutter实现UDP发送魔法包唤醒主机
  • 碳素钢化学成分分析 螺纹钢材质鉴定 钢材维氏硬度检测
  • 【Unity回调函数】创建自己的外部回调函数——以按钮点击为例
  • 2017年终总结、随想
  • CSS相对定位
  • ES6 ...操作符
  • Fabric架构演变之路
  • HomeBrew常规使用教程
  • JAVA_NIO系列——Channel和Buffer详解
  • JavaScript/HTML5图表开发工具JavaScript Charts v3.19.6发布【附下载】
  • JavaScript服务器推送技术之 WebSocket
  • JavaScript设计模式之工厂模式
  • JWT究竟是什么呢?
  • leetcode讲解--894. All Possible Full Binary Trees
  • node 版本过低
  • SpiderData 2019年2月23日 DApp数据排行榜
  • Vue2.0 实现互斥
  • Web设计流程优化:网页效果图设计新思路
  • 半理解系列--Promise的进化史
  • 如何利用MongoDB打造TOP榜小程序
  • 如何使用 OAuth 2.0 将 LinkedIn 集成入 iOS 应用
  • 如何优雅地使用 Sublime Text
  • 通过几道题目学习二叉搜索树
  • 微信端页面使用-webkit-box和绝对定位时,元素上移的问题
  • 微信公众号开发小记——5.python微信红包
  • 走向全栈之MongoDB的使用
  • [Shell 脚本] 备份网站文件至OSS服务(纯shell脚本无sdk) ...
  • 阿里云移动端播放器高级功能介绍
  • 曾刷新两项世界纪录,腾讯优图人脸检测算法 DSFD 正式开源 ...
  • 组复制官方翻译九、Group Replication Technical Details
  • # Pytorch 中可以直接调用的Loss Functions总结:
  • #【QT 5 调试软件后,发布相关:软件生成exe文件 + 文件打包】
  • (06)Hive——正则表达式
  • (2024,Vision-LSTM,ViL,xLSTM,ViT,ViM,双向扫描)xLSTM 作为通用视觉骨干
  • (poj1.3.2)1791(构造法模拟)
  • (附源码)springboot猪场管理系统 毕业设计 160901
  • (附源码)ssm基于jsp高校选课系统 毕业设计 291627
  • (转)德国人的记事本
  • .libPaths()设置包加载目录
  • .NET 中的轻量级线程安全
  • .NET/C# 中设置当发生某个特定异常时进入断点(不借助 Visual Studio 的纯代码实现)
  • .NET8 动态添加定时任务(CRON Expression, Whatever)
  • .Net多线程总结