博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
HBase 之缓存扫描加快读取速度
阅读量:6087 次
发布时间:2019-06-20

本文共 6853 字,大约阅读时间需要 22 分钟。

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.client.metrics.ScanMetrics;import java.io.IOException;/** * Created by similarface on 16/8/23. */public class ScanDataUseCache {    private static Table table=null;    public static Table getTable() {        if(table==null){            try {                Configuration configuration = HBaseConfiguration.create();                Connection connection = ConnectionFactory.createConnection(configuration);                //建立表的连接                return connection.getTable(TableName.valueOf("testtable"));            }catch (IOException e){                return table;            }        }        return table;    }    private static void scan(int caching,int batch,boolean small) {        int count=0;        //setCaching 设置的值为每次rpc的请求记录数,默认是1;cache大可以优化性能,但是太大了会花费很长的时间进行一次传输。        //setBatch 设置每次取的column size;有些row特别大,所以需要分开传给client,就是一次传一个row的几个column。        //setSmall 是否为小扫描        //setScanMetricsEnabled 使用了集合        Scan scan = new Scan().setCaching(caching).setBatch(batch).setSmall(small).setScanMetricsEnabled(true);        ResultScanner scanner=null;        try {            scanner = getTable().getScanner(scan);        }catch (IOException e){            System.out.println(e);        }        if (scanner!=null){            for (Result result:scanner){                count++;            }        scanner.close();        ScanMetrics metrics = scan.getScanMetrics();        System.out.println("Caching: " + caching + ", Batch: " + batch + ", Small: " + small + ", Results: " + count + ", RPCs: " + metrics.countOfRPCcalls);        }        else {            System.out.println("Error");        }    }    public static void main(String[] args) throws IOException {        // Caching: 1, Batch: 1, Small: false, Results: 9, RPCs: 12        scan(1, 1, false);        //Caching: 
1, Batch: 0, Small: false, Results: 4, RPCs: 7        scan(1, 0, false);        // Caching: 1, Batch: 0, Small: true, Results: 4, RPCs: 0        scan(1, 0, true);        //Caching: 200, Batch: 1, Small: false, Results: 9, RPCs: 3        scan(200, 1, false);        //Caching: 200, Batch: 0, Small: false, Results: 4, RPCs: 3        scan(200, 0, false);        //Caching: 200, Batch: 0, Small: true, Results: 4, RPCs: 0        scan(200, 0, true);        // Caching: 2000, Batch: 100, Small: false, Results: 4, RPCs: 3        scan(2000, 100, false);        // Caching: 2, Batch: 100, Small: false, Results: 4, RPCs: 5        scan(2, 100, false);        // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5        scan(2, 10, false);        // Caching: 2, Batch: 10, Small: false, Results: 4, RPCs: 5        scan(5, 100, false);        // Caching: 5, Batch: 100, Small: false, Results: 4, RPCs: 3        scan(5, 20, false);        // Caching: 10, Batch: 10, Small: false, Results: 4, RPCs: 3        scan(10, 10, false);    }}/** Caching: 1, Batch: 0, Small: false, Results: 5, RPCs: 8 Caching: 1, Batch: 0, Small: true, Results: 5, RPCs: 0 Caching: 200, Batch: 1, Small: false, Results: 1009, RPCs: 8 Caching: 200, Batch: 0, Small: false, Results: 5, RPCs: 3 Caching: 200, Batch: 0, Small: true, Results: 5, RPCs: 0 Caching: 2000, Batch: 100, Small: false, Results: 14, RPCs: 3 Caching: 2, Batch: 100, Small: false, Results: 14, RPCs: 10 Caching: 2, Batch: 10, Small: false, Results: 104, RPCs: 55 Caching: 5, Batch: 100, Small: false, Results: 14, RPCs: 5 Caching: 5, Batch: 20, Small: false, Results: 54, RPCs: 13 Caching: 10, Batch: 10, Small: false, Results: 104, RPCs: 13 **/

 

这是一个9行数据的表

每行包含一些列

使用缓存(caching)为 6、批量(batch)为 3 的扫描器

需要3个RPC

每 3 个列装入一个 Result 实例

每 6 个 Result 装入缓存,组成一次 RPC

import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.TableName;import org.apache.hadoop.hbase.client.*;import org.apache.hadoop.hbase.client.metrics.ScanMetrics;import java.io.IOException;/** * Created by similarface on 16/8/24. */public class ScanWithOffsetAndLimit {    private static Table table = null;    public static Table getTable() {        if (table == null) {            try {                Configuration configuration = HBaseConfiguration.create();                Connection connection = ConnectionFactory.createConnection(configuration);                //建立表的连接                return connection.getTable(TableName.valueOf("testtable"));            } catch (IOException e) {                return table;            }        }        return table;    }    /**     * 遍历访问数据     * @param num 运行次序     * @param caching     * @param batch     * @param offset     * @param maxResults     * @param maxResultSize     * @param dump     * @throws IOException     */    private static void scan(int num, int caching, int batch, int offset, int maxResults, int maxResultSize, boolean dump    ) throws IOException {        int count = 0;        Scan scan = new Scan().setCaching(caching).setBatch(batch)                .setRowOffsetPerColumnFamily(offset)                .setMaxResultsPerColumnFamily(maxResults)                .setMaxResultSize(maxResultSize)                .setScanMetricsEnabled(true);        ResultScanner scanner = getTable().getScanner(scan);        System.out.println("Scan #" + num + " running...");        for (Result result : scanner) {            count++;            if (dump)                System.out.println("Result [" + count + "]:" + result);        }        scanner.close();        ScanMetrics metrics = scan.getScanMetrics();        System.out.println("Caching: " + caching + ", Batch: " + batch +                ", Offset: " + offset + ", maxResults: " + maxResults +                ", 
maxSize: " + maxResultSize + ", Results: " + count +                ", RPCs: " + metrics.countOfRPCcalls);    }    public static void main(String[] args) throws IOException {        //偏移为0 最大2个cell 所以会扫描到列1 和列2        scan(1, 11, 0, 0, 2, -1, true);        //偏移为4 最大2个cell 所以会扫描到列5 和列6        scan(2, 11, 0, 4, 2, -1, true);        //        scan(3, 5, 0, 0, 2, -1, false);        scan(4, 11, 2, 0, 5, -1, true);        scan(5, 11, -1, -1, -1, 1, false);        scan(6, 11, -1, -1, -1, 10000, false);    }}/** Caching: 11, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 458 Caching: 11, Batch: 0, Offset: 4, maxResults: 2, maxSize: -1, Results: 1, RPCs: 3 Caching: 5, Batch: 0, Offset: 0, maxResults: 2, maxSize: -1, Results: 5005, RPCs: 1004 Caching: 11, Batch: 2, Offset: 0, maxResults: 5, maxSize: -1, Results: 5009, RPCs: 458 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 1, Results: 5005, RPCs: 11012 Caching: 11, Batch: -1, Offset: -1, maxResults: -1, maxSize: 10000, Results: 5005, RPCs: 469**/

 

转载于:https://www.cnblogs.com/similarface/p/5800509.html

你可能感兴趣的文章
完美解决PHP中文乱码
查看>>
js获取下拉,单选
查看>>
Spring源码系列 — Envoriment组件
查看>>
zw量化交易·实盘操作·系列培训班
查看>>
repeater 设置分页
查看>>
Linux基础命令一
查看>>
CSRF笔记
查看>>
关于JS的return false
查看>>
HDU - 1789 Doing Homework again 贪心
查看>>
MySQL 忘记密码怎么办?
查看>>
Linux关闭端口
查看>>
ROS在Ubuntu下的安装
查看>>
UML类图关系全面剖析
查看>>
“问吧”调查问卷的分析总结与感受
查看>>
SQL 递归查询
查看>>
《思维导图的三招十八式》读书笔记
查看>>
为什么要努力
查看>>
EBS R12应用启用过程
查看>>
Android系统架构概况
查看>>
PHP函数索引-E
查看>>