This website requires JavaScript.

Hbase 学习笔记

创建表

create '表名称','列族名称1','列族名称2','列族名称N'
create 'users','user_id','address','info'

查看表信息

表描述

describe 'users'
查看表中的记录总数
count '表名称'

添加/更新记录

put '表名称','行名称','列族名称:列名称','值'
put 'users','xiaoming','info:age','24';
put 'users','xiaoming','info:birthday','1987-06-17';
put 'users','xiaoming','info:company','alibaba';
put 'users','xiaoming','address:country','china';
put 'users','xiaoming','address:province','zhejiang';
put 'users','xiaoming','address:city','hangzhou';
put 'users','zhangyifei','info:birthday','1987-4-17';
put 'users','zhangyifei','info:favorite','movie';
put 'users','zhangyifei','info:company','alibaba';
put 'users','zhangyifei','address:country','china';
put 'users','zhangyifei','address:province','guangdong';
put 'users','zhangyifei','address:city','jieyang';
put 'users','zhangyifei','address:town','xianqiao';

 查看记录

get '表名称','行名称'
get 'users','zhangyifei'
get 'users','zhangyifei','address:city'
查看版本信息
put 'users','xiaoming','info:age','25';
get 'users','xiaoming',{COLUMN=>'info:age',VERSIONS=>3}   -- 版本数由 VERSIONS 属性设置
查看指定时间的版本
get 'users','xiaoming',{COLUMN=>'info:age',TIMESTAMP=>1462687764590}
查看所有记录
scan '表名称'
查看某个表某个列中所有数据
scan '表名称',{COLUMNS=>'列族名称:列名称'}
找到某人的值等于sku188
scan 'test1', FILTER=>"ValueFilter(=,'binary:sku188')"
查找包含188的值
scan 'test1', FILTER=>"ValueFilter(=,'substring:188')"
查找列为c2 值包含88的值
scan 'test1', FILTER=>"ColumnPrefixFilter('c2') AND ValueFilter(=,'substring:88')"
scan 'test1', FILTER=>"ColumnPrefixFilter('b1') AND ValueFilter(=,'binary:sku1')"
scan 'test1', FILTER=>"ColumnPrefixFilter('s') AND ( ValueFilter(=,'substring:123') OR ValueFilter(=,'substring:222') )"
只取第一列的第一个版本,并且只显示列名
scan 'test1', FILTER=>"FirstKeyOnlyFilter() AND ValueFilter(=,'binary:sku188') AND KeyOnlyFilter()"
找出所有user1开头的行键
scan 'test1', FILTER => "PrefixFilter ('user1')"
指定行健开始至结束
scan 'test1', {STARTROW=>'user1|ts2', FILTER => "PrefixFilter ('user1')"}
scan 'test1', {STARTROW=>'user1|ts2', STOPROW=>'user2'}

使用Java类进行查询

找到所有列包含ts3的行

import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.filter.RowFilter
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'), SubstringComparator.new('ts3'))}

使用正则表达式
import org.apache.hadoop.hbase.filter.RegexStringComparator
put 'test1', 'user2|err', 'sf:s1', 'sku999'
scan 'test1', {FILTER => RowFilter.new(CompareFilter::CompareOp.valueOf('EQUAL'),RegexStringComparator.new('^user\d+\|ts\d+$'))}
比较一个列值
import org.apache.hadoop.hbase.filter.CompareFilter
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter
import org.apache.hadoop.hbase.filter.SubstringComparator
import org.apache.hadoop.hbase.util.Bytes

scan 't1', { COLUMNS => 'family:qualifier', FILTER =>
     SingleColumnValueFilter.new
         (Bytes.toBytes('family'),
          Bytes.toBytes('qualifier'),
          CompareFilter::CompareOp.valueOf('EQUAL'),
          SubstringComparator.new('somevalue')) }
scan 'test1', {COLUMNS => 'sf:b1', FILTER => SingleColumnValueFilter.new(Bytes.toBytes('sf'), Bytes.toBytes('b1'), CompareFilter::CompareOp.valueOf('EQUAL'), Bytes.toBytes('sku1'))}

二进制查询

org.apache.hadoop.hbase.util.Bytes.toString("Hello HBase".to_java_bytes)
org.apache.hadoop.hbase.util.Bytes.toString("\x48\x65\x6c\x6c\x6f\x20\x48\x42\x61\x73\x65".to_java_bytes)
-- 用户userX,作为rowkey,他的各种设备(brwoser, app, pc)作为列名,所对应的cookie_id作为value (长整型变量)
--在Hbase的shell里面存储二进制值使用双引号
put 'test1', 'user1', 'lf:browser1', "\x00\x00\x00\x00\x00\x00\x00\x02"
put 'test1', 'user1', 'lf:app1', "\x00\x00\x00\x00\x00\x00\x00\x0F"
put 'test1', 'user1', 'lf:app2', "\x00\x00\x00\x00\x00\x00\x00\x10"
put 'test1', 'user2', 'lf:app1', "\x00\x00\x00\x00\x00\x00\x00\x11"
put 'test1', 'user2', 'lf:pc1', "\x00\x00\x00\x00\x00\x00\x00\x12"

scan 'test1', STOPROW=>'user2', FILTER=>"( ColumnPrefixFilter('app') AND ValueFilter(>,'binary:\x00\x00\x00\x00\x00\x00\x00\x0F') )"

scan 'test1', LIMIT => 10, FILTER=>"( ColumnPrefixFilter('app') AND ValueFilter(>,'binary:\x00\x00\x00\x00\x00\x00\x00\x0F') )"

删除记录

delete '表名','行名称','列名称'

删除xiaoming行的'info:age'字段

delete 'users','xiaoming','info:age'
删除整行
deleteall 'users','xiaoming'

删除表

disable 'users'
drop 'users'
清空表
truncate 'users'

修改表

增加cf族

alter 'test1', NAME => 'cf'
如果存在cf族删掉
alter 'test1', NAME => 'cf', METHOD => 'delete'
alter 'test1', 'delete' => 'cf'

Hive使用Hbase表

-- 注意下面#b表示二进制形式

-- External Hive table over the HBase table test1.
-- The #b suffix tells the HBase SerDe to decode the cell value as binary
-- (here: an 8-byte big-endian long -> BIGINT).
-- NOTE: hbase.columns.mapping must NOT contain whitespace between entries;
-- a space becomes part of the HBase column name and breaks the mapping.
CREATE EXTERNAL TABLE user_app_cookie_list (
    username STRING,        -- HBase row key
    app1_cookie_id BIGINT,  -- lf:app1, stored as binary long
    app2_cookie_id BIGINT   -- lf:app2, stored as binary long
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,lf:app1#b,lf:app2#b")
TBLPROPERTIES ("hbase.table.name" = "test1");

--将整个族列cf 对应到hive的map ,  #s:b 为值的数据类型

-- External Hive table mapping an ENTIRE HBase column family (cf) to a Hive MAP:
-- each column qualifier becomes a map key, each cell value a map value.
-- The #s:b suffix declares the storage types as key:value = string:binary.
-- NOTE: hbase.columns.mapping must NOT contain whitespace between entries.
CREATE EXTERNAL TABLE ts_string (
    username STRING,           -- HBase row key
    visits map<string, int>    -- all of column family cf
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf:#s:b")
TBLPROPERTIES ("hbase.table.name" = "test1");

HBase Rest API

获取版本信息

curl -H "Accept: text/xml" http://quickstart.cloudera:20550/version -v
curl -H "Accept: application/json" http://quickstart.cloudera:20550/version -v

查看集群使用的版本和状态,使用python的json解析工具
curl -H "Accept: application/json" http://quickstart.cloudera:20550/status/cluster | python -mjson.tool
使用postman

hbase-postman

创建一个名为tbl_rest的Hbase表格

curl -X PUT -H "Content-Type: application/json" http://quickstart.cloudera:20550/tbl_rest/schema -d '{"ColumnSchema":[{"name":"cf"}]}' -v
看一下新建的表都有那些region
curl -H "Accept: application/json" http://quickstart.cloudera:20550/tbl_rest/regions | python -mjson.tool

往里面加数据

Json添加脚本

# Put a single cell into an HBase table through the REST gateway (JSON body).
# Usage: ./hbase_put_rest_json.sh <table> <column_family> <column> <row_key> <value>
table=$1
cf=$2
column=$3
key=$4
value=$5

# The HBase REST API transports column names, row keys and values base64-encoded.
cq_enc=$(echo -ne "$cf:$column" | base64)
key_enc=$(echo -ne "$key" | base64)
value_enc=$(echo -ne "$value" | base64)

# Build the JSON payload and POST it to the REST endpoint.
data='{ "Row": [ { "key": "'$key_enc'", "Cell": [ { "column": "'$cq_enc'", "$": "'$value_enc'" } ] } ] }'
echo "$data"
curl -H "Content-Type: application/json" --data "$data" "http://quickstart.cloudera:20550/$table/$key" -v

执行
./hbase_put_rest_json.sh tbl_rest cf test sku_1 123
xml添加脚本
# Put a single cell into an HBase table through the REST gateway (XML body).
# Usage: ./hbase_put_rest_xml.sh <table> <column_family> <column> <row_key> <value>
table=$1
cf=$2
column=$3
key=$4
value=$5

# The HBase REST API transports column names, row keys and values base64-encoded.
cq_enc=$(echo -ne "$cf:$column" | base64)
key_enc=$(echo -ne "$key" | base64)
value_enc=$(echo -ne "$value" | base64)

# Build the XML payload and POST it to the REST endpoint.
xml='<?xml version="1.0" encoding="UTF-8" standalone="yes"?><CellSet><Row key="'$key_enc'"><Cell column="'$cq_enc'">'$value_enc'</Cell></Row></CellSet>'
echo "$xml"
curl -H "Content-Type: text/xml" --data "$xml" "http://quickstart.cloudera:20550/$table/$key" -v
执行
./hbase_put_rest_xml.sh tbl_rest cf test1 sku_2 789

取值

curl -H "Accept: application/json" http://quickstart.cloudera:20550/tbl_rest/sku_2 | python -mjson.tool
注意返回过来的值是base64编码的因此需要做下转换
echo -n "c2t1XzI=" | base64 -d
echo -n "Nzg5" | base64 -d
也可以用stream形式来取值,但是必须指定列族和列名
curl -H "Accept: application/octet-stream" http://quickstart.cloudera:20550/tbl_rest/sku_2/cf:test1

删除表格

curl -X DELETE -H "Accept: application/json" http://quickstart.cloudera:20550/tbl_rest/schema -v

参考

Integration of Hive and HBase: Hive MAP column mapped to an entire column family with binary value (byte value)

0条评论
avatar