momo00 2018-08-24
在拼接 HBase rowkey 时,为了防止热点,可以对 rowkey 使用 MD5 方式或者 hash 方式。
这里先介绍 hash 方式:
0 work_regionmax_student=10000
1 hashfun (stu.class_id, ${work_regionmax_student})
2 hashfun : hashfun(string, int) 获取 string 的 hashCode,对 int 取余后取绝对值,并在左侧补 0 到固定位数(位数等于 int-1 的十进制位数)
3 自定义函数中的写法:
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* Created by zm on 16/6/30.
*/
public class CreateHashId extends UDF {
/**
 * Derives a fixed-width, zero-padded bucket number from a string's hash code.
 *
 * <p>The bucket is {@code Math.abs(value.hashCode() % number)}, rendered in decimal and
 * left-padded with zeros to the number of digits in {@code number - 1}. For example,
 * with {@code number = 10000} a bucket of 27 is returned as {@code "0027"}.
 *
 * @param value  the string whose hash code selects the bucket
 * @param number the number of buckets; values below 1 are rejected
 * @return the zero-padded bucket, or {@code null} when {@code value} is {@code null}
 *         or empty, or when {@code number < 1}
 */
public String evaluate(String value, long number) {
    if (value == null || value.isEmpty() || number < 1) {
        return null;
    }

    // int % long is evaluated in long arithmetic, so Math.abs cannot overflow here.
    long result = Math.abs(value.hashCode() % number);
    // Debug trace of the intermediate values.
    System.out.println("result: " + result );

    // Width of the largest possible bucket (number - 1), so every result has equal length.
    int formatLength = String.valueOf(number - 1).length();
    System.out.println("formatLength: " + formatLength );

    // Locale.ROOT keeps the digits ASCII; without it %d follows the JVM default locale
    // and may emit locale-specific digits, which would change the generated key.
    return String.format(java.util.Locale.ROOT, "%0" + formatLength + "d", result);
}
//test
/**
 * Manual smoke test: hashes one sample id into 10000 buckets and prints the result.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Run recorded in the original notes for "010f5ae14d604b729456009a4c806bc0" with 10000:
    //   result: 3754
    //   formatLength: 4
    //   3754
    final String sampleKey = "01110158d9f041f6a9c75520f3e91b53";
    final String padded = new CreateHashId().evaluate(sampleKey, 10000);
    System.out.println(padded);
    // Run recorded in the original notes for the sample above:
    //   result: 27
    //   formatLength: 4
    //   0027
    // When the result already has 4 digits the "%0<width>d" padding changes nothing;
    // shorter results are left-padded with zeros up to 4 digits.
}
}
这里介绍下 MD5 方式的使用:
import com.google.common.base.Charsets;
import com.google.common.hash.Hashing;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that converts a string to its MD5 digest.
 *
 * <p>The digest is computed with Guava's {@code Hashing.md5()} over the UTF-16LE
 * encoding of the input and returned via {@code HashCode.toString()}.
 *
 * Created by pc on 2016/12/23.
 */
public class MD5 extends UDF {

    /**
     * Computes the MD5 digest of the given text.
     *
     * @param userId the text to hash; may be {@code null}
     * @return the digest as a string, or {@code null} when {@code userId} is {@code null}
     */
    public String evaluate(String userId) {
        // Guava rejects null input; return null instead of throwing,
        // the same way CreateHashId.evaluate treats a null value.
        if (userId == null) {
            return null;
        }
        // The charset is part of the hash input: changing UTF_16LE would change every digest.
        return Hashing.md5().hashString(userId, Charsets.UTF_16LE).toString();
    }

    /**
     * Manual check: hashes two identical JSON payloads and prints both digests
     * separated by a colon.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str1 = "{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";
        String str2 = "{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";

        MD5 md5 = new MD5();
        System.out.println(md5.evaluate(str1) + ":" + md5.evaluate(str2));
    }
}