momo00 2018-08-24
在组拼 HBase rowkey 时,为了防止热点,rowkey 可以使用 MD5 方式或者 hash 方式。
这里介绍下 hash方式:
0 work_regionmax_student=10000 1 hashfun (stu.class_id, ${work_regionmax_student}) 2 hashfun : hashfun(string, int) 获取string的hashcode,根据int取余并补全位数 3 自定义函数中的写法: import org.apache.hadoop.hive.ql.exec.UDF; /** * Created by zm on 16/6/30. */ public class CreateHashId extends UDF { public String evaluate(String value, long number){ if (value == null || value.toString().equals("") || number < 1 ){ return null; } else { long result = Math.abs(value.hashCode()%number); System.out.println("result: " + result ); int formatLength = String.valueOf(number-1).length(); System.out.println("formatLength: " + formatLength ); String newString = String.format("%0" + formatLength + "d", result); // 用十进制0来填补 return newString; } } //test public static void main(String [] args) { CreateHashId testid = new CreateHashId(); //System.out.println(testid.evaluate("010f5ae14d604b729456009a4c806bc0", 10000)); /** * 结果: result: 3754 formatLength: 4 3754 */ System.out.println(testid.evaluate("01110158d9f041f6a9c75520f3e91b53", 10000)); /** 结果 result: 27 formatLength: 4 如果result的值是4位,那么 String.format("%0" + formatLength + "d", result);的操作没用 ,否则 result前面不够4位就用0填充 0027 */ /*String test = "tmp_dm_dm_evaluation_school_grade_teacher_kp_rank_distribution_teacher_kp_mastered_count_incountyrank_allsubjectlist_20170416_semester"; System.out.println(test.length());*/ } }
这里介绍下md5的使用:
import com.google.common.base.Charsets;
import com.google.common.hash.Hashing;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * Hive UDF that converts a string to its MD5 digest.
 *
 * Created by pc on 2016/12/23.
 */
public class MD5 extends UDF {

    /**
     * Returns the MD5 digest of {@code userId} as produced by Guava's
     * {@code HashCode.toString()}.
     *
     * <p>The digest is computed over the UTF-16LE bytes of the input, so it
     * differs from an MD5 taken over the UTF-8 bytes of the same text. The
     * charset is kept as-is so that previously generated values stay valid.
     *
     * @param userId the string to hash; may be {@code null}
     * @return the digest string, or {@code null} when {@code userId} is
     *         {@code null}
     */
    public String evaluate(String userId) {
        // Guava's hashString throws on null input; return null instead so a
        // missing value does not raise a NullPointerException.
        if (userId == null) {
            return null;
        }
        return Hashing.md5().hashString(userId, Charsets.UTF_16LE).toString();
    }

    /** Manual demo: prints the digests of two sample JSON payloads separated by ':'. */
    public static void main(String[] args) {
        String str1 = "{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";
        String str2 = "{\"actId\":\"1600005\",\"classId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"className\":\"四年级(1)班\",\"countyId\":\"370725\",\"countyName\":\"昌乐县\",\"gradeId\":\"4\",\"originCode\":\"2\",\"role\":\"STUDENT\",\"schoolId\":\"460958\",\"schoolName\":\"昌乐行知双语实验学校\",\"time\":1511059347806,\"userIcon\":\"de28f0bde3800375c3cf23fc7d03d0bc\",\"userId\":\"38085844\",\"userIp\":\"111.37.45.234\",\"userName\":\"孙豪章\",\"data\":{\"workMode\":2,\"publishClassType\":1,\"subjectName\":\"昌乐行知双语实验学校\",\"questionId\":\"tch_a285a3cb93bb4695b018b4e58e6a4ee2_v1_WT_3\",\"subjectId\":\"110\",\"workId\":\"172630abe0a949018b00fad7d80673cc\",\"publishClassId\":\"e291a2698c0a4c60a1af41d7dc40fe50\",\"unitId\":\"110002001065100001001\"}}";

        MD5 md5 = new MD5();
        System.out.println(md5.evaluate(str1) + ":" + md5.evaluate(str2));
    }
}