Solrj JavaBinCodec分析

tyl 2013-12-03

solr请求回来的数据为字节流,solrj通过JavaBinCodec对其进行解析。

一、JavaBinCodec的主要标识tag

各tag的定义:

/*
 * Tag bytes used by the JavaBin wire format.
 *
 * Tags 0..31 are plain type tags (top three bits clear). Tags with any of the
 * top three bits set pack a 3-bit type in the high bits and a size or value
 * fragment in the low 5 bits. Because the constants are Java bytes, every value
 * of 128 or more wraps to a negative number; the ranges below are signed.
 */
public static final byte
          NULL = 0,
          BOOL_TRUE = 1,
          BOOL_FALSE = 2,
          BYTE = 3,
          SHORT = 4,
          DOUBLE = 5,
          INT = 6,
          LONG = 7,
          FLOAT = 8,
          DATE = 9,
          MAP = 10,
          // SolrDocument: readSolrDocument reads the payload as a NamedList and
          // copies every name/value pair into a new SolrDocument via setField.
          SOLRDOC = 11,
          // SolrDocumentList: readSolrDocumentList first reads a list whose
          // elements 0..2 are numFound, start and maxScore, then reads a second
          // list holding the documents themselves.
          SOLRDOCLST = 12,
          BYTEARR = 13,
          ITERATOR = 14,
          /**
           * this is a special tag signals an end. No value is associated with it
           */
          END = 15,

          SOLRINPUTDOC = 16,
          SOLRINPUTDOC_CHILDS = 17,
          ENUM_FIELD_VALUE = 18,
          // types that combine tag + length (or other info) in a single byte
          // Besides identifying the type, these tags also carry the number of
          // child elements (or other information) in their low bits.
          TAG_AND_LEN = (byte) (1 << 5),
          // String; tag range 32..63. Per the original note the low 5 bits
          // (tag & 0x1f) carry the string length (readStr is not shown here).
          STR = (byte) (1 << 5),
          // Small int; tag range 64..95. tag & 0x0f holds the low 4 bits of the
          // VALUE (not a length); if bit 0x10 is set a VInt follows that supplies
          // the higher bits (see readSmallInt).
          SINT = (byte) (2 << 5),
          // Small long; tag range 96..127. NOTE(review): presumably the same
          // layout as SINT; readSmallLong is not shown here, confirm.
          SLONG = (byte) (3 << 5),
          // Array; tag range -128..-97. The element count is obtained by
          // readSize, i.e. tag & 0x1f (see readArray).
          ARR = (byte) (4 << 5),
          // SimpleOrderedMap (a NamedList subclass, and more common); tag range
          // -96..-65. Each entry is a key followed by its value; in the worked
          // example the key arrives as EXTERN_STRING followed by a STR.
          ORDERED_MAP = (byte) (5 << 5),
          // NamedList; tag range -64..-33.
          NAMED_LST = (byte) (6 << 5),
          // Used for the keys of map entries; tag range -32..-1. The low 5 bits
          // are read by readSize as an index into the string table
          // (see readExternString).
          EXTERN_STRING = (byte) (7 << 5);

二、JavaBinCodec主要方法

1、最主要的方法:readVal()
/**
 * Reads the next value from the stream and returns it as a Java object.
 *
 * <p>One tag byte is read into the {@code tagByte} field. The tag is first
 * matched on its top three bits against the "tag + length" types; if none of
 * those match, it is matched as a plain tag value.
 *
 * @param dis the stream to read from
 * @return the decoded value; {@code null} for the NULL tag and {@code END_OBJ}
 *         for the END tag
 * @throws IOException if reading from the stream fails
 * @throws RuntimeException if the tag byte matches no known type
 */
public Object readVal(DataInputInputStream dis) throws IOException {
    // Stored in a field (declared outside this snippet) because the size/value
    // helpers such as readSize and readSmallInt read the low bits from it.
    tagByte = dis.readByte();

    // if ((tagByte & 0xe0) == 0) {
    // if top 3 bits are clear, this is a normal tag

    // OK, try type + size in single byte
    // NOTE(review): assumes tagByte is a byte, like the tag constants, so that
    // the selector and the case labels are sign-extended to int the same way
    // before the unsigned shift; confirm against the field declaration.
    // Tags 0..31 shift to 0, match no case, and fall through to the next switch.
    switch (tagByte >>> 5) {
      case STR >>> 5:
        return readStr(dis);
      case SINT >>> 5:
        return readSmallInt(dis);
      case SLONG >>> 5:
        return readSmallLong(dis);
      case ARR >>> 5:
        return readArray(dis);
      case ORDERED_MAP >>> 5:
        return readOrderedMap(dis);
      case NAMED_LST >>> 5:
        return readNamedList(dis);
      case EXTERN_STRING >>> 5:
        return readExternString(dis);
    }

    // Plain tags: the whole byte identifies the type.
    switch (tagByte) {
      case NULL:
        return null;
      case DATE:
        return new Date(dis.readLong());
      case INT:
        return dis.readInt();
      case BOOL_TRUE:
        return Boolean.TRUE;
      case BOOL_FALSE:
        return Boolean.FALSE;
      case FLOAT:
        return dis.readFloat();
      case DOUBLE:
        return dis.readDouble();
      case LONG:
        return dis.readLong();
      case BYTE:
        return dis.readByte();
      case SHORT:
        return dis.readShort();
      case MAP:
        return readMap(dis);
      case SOLRDOC:
        return readSolrDocument(dis);
      case SOLRDOCLST:
        return readSolrDocumentList(dis);
      case BYTEARR:
        return readByteArray(dis);
      case ITERATOR:
        return readIterator(dis);
      case END:
        return END_OBJ;
      case SOLRINPUTDOC:
        return readSolrInputDocument(dis);
      case ENUM_FIELD_VALUE:
        return readEnumFieldValue(dis);
    }

    // Neither switch recognised the tag.
    throw new RuntimeException("Unknown type " + tagByte);
  }

2、readSolrDocumentList:读SolrDocumentList
//先得到numFound,start,maxScore三个属性的值,存于List中,设置到solrdocumentlist对象中,再找其包含的doclist
  /**
   * Reads a {@code SolrDocumentList} from the stream.
   *
   * <p>Two values are consumed: a list whose elements 0, 1 and 2 are applied as
   * numFound, start and maxScore, followed by a list of documents that is added
   * to the result.
   *
   * @param dis the stream to read from
   * @return the populated document list
   * @throws IOException if reading from the stream fails
   */
  public SolrDocumentList readSolrDocumentList(DataInputInputStream dis) throws IOException {
    SolrDocumentList result = new SolrDocumentList();

    // First value: the three header attributes, in a fixed order.
    List header = (List) readVal(dis);
    Long numFound = (Long) header.get(0);
    result.setNumFound(numFound);
    Long start = (Long) header.get(1);
    result.setStart(start);
    Float maxScore = (Float) header.get(2);
    result.setMaxScore(maxScore);

    // Second value: the documents themselves.
    @SuppressWarnings("unchecked")
    List<SolrDocument> documents = (List<SolrDocument>) readVal(dis);
    result.addAll(documents);

    return result;
  }
3、readSolrDocument:读一个doc
//先获得NamedList,再放到doc中
/**
 * Reads a single {@code SolrDocument} from the stream.
 *
 * <p>The next value is read and cast to {@code NamedList}; every name/value
 * pair is then set as a field on a new document, in list order.
 *
 * @param dis the stream to read from
 * @return the document built from the decoded name/value pairs
 * @throws IOException if reading from the stream fails
 */
public SolrDocument readSolrDocument(DataInputInputStream dis) throws IOException {
    // The author's note says the decoded value is really an ordered map,
    // which is usable as a NamedList.
    final NamedList fields = (NamedList) readVal(dis);
    final SolrDocument document = new SolrDocument();

    int index = 0;
    while (index < fields.size()) {
      final String fieldName = fields.getName(index);
      final Object fieldValue = fields.getVal(index);
      document.setField(fieldName, fieldValue);
      index++;
    }
    return document;
}
4、readOrderedMap:读map
//先key后value,key时有EXTERN_STRING的Tag,再是Str_tag
/**
 * Reads a {@code SimpleOrderedMap} from the stream.
 *
 * <p>The entry count comes from {@code readSize}; each entry is then read as a
 * key (cast to {@code String}) immediately followed by its value.
 *
 * @param dis the stream to read from
 * @return the map with entries in stream order
 * @throws IOException if reading from the stream fails
 */
public SimpleOrderedMap<Object> readOrderedMap(DataInputInputStream dis) throws IOException {
    final int entryCount = readSize(dis);
    final SimpleOrderedMap<Object> entries = new SimpleOrderedMap<Object>();

    for (int remaining = entryCount; remaining > 0; remaining--) {
      // The key is always read before the value.
      final String key = (String) readVal(dis);
      final Object value = readVal(dis);
      entries.add(key, value);
    }
    return entries;
}
5、readArray:读数组list
//
/**
 * Reads an array from the stream as a list.
 *
 * <p>The element count comes from {@code readSize}; that many values are then
 * read with {@code readVal} and appended in order.
 *
 * @param dis the stream to read from
 * @return the decoded elements in stream order
 * @throws IOException if reading from the stream fails
 */
public List<Object> readArray(DataInputInputStream dis) throws IOException {
    final int count = readSize(dis);
    final ArrayList<Object> items = new ArrayList<Object>(count);

    int read = 0;
    while (read < count) {
      items.add(readVal(dis));
      read++;
    }
    return items;
}
6、readSize:将tag &0x1f获得大小
/**
 * Extracts the size encoded with the current tag.
 *
 * <p>The low 5 bits of {@code tagByte} hold the size directly when it is below
 * 0x1f (31). The value 0x1f marks an overflow: a VInt follows in the stream
 * and is added to 0x1f to give the real size.
 *
 * @param in the stream to read the overflow VInt from, if needed
 * @return the decoded size
 * @throws IOException if reading from the stream fails
 */
public int readSize(DataInputInputStream in) throws IOException {
    final int inlineSize = tagByte & 0x1f;
    if (inlineSize != 0x1f) {
      // Small enough to fit in the tag byte itself.
      return inlineSize;
    }
    // All five bits set: the remainder is stored as a following VInt.
    return inlineSize + readVInt(in);
}
7、readSmallInt: tag &0x0f得到的是数值的低4位(不是长度);若tag的0x10位被置位,则后面还跟着一个VInt,左移4位后作为数值的高位
/**
 * Decodes a small int whose value is partly stored in the current tag.
 *
 * <p>The low 4 bits of {@code tagByte} are the low 4 bits of the value. When
 * bit 0x10 of the tag is set, a VInt follows in the stream; it is shifted left
 * by 4 and combined with those low bits.
 *
 * @param dis the stream to read the continuation VInt from, if needed
 * @return the decoded int
 * @throws IOException if reading from the stream fails
 */
public int readSmallInt(DataInputInputStream dis) throws IOException {
    final int lowBits = tagByte & 0x0F;
    final boolean hasContinuation = (tagByte & 0x10) != 0;
    if (!hasContinuation) {
      return lowBits;
    }
    return (readVInt(dis) << 4) | lowBits;
}
8、readExternString
//
/**
 * Reads an "extern" string: either a reference to a string seen earlier or a
 * new string that is recorded for later reference.
 *
 * <p>The value from {@code readSize} is an index. Zero means a string value
 * follows; it is read, appended to {@code stringsList} (created on first use)
 * and returned. A non-zero index returns the entry at {@code index - 1} of
 * {@code stringsList}.
 *
 * @param fis the stream to read from
 * @return the referenced or newly read string
 * @throws IOException if reading from the stream fails
 */
public String readExternString(DataInputInputStream fis) throws IOException {
    final int index = readSize(fis);

    if (index == 0) {
      // A literal string follows; remember it so later tags can refer to it.
      final String value = (String) readVal(fis);
      if (stringsList == null) {
        stringsList = new ArrayList<String>();
      }
      stringsList.add(value);
      return value;
    }

    // Non-zero: 1-based index into the strings seen so far.
    return stringsList.get(index - 1);
}

三、举例

返回的byte:
2, -94, -32, 46, 114, 101, 115, 112, 111, 110, 115, 101, 72, 101, 97, 100, 101, 114, -93, -32, 38, 115, 116, 97, 116, 117, 115, 6, 0, 0, 0, 0, -32, 37, 81, 84, 105, 109, 101, 6, 0, 0, 0, 0, -32, 38, 112, 97, 114, 97, 109, 115, -93, -32, 33, 113, 36, 105, 100, 58, 49, -32, 34, 119, 116, 39, 106, 97, 118, 97, 98, 105, 110, -32, 39, 118, 101, 114, 115, 105, 111, 110, 33, 50, -32, 40, 114, 101, 115, 112, 111, 110, 115, 101, 12, -125, 97, 96, 0, -127, 11, -93, -32, 34, 105, 100, 33, 49, -32, 36, 110, 97, 109, 101, 33, 49, -32, 41, 95, 118, 101, 114, 115, 105, 111, 110, 95, 7, 20, 43, 47, -61, -44, 64, 0, 0

解析完的结果:
{responseHeader={status=0,QTime=0,params={q=id:1,wt=javabin,version=2}},response={numFound=1,start=0,docs=[SolrDocument{id=1, name=1, _version_=1453307822883209216}]}}

解析过程分析:
版本(第一个byte 2即版本号),
ORDERED_MAP Tag(read size: -94&0x1f结果为2,即有两个keyvalue),

//第一个keyvalue
EXTERN_STRING Tag,
 字符串长度(46&0x1f=14),
 responseHeader,
ORDERED_MAP Tag(read size: -93&0x1f结果为3),
EXTERN_STRING Tag,
 Str tag(32是str的tag)字符串长度(38&0x1f=6),
 status,
下个值是int型(6表示int)(ordermap的key读取完后就会读value),
0(4byte),
EXTERN_STRING Tag,
 字符串长度(37&0x1f=5),
 QTime,
下个值是int型(6表示int)(ordermap的key读取完后就会读value),
0(4byte),
  EXTERN_STRING Tag,
 字符串长度(38&0x1f=6),
 params,
ORDERED_MAP Tag(read size: -93&0x1f结果为3),
EXTERN_STRING Tag,
 字符串长度(33&0x1f=1),
 q,
STR TAG(下个值是string)(36&0x1f=4,即有4个字符),
id:1,
 EXTERN_STRING Tag,
 字符串长度(34&0x1f=2),
 wt,
STR TAG(下个值是string)(39&0x1f=7,即有7个字符),
javabin,
 EXTERN_STRING Tag,
 字符串长度(39&0x1f=7),
 version,
STR TAG(下个值是string)(33&0x1f=1,即有1个字符),
2,
 
//第二个keyvalue
EXTERN_STRING Tag,
 字符串长度(40&0x1f=8),
 response,
SOLRDOCLST TAG(12是doclist,即SolrDocumentList对象,有属性numFound,start,maxScore,以及自身是ArrayList<SolrDocument>),
ARRAY TAG(-128是arr tag)(-125&0x1f=3,即有3个属性)(后面的三个属性分别是numFound,start,maxScore,详见readSolrDocumentList),
SLONG Tag(96是smalllong tag)(97&0x0f=1)结果是1,
SLONG Tag(96&0x0f=0)结果是0,
NULL Tag(0表示null),
 ARRAY Tag(-127&0x1f=1,即有一个元素)(该产生的list会被前面的solrdocumentlist.addAll(list)),
SOLRDOC Tag(11是solrdoc tag),
ORDERED_MAP Tag(read size: -93&0x1f结果为3),
EXTERN_STRING Tag,
Str Tag字符串长度(34&0x1f=2),
id,
Str Tag字符串长度(33&0x1f=1),
1,
EXTERN_STRING Tag,
Str Tag字符串长度(36&0x1f=4),
name,
Str Tag字符串长度(33&0x1f=1),
1,
EXTERN_STRING Tag,
Str Tag字符串长度(41&0x1f=9),
_version_,
Long Tag(7是Long Tag),
1453307822883209216

注:

(1453307822883209216=1010000101011001011111100001111010100010000000000000000000000)(20=00010100,43=00101011,47=00101111,-61=11000011,-44=11010100,64=01000000,0=00000000,0=00000000)

相关推荐